From 253c25319b3486b918fd2fdb00bf2e5db1450f23 Mon Sep 17 00:00:00 2001 From: Fabien Castan Date: Wed, 31 Jan 2018 17:38:41 +0100 Subject: [PATCH 001/285] More detailed error message on memory allocation failure --- src/popsift/features.cu | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/popsift/features.cu b/src/popsift/features.cu index 25fd1f57..eeddcd89 100755 --- a/src/popsift/features.cu +++ b/src/popsift/features.cu @@ -103,12 +103,16 @@ void FeaturesHost::pin( ) if( err != cudaSuccess ) { cerr << __FILE__ << ":" << __LINE__ << " Runtime warning:" << endl << " Failed to register feature memory in CUDA." << endl + << " Features count: " << getFeatureCount() << endl + << " Memory size requested: " << getFeatureCount() * sizeof(Feature) << endl << " " << cudaGetErrorString(err) << endl; } err = cudaHostRegister( _ori, getDescriptorCount() * sizeof(Descriptor), 0 ); if( err != cudaSuccess ) { cerr << __FILE__ << ":" << __LINE__ << " Runtime warning:" << endl << " Failed to register descriptor memory in CUDA." 
<< endl + << " Descriptors count: " << getDescriptorCount() << endl + << " Memory size requested: " << getDescriptorCount() * sizeof(Descriptor) << endl << " " << cudaGetErrorString(err) << endl; } } From 26a544cfae53835058636bbeefca449af11cce95 Mon Sep 17 00:00:00 2001 From: Carsten Griwodz Date: Tue, 13 Feb 2018 14:11:24 +0000 Subject: [PATCH 002/285] workable thrust filter with cuda 7 --- src/popsift/s_filtergrid.cu | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/src/popsift/s_filtergrid.cu b/src/popsift/s_filtergrid.cu index 37238252..67211186 100644 --- a/src/popsift/s_filtergrid.cu +++ b/src/popsift/s_filtergrid.cu @@ -17,7 +17,7 @@ using namespace std; -#if (__CUDACC_VER__ >= 80000) && not defined(DISABLE_GRID_FILTER) +#if not defined(DISABLE_GRID_FILTER) #include #include @@ -67,18 +67,6 @@ struct FunctionExtractCell } }; -struct FunctionReversePosition -{ - const int _total; - FunctionReversePosition( int total ) : _total(total) { } - - __host__ __device__ - inline int operator()(int val) const - { - return _total - val - 1; - } -}; - struct FunctionIsAbove { int _limit; @@ -239,12 +227,14 @@ int Pyramid::extrema_filter_grid( const Config& conf, int ext_total ) // inclusive prefix sum thrust::inclusive_scan( h_cell_counts.begin(), h_cell_counts.end(), cell_count_prefix_sums.begin() ); - FunctionReversePosition fun_reverse_pos( n ); + thrust::host_vector h_reverse_index(n); + thrust::sequence( h_reverse_index.begin(), h_reverse_index.end(), + n-1, + -1 ); // sumup[i] = prefix sum[i] + sum( cell[i] copied into remaining cells ) thrust::transform( h_cell_counts.begin(), h_cell_counts.end(), - thrust::make_transform_iterator( thrust::make_counting_iterator(0), - fun_reverse_pos ), + h_reverse_index.begin(), cell_count_sumup.begin(), thrust::multiplies() ); thrust::transform( cell_count_sumup.begin(), cell_count_sumup.end(), From 9f0fe8f8481f6170d2fa78a91d42afe0cf7cfea5 Mon Sep 17 00:00:00 2001 From: Carsten Griwodz 
Date: Mon, 14 May 2018 16:00:06 +0200 Subject: [PATCH 003/285] [testing] compare with stored Oxford dataset results --- .gitignore | 1 + CMakeLists.txt | 5 + testScripts/CMakeLists.txt | 25 ++++ testScripts/downloadOxfordDataset.sh.in | 37 ++++++ testScripts/testOxfordDataset.sh.in | 160 ++++++++++++++++++++++++ 5 files changed, 228 insertions(+) create mode 100755 testScripts/CMakeLists.txt create mode 100644 testScripts/downloadOxfordDataset.sh.in create mode 100644 testScripts/testOxfordDataset.sh.in diff --git a/.gitignore b/.gitignore index d93abdc6..722c38cc 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,7 @@ popsift/playground/try-gauss popsift/playground/try-gauss-param popsift/playground/try-gauss-interpolate popsift/playground/try-libav +oxford # Prerequisites *.d diff --git a/CMakeLists.txt b/CMakeLists.txt index 6ae39085..7f474537 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,6 +8,7 @@ OPTION(PopSift_USE_NVTX_PROFILING "Use CUDA NVTX for profiling." OFF) OPTION(PopSift_ERRCHK_AFTER_KERNEL "Synchronize and check CUDA error after every kernel." OFF) OPTION(PopSift_USE_POSITION_INDEPENDENT_CODE "Generate position independent code." ON) OPTION(PopSift_USE_GRID_FILTER "Switch off grid filtering to massively reduce compile time while debugging other things." 
ON) +OPTION(PopSift_USE_TEST_CMD "Add testing step for functional verification" OFF) if(PopSift_USE_POSITION_INDEPENDENT_CODE) set(CMAKE_POSITION_INDEPENDENT_CODE ON) @@ -136,6 +137,10 @@ endif(NOT PopSift_USE_GRID_FILTER) add_subdirectory(src) +if(PopSift_USE_TEST_CMD) +add_subdirectory(testScripts) +endif(PopSift_USE_TEST_CMD) + ########### Add uninstall target ############### CONFIGURE_FILE( "${CMAKE_CURRENT_SOURCE_DIR}/cmake/cmake_uninstall.cmake.in" diff --git a/testScripts/CMakeLists.txt b/testScripts/CMakeLists.txt new file mode 100755 index 00000000..a048a699 --- /dev/null +++ b/testScripts/CMakeLists.txt @@ -0,0 +1,25 @@ +configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/downloadOxfordDataset.sh.in + ${CMAKE_CURRENT_BINARY_DIR}/downloadOxfordDataset.sh ) + +configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/testOxfordDataset.sh.in + ${CMAKE_CURRENT_BINARY_DIR}/testOxfordDataset.sh ) + +add_custom_target( + prepare-test + COMMAND /bin/bash ${CMAKE_CURRENT_BINARY_DIR}/downloadOxfordDataset.sh + DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/downloadOxfordDataset.sh +) + +add_custom_target( + run-test + COMMAND /bin/bash ${CMAKE_CURRENT_BINARY_DIR}/testOxfordDataset.sh + DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/testOxfordDataset.sh + DEPENDS popsift-demo +) + +add_custom_target( + run-test-boat + COMMAND /bin/bash ${CMAKE_CURRENT_BINARY_DIR}/testOxfordDataset.sh boat + DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/testOxfordDataset.sh + DEPENDS popsift-demo +) diff --git a/testScripts/downloadOxfordDataset.sh.in b/testScripts/downloadOxfordDataset.sh.in new file mode 100644 index 00000000..0d933702 --- /dev/null +++ b/testScripts/downloadOxfordDataset.sh.in @@ -0,0 +1,37 @@ +#!/bin/bash + +echo "Making directory @CMAKE_SOURCE_DIR@/oxford" +mkdir -p @CMAKE_SOURCE_DIR@/oxford +echo "Changing to directory @CMAKE_SOURCE_DIR@/oxford" +cd @CMAKE_SOURCE_DIR@/oxford + +for dataset in boat bikes trees graf wall bark leuven ubc +do + if [ ! 
-d $dataset ] + then + echo "Directory $dataset does not exist - creating" + mkdir $dataset + else + echo -n "Directory $dataset exists. " + fi + cd $dataset + if [ ! -f img1.pgm ] && [ ! -f img1.ppm ] + then + echo "Image img1 in $dataset does not exist" + if [ ! -r $dataset.tar.gz ] + then + echo "tarfile does not exist, downloading" + wget http://www.robots.ox.ac.uk/~vgg/research/affine/det_eval_files/$dataset.tar.gz + else + echo -n "Tarfile exists. " + fi + echo "Unpacking tarfile." + tar zxf $dataset.tar.gz + rm -f $dataset.tar.gz + else + echo "File img1.pgm exists." + rm -f $dataset.tar.gz + fi + cd .. +done + diff --git a/testScripts/testOxfordDataset.sh.in b/testScripts/testOxfordDataset.sh.in new file mode 100644 index 00000000..4b07724d --- /dev/null +++ b/testScripts/testOxfordDataset.sh.in @@ -0,0 +1,160 @@ +#!/bin/bash + +cd @CMAKE_BINARY_DIR@ + +if [ ! -d reference ] +then + echo "Extracting reference images." + tar zxf @CMAKE_CURRENT_SOURCE_DIR@/reference.tgz + echo "Done." +fi + +echo "Making directory @CMAKE_BINARY_DIR@/oxford" +mkdir -p @CMAKE_BINARY_DIR@/oxford +echo "Changing to directory @CMAKE_BINARY_DIR@/oxford" +cd @CMAKE_BINARY_DIR@/oxford + +echo "The command line is $*" + +if [ $# -gt 0 ] +then + dataset_list=$* +else + dataset_list=boat bikes trees graf wall bark leuven ubc +fi + +for dataset in $dataset_list +do + echo "Looking for dataset $dataset" + mkdir -p $dataset + cd $dataset + for img in img1 img2 img3 img4 img5 img6 + do + if [ -f @CMAKE_SOURCE_DIR@/oxford/$dataset/$img.pgm ] + then + imgfile=@CMAKE_SOURCE_DIR@/oxford/$dataset/$img.pgm + elif [ -f @CMAKE_SOURCE_DIR@/oxford/$dataset/$img.ppm ] + then + imgfile=@CMAKE_SOURCE_DIR@/oxford/$dataset/$img.ppm + else + continue + fi + echo "Looking for image file $imgfile" + if [ -d output-$img ] + then + echo "Directory output-$img exists. Skipping." 
+ continue + fi + @CMAKE_BINARY_DIR@/popsift-demo --log --gauss-mode vlfeat --desc-mode loop --popsift-mode --root-sift --downsampling -1 -i $imgfile + if [ $? != 0 ] + then + echo "Running popsift on $imgfile failed." + echo "Stopping." + exit + fi + rm -rf output-$img + mkdir -p output-$img/dir-octave + mkdir -p output-$img/dir-dog + + echo -n "Moving ... " + echo -n "Image pyramid. " + mv dir-octave/* output-$img/dir-octave/ + echo "DoG pyramid. " + mv dir-dog/* output-$img/dir-dog/ + + echo -n "Sorting ... " + echo -n "Keypoints with descriptors. " + sort -n output-features.txt > output-$img/features.txt + echo -n "Keypoints. " + sort -n dir-fpt/desc-pyramid.txt > output-$img/keypoints.txt + echo "Descriptors. " + sort -n dir-desc/desc-pyramid.txt > output-$img/descriptors.txt + rm -rf dir-desc dir-dog dir-dog-dump dir-dog-txt dir-fpt dir-octave dir-octave-dump output-features.txt + done + cd .. +done + +for dataset in $dataset_list +do + for img in img1 img2 img3 img4 img5 img6 + do + if [ -d $dataset/output-$img ] + then + echo -n "Compare $dataset $img with reference. " + + echo -n "Image pyramid " + cd @CMAKE_BINARY_DIR@/oxford/$dataset/output-$img/dir-octave + bad_image_matches=0 + for i in * + do + cmp @CMAKE_BINARY_DIR@/reference/$dataset/output-$img/dir-octave/$i $i + if [ $? != 0 ] + then + if [ $bad_image_matches == 0 ] + then + echo "" + fi + echo "pyramid compare: @CMAKE_BINARY_DIR@/reference/$dataset/output-$img/dir-octave/$i and $i differ" + bad_image_matches=1 + fi + done + if [ $bad_image_matches == 0 ] ; then echo -n "OK. "; fi + cd @CMAKE_BINARY_DIR@/oxford + + echo -n "DoG pyramid " + cd @CMAKE_BINARY_DIR@/oxford/$dataset/output-$img/dir-dog + bad_image_matches=0 + for i in * + do + cmp @CMAKE_BINARY_DIR@/reference/$dataset/output-$img/dir-dog/$i $i + if [ $? 
!= 0 ] + then + if [ $bad_image_matches == 0 ] + then + echo "" + fi + echo "DoG compare: @CMAKE_BINARY_DIR@/reference/$dataset/output-$img/dir-dog/$i and $i differ" + bad_image_matches=1 + fi + done + if [ $bad_image_matches == 0 ] ; then echo -n "OK. "; fi + cd @CMAKE_BINARY_DIR@/oxford + + echo "" + fi + done +done + +for dataset in $dataset_list +do + # cd @CMAKE_BINARY_DIR@/oxford + for img in img1 img2 img3 img4 img5 img6 + do + dir_prefix=@CMAKE_BINARY_DIR@/oxford/$dataset/output-$img + if [ -d $dataset/output-$img ] + then + + echo -n "Compare $dataset $img with reference. " + + echo -n "Features " + cmp -s @CMAKE_BINARY_DIR@/reference/$dataset/output-$img/features.txt $dir_prefix/features.txt + if [ $? == 1 ] + then + echo "BAD. " + echo cmp "@CMAKE_BINARY_DIR@/reference/$dataset/output-$img/features.txt $dir_prefix/features.txt" + diff @CMAKE_BINARY_DIR@/reference/$dataset/output-$img/features.txt $dir_prefix/features.txt | wc -l + else echo -n "OK. " ; fi + + echo -n "Keypoints " + cmp -s @CMAKE_BINARY_DIR@/reference/$dataset/output-$img/keypoints.txt $dir_prefix/keypoints.txt + if [ $? == 1 ] ; then echo -n "BAD. " ; else echo -n "OK. " ; fi + + echo -n "Descriptors " + cmp -s @CMAKE_BINARY_DIR@/reference/$dataset/output-$img/descriptors.txt $dir_prefix/descriptors.txt + if [ $? == 1 ] ; then echo -n "BAD. " ; else echo -n "OK. " ; fi + + echo "" + fi + done +done + From 24b956cd1833db0f4a207087250764f81c96d148 Mon Sep 17 00:00:00 2001 From: Carsten Griwodz Date: Mon, 30 Apr 2018 09:06:18 +0200 Subject: [PATCH 004/285] [cosmetic] update Thrust comment --- src/popsift/sift_conf.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/popsift/sift_conf.h b/src/popsift/sift_conf.h index fd26ac7c..d9f1a22b 100644 --- a/src/popsift/sift_conf.h +++ b/src/popsift/sift_conf.h @@ -181,11 +181,10 @@ struct Config return _max_extrema; } - /* Filtering extrema is only possible when CUDA version is >= 8.0 - * The reason is that we use Thrust. 
This allows runtime testing. - * - * Note: re-writing the filtering code is possible, either older - * Thrust semantics, CUDA CUB or doing everything from scratch. + /* Have we enabled filtering? This is a compile time decision. + * The reason is that we use Thrust, which increases compile + * considerably and can be deactivated at the CMake level when + * you work on something else. */ bool getCanFilterExtrema() const; From 79fa3e05a4731eee6daa486a8ed04781181cd823 Mon Sep 17 00:00:00 2001 From: Carsten Griwodz Date: Thu, 1 Mar 2018 14:19:11 +0100 Subject: [PATCH 005/285] [cmake] Option to use static boost libraries # Conflicts: # CMakeLists.txt --- CMakeLists.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7f474537..cc2b0507 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -9,6 +9,11 @@ OPTION(PopSift_ERRCHK_AFTER_KERNEL "Synchronize and check CUDA error after e OPTION(PopSift_USE_POSITION_INDEPENDENT_CODE "Generate position independent code." ON) OPTION(PopSift_USE_GRID_FILTER "Switch off grid filtering to massively reduce compile time while debugging other things." 
ON) OPTION(PopSift_USE_TEST_CMD "Add testing step for functional verification" OFF) +OPTION(PopSift_BOOST_USE_STATIC_LIBS "Link with static Boost libraries" OFF) + +if(PopSift_BOOST_USE_STATIC_LIBS) + set(Boost_USE_STATIC_LIBS ON) +endif() if(PopSift_USE_POSITION_INDEPENDENT_CODE) set(CMAKE_POSITION_INDEPENDENT_CODE ON) From 555882f9f83ee73867774a59f3ede503c2094539 Mon Sep 17 00:00:00 2001 From: Carsten Griwodz Date: Mon, 14 May 2018 15:02:26 +0200 Subject: [PATCH 006/285] [feature] adapt to CUDA 9 --- CMakeLists.txt | 24 +++++++++---- src/CMakeLists.txt | 12 +++++-- src/popsift/common/assist.h | 32 +++++++++++++++++ src/popsift/common/excl_blk_prefix_sum.h | 8 +++-- src/popsift/common/warp_bitonic_sort.h | 6 ++-- src/popsift/features.cu | 11 +++--- src/popsift/s_desc_grid.cu | 12 +++---- src/popsift/s_desc_igrid.cu | 8 ++--- src/popsift/s_desc_iloop.cu | 12 +++---- src/popsift/s_desc_loop.cu | 12 +++---- src/popsift/s_desc_norm_l2.h | 44 +++++++++++++----------- src/popsift/s_desc_norm_rs.h | 13 +++---- src/popsift/s_desc_notile.cu | 8 ++--- src/popsift/s_extrema.cu | 6 ++-- src/popsift/s_filtergrid.cu | 5 +-- src/popsift/s_orientation.cu | 9 ++--- src/popsift/s_pyramid_build_aa.cu | 8 ++--- src/popsift/s_pyramid_fixed.cu | 4 +-- src/popsift/sift_conf.cu | 2 +- src/popsift/sift_config.h.in | 5 +++ 20 files changed, 153 insertions(+), 88 deletions(-) create mode 100644 src/popsift/sift_config.h.in diff --git a/CMakeLists.txt b/CMakeLists.txt index cc2b0507..7a521ca7 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,6 +10,7 @@ OPTION(PopSift_USE_POSITION_INDEPENDENT_CODE "Generate position independent code OPTION(PopSift_USE_GRID_FILTER "Switch off grid filtering to massively reduce compile time while debugging other things." 
ON) OPTION(PopSift_USE_TEST_CMD "Add testing step for functional verification" OFF) OPTION(PopSift_BOOST_USE_STATIC_LIBS "Link with static Boost libraries" OFF) +OPTION(PopSift_NVCC_WARNINGS "Switch on several additional warning for CUDA nvcc" OFF) if(PopSift_BOOST_USE_STATIC_LIBS) set(Boost_USE_STATIC_LIBS ON) @@ -118,13 +119,22 @@ set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};--default-stream;legacy") message(STATUS "CUDA Version is ${CUDA_VERSION}") message(STATUS "Compiling for CUDA CCs: ${PopSift_CUDA_CC_LIST}") -if(CUDA_VERSION>=7.5) - set(CUDA_NVCC_FLAGS_RELEASE "${CUDA_NVCC_FLAGS_RELEASE};-Xptxas;-warn-lmem-usage") - set(CUDA_NVCC_FLAGS_RELEASE "${CUDA_NVCC_FLAGS_RELEASE};-Xptxas;-warn-spills") - set(CUDA_NVCC_FLAGS_RELEASE "${CUDA_NVCC_FLAGS_RELEASE};-Xptxas;--warn-on-local-memory-usage") - set(CUDA_NVCC_FLAGS_RELEASE "${CUDA_NVCC_FLAGS_RELEASE};-Xptxas;--warn-on-spills") -else(CUDA_VERSION>=7.5) -endif(CUDA_VERSION>=7.5) +if( ( CUDA_VERSION VERSION_EQUAL "7.5" ) OR ( CUDA_VERSION VERSION_GREATER "7.5") ) + if(PopSift_NVCC_WARNINGS) + set(CUDA_NVCC_FLAGS_RELEASE "${CUDA_NVCC_FLAGS_RELEASE};-Xptxas;-warn-lmem-usage") + set(CUDA_NVCC_FLAGS_RELEASE "${CUDA_NVCC_FLAGS_RELEASE};-Xptxas;-warn-spills") + set(CUDA_NVCC_FLAGS_RELEASE "${CUDA_NVCC_FLAGS_RELEASE};-Xptxas;--warn-on-local-memory-usage") + set(CUDA_NVCC_FLAGS_RELEASE "${CUDA_NVCC_FLAGS_RELEASE};-Xptxas;--warn-on-spills") + endif(PopSift_NVCC_WARNINGS) +endif( ( CUDA_VERSION VERSION_EQUAL "7.5" ) OR ( CUDA_VERSION VERSION_GREATER "7.5") ) + +if( CUDA_VERSION VERSION_GREATER "7.4") + set(HAVE_NORMF 1) +endif( CUDA_VERSION VERSION_GREATER "7.4") + +if( ( CUDA_VERSION VERSION_EQUAL "9.0" ) OR ( CUDA_VERSION VERSION_GREATER "9.0") ) + set(HAVE_SHFL_DOWN_SYNC 1) +endif( ( CUDA_VERSION VERSION_EQUAL "9.0" ) OR ( CUDA_VERSION VERSION_GREATER "9.0") ) # library required for CUDA dynamic parallelism, forgotten by CMake 3.4 cuda_find_library_local_first(CUDA_CUDADEVRT_LIBRARY cudadevrt "\"cudadevrt\" library") diff 
--git a/src/CMakeLists.txt b/src/CMakeLists.txt index e12a32f9..11dba5e8 100755 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,6 +1,6 @@ set(LIBRARY_OUTPUT_PATH ${PROJECT_BINARY_DIR}) -CUDA_INCLUDE_DIRECTORIES(${Boost_INCLUDE_DIRS}) +CUDA_INCLUDE_DIRECTORIES(${Boost_INCLUDE_DIRS} ${CMAKE_CURRENT_BINARY_DIR}/popsift) CUDA_ADD_LIBRARY(popsift STATIC popsift/popsift.cu popsift/popsift.h @@ -42,6 +42,10 @@ CUDA_ADD_LIBRARY(popsift STATIC popsift/common/vec_macros.h popsift/common/clamp.h ) +configure_file(popsift/sift_config.h.in + ${CMAKE_CURRENT_BINARY_DIR}/popsift/sift_config.h + @ONLY) + # BUILD_INTERFACE allows to include the directory with source only when target is # built in the building tree (ie, not from an install location) target_include_directories(popsift @@ -54,7 +58,7 @@ set_target_properties(popsift PROPERTIES DEBUG_POSTFIX "d") # cannot use PRIVATE here as there is a bug in FindCUDA and CUDA_ADD_LIBRARY # https://gitlab.kitware.com/cmake/cmake/issues/16097 - target_link_libraries(popsift ${Boost_LIBRARIES} ${CUDA_CUDADEVRT_LIBRARY}) +target_link_libraries(popsift ${Boost_LIBRARIES} ${CUDA_CUDADEVRT_LIBRARY}) # EXPORTING THE LIBRARY @@ -104,6 +108,10 @@ install(DIRECTORY "popsift" DESTINATION "${include_install_dir}" FILES_MATCHING PATTERN "*.hpp" PATTERN "*.h") +install(DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/popsift" + DESTINATION "${include_install_dir}" + FILES_MATCHING PATTERN "*.hpp" PATTERN "*.h") + # Config # * /lib/cmake/${PROJECT_NAME}/${PROJECT_NAME}Config.cmake # * /lib/cmake/${PROJECT_NAME}${PROJECT_NAME}ConfigVersion.cmake diff --git a/src/popsift/common/assist.h b/src/popsift/common/assist.h index e1531eb2..3703554d 100644 --- a/src/popsift/common/assist.h +++ b/src/popsift/common/assist.h @@ -11,12 +11,44 @@ #include #include // for pthread_self +#include "sift_config.h" namespace popsift { std::ostream& operator<<( std::ostream& ostr, const dim3& p ); +/* + * Assistance with compatibility-breaking builtin function changes + */ 
+#ifdef HAVE_SHFL_DOWN_SYNC +template __device__ inline T shuffle ( T variable, int src ) { return __shfl_sync ( 0xffffffff, variable, src ); } +template __device__ inline T shuffle_up ( T variable, int delta ) { return __shfl_up_sync ( 0xffffffff, variable, delta ); } +template __device__ inline T shuffle_down( T variable, int delta ) { return __shfl_down_sync( 0xffffffff, variable, delta ); } +template __device__ inline T shuffle_xor ( T variable, int delta ) { return __shfl_xor_sync ( 0xffffffff, variable, delta ); } +__device__ inline unsigned int ballot( unsigned int pred ) { return __ballot_sync ( 0xffffffff, pred ); } +__device__ inline int any ( unsigned int pred ) { return __any_sync ( 0xffffffff, pred ); } +__device__ inline int all ( unsigned int pred ) { return __all_sync ( 0xffffffff, pred ); } + +template __device__ inline T shuffle ( T variable, int src , int ws ) { return __shfl_sync ( 0xffffffff, variable, src , ws ); } +template __device__ inline T shuffle_up ( T variable, int delta, int ws ) { return __shfl_up_sync ( 0xffffffff, variable, delta, ws ); } +template __device__ inline T shuffle_down( T variable, int delta, int ws ) { return __shfl_down_sync( 0xffffffff, variable, delta, ws ); } +template __device__ inline T shuffle_xor ( T variable, int delta, int ws ) { return __shfl_xor_sync ( 0xffffffff, variable, delta, ws ); } +#else +template __device__ inline T shuffle ( T variable, int src ) { return __shfl ( variable, src ); } +template __device__ inline T shuffle_up ( T variable, int delta ) { return __shfl_up ( variable, delta ); } +template __device__ inline T shuffle_down( T variable, int delta ) { return __shfl_down( variable, delta ); } +template __device__ inline T shuffle_xor ( T variable, int delta ) { return __shfl_xor ( variable, delta ); } +__device__ inline unsigned int ballot( unsigned int pred ) { return __ballot ( pred ); } +__device__ inline int any ( unsigned int pred ) { return __any ( pred ); } +__device__ inline int all 
( unsigned int pred ) { return __all ( pred ); } + +template __device__ inline T shuffle ( T variable, int src , int ws ) { return __shfl ( variable, src , ws ); } +template __device__ inline T shuffle_up ( T variable, int delta, int ws ) { return __shfl_up ( variable, delta, ws ); } +template __device__ inline T shuffle_down( T variable, int delta, int ws ) { return __shfl_down( variable, delta, ws ); } +template __device__ inline T shuffle_xor ( T variable, int delta, int ws ) { return __shfl_xor ( variable, delta, ws ); } +#endif + /* This computation is needed very frequently when a dim3 grid block is * initialized. It ensure that the tail is not forgotten. */ diff --git a/src/popsift/common/excl_blk_prefix_sum.h b/src/popsift/common/excl_blk_prefix_sum.h index b8c3b6dc..9b5f3d7e 100644 --- a/src/popsift/common/excl_blk_prefix_sum.h +++ b/src/popsift/common/excl_blk_prefix_sum.h @@ -10,6 +10,8 @@ #include #include +#include "assist.h" + namespace ExclusivePrefixSum { class IgnoreTotal @@ -90,7 +92,7 @@ class Block // This loop is an exclusive prefix sum for one warp for( int s=0; s<5; s++ ) { - const int add = __shfl_up( ews+self, 1< #include +#include "assist.h" + namespace popsift { namespace BitonicSort { @@ -57,14 +59,14 @@ class Warp32 int shiftit( const int my_index, const int shift, const int direction, const bool increasing ) { const T my_val = _array[my_index]; - const T other_val = __shfl_xor( my_val, 1 << shift ); + const T other_val = popsift::shuffle_xor( my_val, 1 << shift ); const bool reverse = ( threadIdx.x & ( 1 << direction ) ); const bool id_less = ( ( threadIdx.x & ( 1 << shift ) ) == 0 ); const bool my_more = id_less ? ( my_val > other_val ) : ( my_val < other_val ); const bool must_swap = not ( my_more ^ reverse ^ increasing ); - return ( must_swap ? __shfl_xor( my_index, 1 << shift ) + return ( must_swap ? 
popsift::shuffle_xor( my_index, 1 << shift ) : my_index ); } diff --git a/src/popsift/features.cu b/src/popsift/features.cu index eeddcd89..5dd753a5 100755 --- a/src/popsift/features.cu +++ b/src/popsift/features.cu @@ -17,6 +17,7 @@ #include "features.h" #include "sift_extremum.h" +#include "common/assist.h" #include "common/debug_macros.h" using namespace std; @@ -188,11 +189,11 @@ l2_in_t0( const float4* lptr, const float4* rptr ) + mval.y * mval.y + mval.z * mval.z + mval.w * mval.w; - res += __shfl_down( res, 16 ); - res += __shfl_down( res, 8 ); - res += __shfl_down( res, 4 ); - res += __shfl_down( res, 2 ); - res += __shfl_down( res, 1 ); + res += shuffle_down( res, 16 ); + res += shuffle_down( res, 8 ); + res += shuffle_down( res, 4 ); + res += shuffle_down( res, 2 ); + res += shuffle_down( res, 1 ); return res; } diff --git a/src/popsift/s_desc_grid.cu b/src/popsift/s_desc_grid.cu index 2d911609..be167021 100644 --- a/src/popsift/s_desc_grid.cu +++ b/src/popsift/s_desc_grid.cu @@ -108,12 +108,12 @@ void ext_desc_grid_sub( const int ix, /* reduction here */ for (int i = 0; i < 8; i++) { - // dpt[i] += __shfl_down( dpt[i], 16 ); - dpt[i] += __shfl_down( dpt[i], 8, 16 ); - dpt[i] += __shfl_down( dpt[i], 4, 16 ); - dpt[i] += __shfl_down( dpt[i], 2, 16 ); - dpt[i] += __shfl_down( dpt[i], 1, 16 ); - dpt[i] = __shfl ( dpt[i], 0, 16 ); + // dpt[i] += popsift::shuffle_down( dpt[i], 16 ); + dpt[i] += popsift::shuffle_down( dpt[i], 8, 16 ); + dpt[i] += popsift::shuffle_down( dpt[i], 4, 16 ); + dpt[i] += popsift::shuffle_down( dpt[i], 2, 16 ); + dpt[i] += popsift::shuffle_down( dpt[i], 1, 16 ); + dpt[i] = popsift::shuffle ( dpt[i], 0, 16 ); } diff --git a/src/popsift/s_desc_igrid.cu b/src/popsift/s_desc_igrid.cu index d1ed60bd..9678abde 100644 --- a/src/popsift/s_desc_igrid.cu +++ b/src/popsift/s_desc_igrid.cu @@ -64,10 +64,10 @@ void ext_desc_igrid_sub( const float x, const float y, const int level, /* reduction here */ for (int i = 0; i < 8; i++) { - dpt[i] += 
__shfl_xor( dpt[i], 1, 16 ); - dpt[i] += __shfl_xor( dpt[i], 2, 16 ); - dpt[i] += __shfl_xor( dpt[i], 4, 16 ); - dpt[i] += __shfl_xor( dpt[i], 8, 16 ); + dpt[i] += popsift::shuffle_xor( dpt[i], 1, 16 ); + dpt[i] += popsift::shuffle_xor( dpt[i], 2, 16 ); + dpt[i] += popsift::shuffle_xor( dpt[i], 4, 16 ); + dpt[i] += popsift::shuffle_xor( dpt[i], 8, 16 ); } if( threadIdx.x < 8 ) { diff --git a/src/popsift/s_desc_iloop.cu b/src/popsift/s_desc_iloop.cu index 5fb9436c..3f370d42 100644 --- a/src/popsift/s_desc_iloop.cu +++ b/src/popsift/s_desc_iloop.cu @@ -116,12 +116,12 @@ void ext_desc_iloop_sub( const float ang, /* reduction here */ for (int i = 0; i < 8; i++) { - dpt[i] += __shfl_down( dpt[i], 16 ); - dpt[i] += __shfl_down( dpt[i], 8 ); - dpt[i] += __shfl_down( dpt[i], 4 ); - dpt[i] += __shfl_down( dpt[i], 2 ); - dpt[i] += __shfl_down( dpt[i], 1 ); - dpt[i] = __shfl ( dpt[i], 0 ); + dpt[i] += popsift::shuffle_down( dpt[i], 16 ); + dpt[i] += popsift::shuffle_down( dpt[i], 8 ); + dpt[i] += popsift::shuffle_down( dpt[i], 4 ); + dpt[i] += popsift::shuffle_down( dpt[i], 2 ); + dpt[i] += popsift::shuffle_down( dpt[i], 1 ); + dpt[i] = popsift::shuffle ( dpt[i], 0 ); } if( threadIdx.x < 8 ) { diff --git a/src/popsift/s_desc_loop.cu b/src/popsift/s_desc_loop.cu index 55196b68..e6491a95 100644 --- a/src/popsift/s_desc_loop.cu +++ b/src/popsift/s_desc_loop.cu @@ -124,12 +124,12 @@ void ext_desc_loop_sub( const float ang, /* reduction here */ for (int i = 0; i < 8; i++) { - dpt[i] += __shfl_down( dpt[i], 16 ); - dpt[i] += __shfl_down( dpt[i], 8 ); - dpt[i] += __shfl_down( dpt[i], 4 ); - dpt[i] += __shfl_down( dpt[i], 2 ); - dpt[i] += __shfl_down( dpt[i], 1 ); - dpt[i] = __shfl ( dpt[i], 0 ); + dpt[i] += popsift::shuffle_down( dpt[i], 16 ); + dpt[i] += popsift::shuffle_down( dpt[i], 8 ); + dpt[i] += popsift::shuffle_down( dpt[i], 4 ); + dpt[i] += popsift::shuffle_down( dpt[i], 2 ); + dpt[i] += popsift::shuffle_down( dpt[i], 1 ); + dpt[i] = popsift::shuffle ( dpt[i], 0 ); } if( 
threadIdx.x < 8 ) { diff --git a/src/popsift/s_desc_norm_l2.h b/src/popsift/s_desc_norm_l2.h index 79f53f7a..cf2f3f6c 100644 --- a/src/popsift/s_desc_norm_l2.h +++ b/src/popsift/s_desc_norm_l2.h @@ -7,6 +7,8 @@ */ #pragma once #include "s_desc_normalize.h" +#include "common/assist.h" +#include "sift_config.h" using namespace popsift; using namespace std; @@ -48,7 +50,7 @@ void NormalizeL2::normalize( const float* src_desc, float* dst_desc, const bool float4 descr; descr = ptr4[threadIdx.x]; -#if __CUDACC_VER__ >= 70500 +#ifdef HAVE_NORMF // normf() is an elegant function: sqrt(sum_0^127{v^2}) // It exists from CUDA 7.5 but the trouble with CUB on the GTX 980 Ti forces // us to with CUDA 7.0 right now @@ -59,7 +61,7 @@ void NormalizeL2::normalize( const float* src_desc, float* dst_desc, const bool norm = normf( 128, src_desc ); } __syncthreads(); - norm = __shfl( norm, 0 ); + norm = popsift::shuffle( norm, 0 ); descr.x = min( descr.x, 0.2f*norm ); descr.y = min( descr.y, 0.2f*norm ); @@ -70,33 +72,33 @@ void NormalizeL2::normalize( const float* src_desc, float* dst_desc, const bool + descr.y * descr.y + descr.z * descr.z + descr.w * descr.w; - norm += __shfl_down( norm, 16 ); - norm += __shfl_down( norm, 8 ); - norm += __shfl_down( norm, 4 ); - norm += __shfl_down( norm, 2 ); - norm += __shfl_down( norm, 1 ); + norm += popsift::shuffle_down( norm, 16 ); + norm += popsift::shuffle_down( norm, 8 ); + norm += popsift::shuffle_down( norm, 4 ); + norm += popsift::shuffle_down( norm, 2 ); + norm += popsift::shuffle_down( norm, 1 ); if( threadIdx.x == 0 ) { // norm = __fsqrt_rn( norm ); // norm = __fdividef( 512.0f, norm ); norm = __frsqrt_rn( norm ); // inverse square root norm = scalbnf( norm, d_consts.norm_multi ); } -#else +#else // not HAVE_NORMF float norm; norm = descr.x * descr.x + descr.y * descr.y + descr.z * descr.z + descr.w * descr.w; - norm += __shfl_down( norm, 16 ); - norm += __shfl_down( norm, 8 ); - norm += __shfl_down( norm, 4 ); - norm += __shfl_down( 
norm, 2 ); - norm += __shfl_down( norm, 1 ); + norm += popsift::shuffle_down( norm, 16 ); + norm += popsift::shuffle_down( norm, 8 ); + norm += popsift::shuffle_down( norm, 4 ); + norm += popsift::shuffle_down( norm, 2 ); + norm += popsift::shuffle_down( norm, 1 ); if( threadIdx.x == 0 ) { norm = __fsqrt_rn( norm ); } - norm = __shfl( norm, 0 ); + norm = popsift::shuffle( norm, 0 ); descr.x = min( descr.x, 0.2f*norm ); descr.y = min( descr.y, 0.2f*norm ); @@ -107,19 +109,19 @@ void NormalizeL2::normalize( const float* src_desc, float* dst_desc, const bool + descr.y * descr.y + descr.z * descr.z + descr.w * descr.w; - norm += __shfl_down( norm, 16 ); - norm += __shfl_down( norm, 8 ); - norm += __shfl_down( norm, 4 ); - norm += __shfl_down( norm, 2 ); - norm += __shfl_down( norm, 1 ); + norm += popsift::shuffle_down( norm, 16 ); + norm += popsift::shuffle_down( norm, 8 ); + norm += popsift::shuffle_down( norm, 4 ); + norm += popsift::shuffle_down( norm, 2 ); + norm += popsift::shuffle_down( norm, 1 ); if( threadIdx.x == 0 ) { // norm = __fsqrt_rn( norm ); // norm = __fdividef( 512.0f, norm ); norm = __frsqrt_rn( norm ); // inverse square root norm = scalbnf( norm, d_consts.norm_multi ); } -#endif - norm = __shfl( norm, 0 ); +#endif // HAVE_NORMF + norm = popsift::shuffle( norm, 0 ); descr.x = descr.x * norm; descr.y = descr.y * norm; diff --git a/src/popsift/s_desc_norm_rs.h b/src/popsift/s_desc_norm_rs.h index ef5d8fa4..cf07aa69 100644 --- a/src/popsift/s_desc_norm_rs.h +++ b/src/popsift/s_desc_norm_rs.h @@ -7,6 +7,7 @@ */ #pragma once #include "s_desc_normalize.h" +#include "common/assist.h" using namespace popsift; using namespace std; @@ -50,13 +51,13 @@ void NormalizeRootSift::normalize( const float* src_desc, float* dst_desc, const float sum = descr.x + descr.y + descr.z + descr.w; - sum += __shfl_down( sum, 16 ); - sum += __shfl_down( sum, 8 ); - sum += __shfl_down( sum, 4 ); - sum += __shfl_down( sum, 2 ); - sum += __shfl_down( sum, 1 ); + sum += 
popsift::shuffle_down( sum, 16 ); + sum += popsift::shuffle_down( sum, 8 ); + sum += popsift::shuffle_down( sum, 4 ); + sum += popsift::shuffle_down( sum, 2 ); + sum += popsift::shuffle_down( sum, 1 ); - sum = __shfl( sum, 0 ); + sum = popsift::shuffle( sum, 0 ); float val; val = scalbnf( __fsqrt_rn( __fdividef( descr.x, sum ) ), diff --git a/src/popsift/s_desc_notile.cu b/src/popsift/s_desc_notile.cu index 689a033c..cff3a96e 100644 --- a/src/popsift/s_desc_notile.cu +++ b/src/popsift/s_desc_notile.cu @@ -79,10 +79,10 @@ void ext_desc_notile_sub( const float x, const float y, const int level, for( int i=0; i<8; i++ ) { - dpt[i] += __shfl_down( dpt[i], 4, 8 ); // add n+4 - dpt[i] += __shfl_down( dpt[i], 2, 8 ); // add n+2 - dpt[i] += __shfl_down( dpt[i], 1, 8 ); // add n+1 - dpt[i] = __shfl ( dpt[i], 0, 8 ); // move 0 to all + dpt[i] += popsift::shuffle_down( dpt[i], 4, 8 ); // add n+4 + dpt[i] += popsift::shuffle_down( dpt[i], 2, 8 ); // add n+2 + dpt[i] += popsift::shuffle_down( dpt[i], 1, 8 ); // add n+1 + dpt[i] = popsift::shuffle ( dpt[i], 0, 8 ); // move 0 to all } __syncthreads(); diff --git a/src/popsift/s_extrema.cu b/src/popsift/s_extrema.cu index 7d9e0d0c..d2c82de7 100644 --- a/src/popsift/s_extrema.cu +++ b/src/popsift/s_extrema.cu @@ -21,9 +21,9 @@ namespace popsift{ template __device__ static inline -uint32_t extrema_count( int indicator, int* extrema_counter ) +uint32_t extrema_count( unsigned int indicator, int* extrema_counter ) { - uint32_t mask = __ballot( indicator ); // bitfield of warps with results + uint32_t mask = popsift::ballot( indicator ); // bitfield of warps with results int ct = __popc( mask ); // horizontal reduce @@ -34,7 +34,7 @@ uint32_t extrema_count( int indicator, int* extrema_counter ) write_index = atomicAdd( extrema_counter, ct ); } // broadcast from thread 0 to all threads in warp - write_index = __shfl( write_index, 0 ); + write_index = popsift::shuffle( write_index, 0 ); // this thread's offset: count only bits below the 
bit of the own // thread index; this provides the 0 result and every result up to ct diff --git a/src/popsift/s_filtergrid.cu b/src/popsift/s_filtergrid.cu index 67211186..f4f6e846 100644 --- a/src/popsift/s_filtergrid.cu +++ b/src/popsift/s_filtergrid.cu @@ -26,6 +26,7 @@ using namespace std; #include #include #include +#include namespace popsift { @@ -322,7 +323,7 @@ int Pyramid::extrema_filter_grid( const Config& conf, int ext_total ) } }; // namespace popsift -#else // (__CUDACC_VER__ >= 80000) && not defined(DISABLE_GRID_FILTER) +#else // not defined(DISABLE_GRID_FILTER) namespace popsift { @@ -334,5 +335,5 @@ int Pyramid::extrema_filter_grid( const Config& conf, int ext_total ) } }; // namespace popsift -#endif // (__CUDACC_VER__ >= 80000) && not defined(DISABLE_GRID_FILTER) +#endif // not defined(DISABLE_GRID_FILTER) diff --git a/src/popsift/s_orientation.cu b/src/popsift/s_orientation.cu index 5929a537..64ac5863 100644 --- a/src/popsift/s_orientation.cu +++ b/src/popsift/s_orientation.cu @@ -16,6 +16,7 @@ #include "common/excl_blk_prefix_sum.h" #include "common/warp_bitonic_sort.h" #include "common/debug_macros.h" +#include "common/assist.h" #ifdef USE_NVTX #include @@ -101,7 +102,7 @@ void ori_par( const int octave, int hy = ymax - ymin + 1; int loops = wx * hy; - for( int i = threadIdx.x; ::__any(i < loops); i += blockDim.x ) + for( int i = threadIdx.x; popsift::any(i < loops); i += blockDim.x ) { if( i < loops ) { int yy = i / wx + ymin; @@ -179,7 +180,7 @@ void ori_par( const int octave, __shared__ float refined_angle[64]; __shared__ float yval [64]; - for( int bin = threadIdx.x; ::__any( bin < ORI_NBINS ); bin += blockDim.x ) { + for( int bin = threadIdx.x; popsift::any( bin < ORI_NBINS ); bin += blockDim.x ) { const int prev = bin == 0 ? ORI_NBINS-1 : bin-1; const int next = bin == ORI_NBINS-1 ? 0 : bin+1; @@ -212,7 +213,7 @@ void ori_par( const int octave, // All threads retrieve the yval of thread 0, the largest // of all yvals. 
const float best_val = yval[best_index.x]; - const float yval_ref = 0.8f * __shfl( best_val, 0 ); + const float yval_ref = 0.8f * popsift::shuffle( best_val, 0 ); const bool valid = ( best_val >= yval_ref ); bool written = false; @@ -229,7 +230,7 @@ void ori_par( const int octave, } } - int angles = __popc( __ballot( written ) ); + int angles = __popc( popsift::ballot( written ) ); if( threadIdx.x == 0 ) { ext->xpos = iext->xpos; ext->ypos = iext->ypos; diff --git a/src/popsift/s_pyramid_build_aa.cu b/src/popsift/s_pyramid_build_aa.cu index 36720b49..1f5cf09f 100755 --- a/src/popsift/s_pyramid_build_aa.cu +++ b/src/popsift/s_pyramid_build_aa.cu @@ -39,11 +39,11 @@ void horiz( cudaTextureObject_t src_point_texture, int shiftval = 0; for( int offset=span-1; offset>0; offset-- ) { shiftval += 1; - const float D1 = __shfl_down( A, shiftval ); - const float D2 = __shfl_up ( C, span - shiftval ); + const float D1 = popsift::shuffle_down( A, shiftval ); + const float D2 = popsift::shuffle_up ( C, span - shiftval ); const float D = threadIdx.x < (32 - shiftval) ? D1 : D2; - const float E1 = __shfl_up ( B, shiftval ); - const float E2 = __shfl_down( C, span - shiftval ); + const float E1 = popsift::shuffle_up ( B, shiftval ); + const float E2 = popsift::shuffle_down( C, span - shiftval ); const float E = threadIdx.x > shiftval ? 
E1 : E2; g = filter[offset]; out += ( D + E ) * g; diff --git a/src/popsift/s_pyramid_fixed.cu b/src/popsift/s_pyramid_fixed.cu index 87816be0..50a6593c 100755 --- a/src/popsift/s_pyramid_fixed.cu +++ b/src/popsift/s_pyramid_fixed.cu @@ -33,11 +33,11 @@ inline float octave_fixed_horiz( float fval, const float* filter ) float out = fval * filter[0]; #pragma unroll for( int i=1; i<=SHIFT; i++ ) { - float val = __shfl_up( fval, i ) + __shfl_down( fval, i ); + float val = popsift::shuffle_up( fval, i ) + popsift::shuffle_down( fval, i ); out += val * filter[i]; } - fval = __shfl_down( out, SHIFT ); + fval = popsift::shuffle_down( out, SHIFT ); return fval; } diff --git a/src/popsift/sift_conf.cu b/src/popsift/sift_conf.cu index 783eb402..b1ea807f 100644 --- a/src/popsift/sift_conf.cu +++ b/src/popsift/sift_conf.cu @@ -121,7 +121,7 @@ const char* Config::getGaussModeUsage( ) bool Config::getCanFilterExtrema() const { -#if __CUDACC_VER__ >= 80000 +#if __CUDACC_VER_MAJOR__ >= 8 return true; #else return false; diff --git a/src/popsift/sift_config.h.in b/src/popsift/sift_config.h.in new file mode 100644 index 00000000..1721f994 --- /dev/null +++ b/src/popsift/sift_config.h.in @@ -0,0 +1,5 @@ +#pragma once + +#cmakedefine HAVE_SHFL_DOWN_SYNC @HAVE_SHFL_DOWN_SYNC@ +#cmakedefine HAVE_NORMF @HAVE_NORMF@ + From 443968fec013d11c8254adad3960061e2974283c Mon Sep 17 00:00:00 2001 From: Carsten Griwodz Date: Mon, 14 May 2018 10:24:17 +0200 Subject: [PATCH 007/285] [cosmetic] more CMake options --- CMakeLists.txt | 19 +++++++++++-------- src/popsift/s_filtergrid.cu | 1 + src/popsift/sift_config.h.in | 1 + 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7a521ca7..c00f85b6 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,6 +8,7 @@ OPTION(PopSift_USE_NVTX_PROFILING "Use CUDA NVTX for profiling." OFF) OPTION(PopSift_ERRCHK_AFTER_KERNEL "Synchronize and check CUDA error after every kernel." 
OFF) OPTION(PopSift_USE_POSITION_INDEPENDENT_CODE "Generate position independent code." ON) OPTION(PopSift_USE_GRID_FILTER "Switch off grid filtering to massively reduce compile time while debugging other things." ON) +OPTION(PopSift_USE_NORMF "The __normf function computes Euclidian distance on large arrays. Fast but stability is uncertain." OFF) OPTION(PopSift_USE_TEST_CMD "Add testing step for functional verification" OFF) OPTION(PopSift_BOOST_USE_STATIC_LIBS "Link with static Boost libraries" OFF) OPTION(PopSift_NVCC_WARNINGS "Switch on several additional warning for CUDA nvcc" OFF) @@ -128,14 +129,21 @@ if( ( CUDA_VERSION VERSION_EQUAL "7.5" ) OR ( CUDA_VERSION VERSION_GREATER "7.5" endif(PopSift_NVCC_WARNINGS) endif( ( CUDA_VERSION VERSION_EQUAL "7.5" ) OR ( CUDA_VERSION VERSION_GREATER "7.5") ) -if( CUDA_VERSION VERSION_GREATER "7.4") - set(HAVE_NORMF 1) -endif( CUDA_VERSION VERSION_GREATER "7.4") +if(PopSift_USE_NORMF) + if( CUDA_VERSION VERSION_GREATER "7.4") + set(HAVE_NORMF 1) + endif( CUDA_VERSION VERSION_GREATER "7.4") +endif(PopSift_USE_NORMF) if( ( CUDA_VERSION VERSION_EQUAL "9.0" ) OR ( CUDA_VERSION VERSION_GREATER "9.0") ) set(HAVE_SHFL_DOWN_SYNC 1) endif( ( CUDA_VERSION VERSION_EQUAL "9.0" ) OR ( CUDA_VERSION VERSION_GREATER "9.0") ) +if(NOT PopSift_USE_GRID_FILTER) + message(STATUS "Disabling grid filter compilation") + set(DISABLE_GRID_FILTER 1) +endif(NOT PopSift_USE_GRID_FILTER) + # library required for CUDA dynamic parallelism, forgotten by CMake 3.4 cuda_find_library_local_first(CUDA_CUDADEVRT_LIBRARY cudadevrt "\"cudadevrt\" library") @@ -145,11 +153,6 @@ if(PopSift_USE_NVTX_PROFILING) add_definitions(-DUSE_NVTX) endif(PopSift_USE_NVTX_PROFILING) -if(NOT PopSift_USE_GRID_FILTER) - message(STATUS "Disabling grid filter compilation") - add_definitions(-DDISABLE_GRID_FILTER) -endif(NOT PopSift_USE_GRID_FILTER) - add_subdirectory(src) if(PopSift_USE_TEST_CMD) diff --git a/src/popsift/s_filtergrid.cu b/src/popsift/s_filtergrid.cu index 
f4f6e846..5d65d822 100644 --- a/src/popsift/s_filtergrid.cu +++ b/src/popsift/s_filtergrid.cu @@ -7,6 +7,7 @@ */ #include "sift_pyramid.h" #include "sift_extremum.h" +#include "sift_config.h" #ifdef USE_NVTX #include diff --git a/src/popsift/sift_config.h.in b/src/popsift/sift_config.h.in index 1721f994..85db3740 100644 --- a/src/popsift/sift_config.h.in +++ b/src/popsift/sift_config.h.in @@ -2,4 +2,5 @@ #cmakedefine HAVE_SHFL_DOWN_SYNC @HAVE_SHFL_DOWN_SYNC@ #cmakedefine HAVE_NORMF @HAVE_NORMF@ +#cmakedefine DISABLE_GRID_FILTER @DISABLE_GRID_FILTER@ From 8b36fcc067520ed2068b506971c886cfbd401671 Mon Sep 17 00:00:00 2001 From: Carsten Griwodz Date: Mon, 14 May 2018 16:27:53 +0200 Subject: [PATCH 008/285] [testing] download reference results --- testScripts/downloadOxfordDataset.sh.in | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/testScripts/downloadOxfordDataset.sh.in b/testScripts/downloadOxfordDataset.sh.in index 0d933702..66c1630e 100644 --- a/testScripts/downloadOxfordDataset.sh.in +++ b/testScripts/downloadOxfordDataset.sh.in @@ -1,5 +1,13 @@ #!/bin/bash +cd @CMAKE_CURRENT_SOURCE_DIR@ +if [ ! -f reference.tgz ] +then + echo -n "Fetching reference values. " + wget http://heim.ifi.uio.no/griff/LADIO/files/reference.tgz + echo "Done." 
+fi + echo "Making directory @CMAKE_SOURCE_DIR@/oxford" mkdir -p @CMAKE_SOURCE_DIR@/oxford echo "Changing to directory @CMAKE_SOURCE_DIR@/oxford" From b1580a788db6b86718d4c80ce81f545e21bb9e6c Mon Sep 17 00:00:00 2001 From: Carsten Griwodz Date: Mon, 14 May 2018 16:37:22 +0200 Subject: [PATCH 009/285] [bugfix] string error in test script --- testScripts/testOxfordDataset.sh.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testScripts/testOxfordDataset.sh.in b/testScripts/testOxfordDataset.sh.in index 4b07724d..41424d33 100644 --- a/testScripts/testOxfordDataset.sh.in +++ b/testScripts/testOxfordDataset.sh.in @@ -20,7 +20,7 @@ if [ $# -gt 0 ] then dataset_list=$* else - dataset_list=boat bikes trees graf wall bark leuven ubc + dataset_list="boat bikes trees graf wall bark leuven ubc" fi for dataset in $dataset_list From de48045ed81338921b53dba0d0b39fbf27581cf8 Mon Sep 17 00:00:00 2001 From: Carsten Griwodz Date: Mon, 14 May 2018 18:19:03 +0200 Subject: [PATCH 010/285] [cosmetic] remove text in cmake else() and endif() --- CMakeLists.txt | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c00f85b6..05da06b6 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -59,19 +59,19 @@ endif() # if(CUDA_VERSION_MAJOR GREATER 7) set(PopSift_CUDA_CC_LIST_BASIC 30 35 50 52 60 61 62) -else(CUDA_VERSION_MAJOR GREATER 7) +else() set(PopSift_CUDA_CC_LIST_BASIC 30 35 50 52 ) -endif(CUDA_VERSION_MAJOR GREATER 7) +endif() set(PopSift_CUDA_CC_LIST ${PopSift_CUDA_CC_LIST_BASIC} CACHE STRING "CUDA CC versions to compile") if(PopSift_USE_NVTX_PROFILING) message(STATUS "PROFILING CPU CODE: NVTX is in use") -endif(PopSift_USE_NVTX_PROFILING) +endif() if(PopSift_ERRCHK_AFTER_KERNEL) message(STATUS "Synchronizing and checking errors after every kernel call") set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-DERRCHK_AFTER_KERNEL") -endif(PopSift_ERRCHK_AFTER_KERNEL) +endif() 
set(CUDA_SEPARABLE_COMPILATION ON) @@ -81,7 +81,7 @@ if(UNIX AND NOT APPLE) # set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-Xptxas;-warn-double-usage") set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};--keep") set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};--source-in-ptx") -endif(UNIX AND NOT APPLE) +endif() # The following if should not be necessary, but apparently there is a bug in FindCUDA.cmake that # generate an empty string in the nvcc command line causing the compilation to fail. @@ -102,7 +102,7 @@ endif() list(SORT PopSift_CUDA_CC_LIST) foreach(PopSift_CC_VERSION ${PopSift_CUDA_CC_LIST}) set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-gencode;arch=compute_${PopSift_CC_VERSION},code=sm_${PopSift_CC_VERSION}") -endforeach(PopSift_CC_VERSION) +endforeach() # # Use the highest request CUDA CC for CUDA JIT compilation @@ -126,23 +126,23 @@ if( ( CUDA_VERSION VERSION_EQUAL "7.5" ) OR ( CUDA_VERSION VERSION_GREATER "7.5" set(CUDA_NVCC_FLAGS_RELEASE "${CUDA_NVCC_FLAGS_RELEASE};-Xptxas;-warn-spills") set(CUDA_NVCC_FLAGS_RELEASE "${CUDA_NVCC_FLAGS_RELEASE};-Xptxas;--warn-on-local-memory-usage") set(CUDA_NVCC_FLAGS_RELEASE "${CUDA_NVCC_FLAGS_RELEASE};-Xptxas;--warn-on-spills") - endif(PopSift_NVCC_WARNINGS) -endif( ( CUDA_VERSION VERSION_EQUAL "7.5" ) OR ( CUDA_VERSION VERSION_GREATER "7.5") ) + endif() +endif() if(PopSift_USE_NORMF) if( CUDA_VERSION VERSION_GREATER "7.4") set(HAVE_NORMF 1) - endif( CUDA_VERSION VERSION_GREATER "7.4") -endif(PopSift_USE_NORMF) + endif() +endif() if( ( CUDA_VERSION VERSION_EQUAL "9.0" ) OR ( CUDA_VERSION VERSION_GREATER "9.0") ) set(HAVE_SHFL_DOWN_SYNC 1) -endif( ( CUDA_VERSION VERSION_EQUAL "9.0" ) OR ( CUDA_VERSION VERSION_GREATER "9.0") ) +endif() if(NOT PopSift_USE_GRID_FILTER) message(STATUS "Disabling grid filter compilation") set(DISABLE_GRID_FILTER 1) -endif(NOT PopSift_USE_GRID_FILTER) +endif() # library required for CUDA dynamic parallelism, forgotten by CMake 3.4 cuda_find_library_local_first(CUDA_CUDADEVRT_LIBRARY cudadevrt "\"cudadevrt\" library") 
@@ -151,13 +151,13 @@ if(PopSift_USE_NVTX_PROFILING) # library required for NVTX profiling of the CPU cuda_find_library_local_first(CUDA_NVTX_LIBRARY nvToolsExt "NVTX library") add_definitions(-DUSE_NVTX) -endif(PopSift_USE_NVTX_PROFILING) +endif() add_subdirectory(src) if(PopSift_USE_TEST_CMD) -add_subdirectory(testScripts) -endif(PopSift_USE_TEST_CMD) + add_subdirectory(testScripts) +endif() ########### Add uninstall target ############### CONFIGURE_FILE( From 7926c6f4e89745d1432e9ee03d204012f3676d7a Mon Sep 17 00:00:00 2001 From: Fabien Castan Date: Mon, 14 May 2018 18:31:35 +0200 Subject: [PATCH 011/285] [cmake] rely on native BUILD_SHARED_LIBS option --- README.md | 1 + src/CMakeLists.txt | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 125a8697..b0650783 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,7 @@ Build PopSift has been developed and tested on Linux machines, mostly a variant of Ubuntu, but compiles on MacOSX as well. It comes as a CMake project and requires at least CUDA 7.0 and Boost >= 1.55. It is known to compile and work with NVidia cards of compute capability 3.0 (including the GT 650M), but the code is developed with the compute capability 5.2 card GTX 980 Ti in mind. If you want to avoid building the application you can run cmake with the option `-DPopSift_BUILD_EXAMPLES:BOOL=OFF`. +If you want to build PopSift as a shared library: `-DBUILD_SHARED_LIBS=ON`. 
In order to build the library you can run: diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index e12a32f9..7d7a40a1 100755 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -2,7 +2,7 @@ set(LIBRARY_OUTPUT_PATH ${PROJECT_BINARY_DIR}) CUDA_INCLUDE_DIRECTORIES(${Boost_INCLUDE_DIRS}) -CUDA_ADD_LIBRARY(popsift STATIC +CUDA_ADD_LIBRARY(popsift popsift/popsift.cu popsift/popsift.h popsift/features.cu popsift/features.h popsift/sift_constants.cu popsift/sift_constants.h From d0169e2656cc1e596a08d647f980531e58587565 Mon Sep 17 00:00:00 2001 From: Yann Lanthony Date: Thu, 24 May 2018 11:05:48 +0200 Subject: [PATCH 012/285] [winbuild] centralize cross-platform functions + remove unix-only includes assist.h: add cross-platform getPageSize, memalign and getCurrentThreadId methods --- src/popsift/common/assist.h | 43 +++++++++++++++++++++++++++++++++- src/popsift/common/plane_2d.cu | 25 ++++---------------- src/popsift/features.cu | 22 +++-------------- src/popsift/popsift.cu | 1 - 4 files changed, 49 insertions(+), 42 deletions(-) diff --git a/src/popsift/common/assist.h b/src/popsift/common/assist.h index 3703554d..678b94e1 100644 --- a/src/popsift/common/assist.h +++ b/src/popsift/common/assist.h @@ -9,7 +9,12 @@ #include #include +#ifdef _WIN32 +#include +#else +#include #include // for pthread_self +#endif #include "sift_config.h" @@ -75,6 +80,14 @@ float readTex( cudaTextureObject_t tex, float x, float y ) return tex2D( tex, x+0.5f, y+0.5f ); } +static size_t getCurrentThreadId() +{ +#ifdef _WIN32 + return GetCurrentThreadId(); +#else + return pthread_self(); +#endif +} /********************************************************************************* * For a debug output to cerr with thread ID at the line start @@ -93,6 +106,34 @@ static inline unsigned int microhash( int val ) ^ ( ( val & ( 0xf << 28 ) ) >> 28 ) ); return ret; } -#define DERR std::cerr << std::hex << popsift::microhash(pthread_self()) << std::dec << " " +#define DERR std::cerr << std::hex << 
popsift::microhash(getCurrentThreadId()) << std::dec << " " + + +__host__ +static size_t getPageSize() +{ +#ifdef _WIN32 + SYSTEM_INFO si; + GetSystemInfo(&si); + return si.dwPageSize; +#else + return sysconf(_SC_PAGESIZE); +#endif +} + +static void* memalign(size_t alignment, size_t size) +{ +#ifdef _WIN32 + return _aligned_malloc(size, alignment); +#else + void* ret; + int err = posix_memalign( &ret, alignment, size ); + if( err != 0 ) { + errno = err; + ret = 0; + } + return ret; +#endif +} }; // namespace popsift diff --git a/src/popsift/common/plane_2d.cu b/src/popsift/common/plane_2d.cu index 01b95a5d..9e09570e 100644 --- a/src/popsift/common/plane_2d.cu +++ b/src/popsift/common/plane_2d.cu @@ -20,6 +20,7 @@ #include #include "plane_2d.h" +#include "assist.h" #include "debug_macros.h" using namespace std; @@ -45,18 +46,6 @@ void PlaneBase::freeDev2D( void* data ) POP_CUDA_FATAL_TEST( err, "Failed to free CUDA memory: " ); } -__host__ -static long GetPageSize() -{ -#ifdef _WIN32 - SYSTEM_INFO si; - GetSystemInfo(&si); - return si.dwPageSize; -#else - return sysconf(_SC_PAGESIZE); -#endif -} - __host__ void* PlaneBase::allocHost2D( int w, int h, int elemSize, PlaneMapMode m ) { @@ -78,15 +67,9 @@ void* PlaneBase::allocHost2D( int w, int h, int elemSize, PlaneMapMode m ) << " Cause: " << buf << endl; exit( -1 ); } else if( m == PageAligned ) { - void* ptr; - -#ifdef _WIN32 - ptr = _aligned_malloc(sz, GetPageSize()); - if (ptr) return ptr; -#else - int retval = posix_memalign( &ptr, GetPageSize(), sz ); - if( retval == 0 ) return ptr; -#endif + void* ptr = memalign(getPageSize(), sz); + if(ptr) + return ptr; #ifdef _GNU_SOURCE char b[100]; diff --git a/src/popsift/features.cu b/src/popsift/features.cu index 5dd753a5..eb694522 100755 --- a/src/popsift/features.cu +++ b/src/popsift/features.cu @@ -7,10 +7,7 @@ */ #include #include -#include -#ifndef __APPLE__ -#include -#endif + #include #include #include @@ -58,25 +55,12 @@ FeaturesHost::~FeaturesHost( ) free( 
_ori ); } -#ifdef __APPLE__ -static void* memalign( size_t alignment, size_t size ) -{ - void* ret; - int err = posix_memalign( &ret, alignment, size ); - if( err != 0 ) { - errno = err; - ret = 0; - } - return ret; -} -#endif - void FeaturesHost::reset( int num_ext, int num_ori ) { if( _ext != 0 ) { free( _ext ); _ext = 0; } if( _ori != 0 ) { free( _ori ); _ori = 0; } - _ext = (Feature*)memalign( sysconf(_SC_PAGESIZE), num_ext * sizeof(Feature) ); + _ext = (Feature*)memalign( getPageSize(), num_ext * sizeof(Feature) ); if( _ext == 0 ) { cerr << __FILE__ << ":" << __LINE__ << " Runtime error:" << endl << " Failed to (re)allocate memory for downloading " << num_ext << " features" << endl; @@ -84,7 +68,7 @@ void FeaturesHost::reset( int num_ext, int num_ori ) if( errno == ENOMEM ) cerr << " Not enough memory." << endl; exit( -1 ); } - _ori = (Descriptor*)memalign( sysconf(_SC_PAGESIZE), num_ori * sizeof(Descriptor) ); + _ori = (Descriptor*)memalign( getPageSize(), num_ori * sizeof(Descriptor) ); if( _ori == 0 ) { cerr << __FILE__ << ":" << __LINE__ << " Runtime error:" << endl << " Failed to (re)allocate memory for downloading " << num_ori << " descriptors" << endl; diff --git a/src/popsift/popsift.cu b/src/popsift/popsift.cu index fdb95d51..3f0cf043 100755 --- a/src/popsift/popsift.cu +++ b/src/popsift/popsift.cu @@ -6,7 +6,6 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ #include -#include // for pthread_self #include "sift_constants.h" #include "popsift.h" From 82ba1a51a9f86f8f5db5f5d3d58bdbb366479e35 Mon Sep 17 00:00:00 2001 From: Yann Lanthony Date: Thu, 24 May 2018 11:20:38 +0200 Subject: [PATCH 013/285] [winbuild] cross-platform 'DEPRECATED' indicator --- src/popsift/sift_conf.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/popsift/sift_conf.h b/src/popsift/sift_conf.h index d9f1a22b..c28580a2 100644 --- a/src/popsift/sift_conf.h +++ b/src/popsift/sift_conf.h @@ -15,6 +15,13 @@ #undef USE_DOG_TEX_LINEAR +#ifdef _MSC_VER +#define DEPRECATED(func) __declspec(deprecated) func +#elif defined(__GNUC__) || defined(__clang__) +#define DEPRECATED(func) func __attribute__ ((deprecated)) +#else +#endif + namespace popsift { @@ -158,8 +165,7 @@ struct Config */ void setNormMode( NormMode m ); void setNormMode( const std::string& m ); - void setNormNode( const std::string& m ); - void setUseRootSift( bool on ) __attribute__ ((deprecated)); + DEPRECATED(void setUseRootSift( bool on )); bool getUseRootSift( ) const; NormMode getNormMode( NormMode m ) const; static NormMode getNormModeDefault( ); // Call this from the constructor. 
From 7da158c9c1b136ea6f319f4aece67602a1c2839d Mon Sep 17 00:00:00 2001 From: Yann Lanthony Date: Thu, 24 May 2018 11:21:40 +0200 Subject: [PATCH 014/285] [winbuild] add missing __device__ attribute --- src/popsift/s_desc_notile.cu | 1 + 1 file changed, 1 insertion(+) diff --git a/src/popsift/s_desc_notile.cu b/src/popsift/s_desc_notile.cu index cff3a96e..f78e1243 100644 --- a/src/popsift/s_desc_notile.cu +++ b/src/popsift/s_desc_notile.cu @@ -25,6 +25,7 @@ using namespace popsift; +__device__ static const float stepbase = - 2.5f + 1.0f / 16.0f; __device__ static inline From e3a84bc7b291b82ee62164c670f87ba2afee0355 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Mon, 12 Feb 2018 10:10:57 +0100 Subject: [PATCH 015/285] [ci] add appveyor --- appveyor.yml | 29 +++++++++++++++++++++++++++++ cudaInstallAppveyor.cmd | 18 ++++++++++++++++++ 2 files changed, 47 insertions(+) create mode 100644 appveyor.yml create mode 100644 cudaInstallAppveyor.cmd diff --git a/appveyor.yml b/appveyor.yml new file mode 100644 index 00000000..1434fb03 --- /dev/null +++ b/appveyor.yml @@ -0,0 +1,29 @@ +version: '1.0.{build}' + +image: Visual Studio 2015 + +platform: + - x64 + +configuration: + - Release + - Debug + +install: + - cmd: >- + call cudaInstallAppveyor.cmd + - vcpkg update + - vcpkg install --featurepackages boost-system:x64-windows boost-program-options:x64-windows boost-thread:x64-windows boost-filesystem:x64-windows cuda:x64-windows + +before_build: + - md build + - cd build + - cmake -G "Visual Studio 14 2015 Win64" -DPopSift_BUILD_EXAMPLES:BOOL=OFF -DCMAKE_BUILD_TYPE=%configuration% -DCMAKE_TOOLCHAIN_FILE=c:/tools/vcpkg/scripts/buildsystems/vcpkg.cmake .. 
+ - ls -l + +build: + project: $(APPVEYOR_BUILD_FOLDER)\build\PopSift.sln + parallel: true + +cache: + c:\tools\vcpkg\installed\ \ No newline at end of file diff --git a/cudaInstallAppveyor.cmd b/cudaInstallAppveyor.cmd new file mode 100644 index 00000000..c5cc9335 --- /dev/null +++ b/cudaInstallAppveyor.cmd @@ -0,0 +1,18 @@ +@echo off +echo Downloading CUDA toolkit 8 +appveyor DownloadFile https://developer.nvidia.com/compute/cuda/8.0/prod/local_installers/cuda_8.0.44_windows-exe -FileName cuda_8.0.44_windows.exe +echo Installing CUDA toolkit 8 +cuda_8.0.44_windows.exe -s compiler_8.0 ^ + cublas_8.0 ^ + cublas_dev_8.0 ^ + cudart_8.0 ^ + curand_8.0 ^ + curand_dev_8.0 + +echo CUDA toolkit 8 installed + +dir "%ProgramFiles%" + +set PATH=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v8.0\bin;%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v8.0\libnvvp;%PATH% + +nvcc -V \ No newline at end of file From 2edb4f5c6c6f80131e557598c20319a39c12c4f4 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Mon, 12 Feb 2018 14:11:56 +0100 Subject: [PATCH 016/285] [ci] add appveyor --- appveyor.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/appveyor.yml b/appveyor.yml index 1434fb03..6c880f41 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -13,7 +13,7 @@ install: - cmd: >- call cudaInstallAppveyor.cmd - vcpkg update - - vcpkg install --featurepackages boost-system:x64-windows boost-program-options:x64-windows boost-thread:x64-windows boost-filesystem:x64-windows cuda:x64-windows + - vcpkg install --featurepackages boost-system:x64-windows boost-program-options:x64-windows boost-thread:x64-windows boost-filesystem:x64-windows before_build: - md build From 29eba0bafac31d3cebf5bd541304cb68ced2df17 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Tue, 29 May 2018 19:49:18 +0200 Subject: [PATCH 017/285] [ci] trying cuda 9.1 --- cudaInstallAppveyor.cmd | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git 
a/cudaInstallAppveyor.cmd b/cudaInstallAppveyor.cmd index c5cc9335..381d4048 100644 --- a/cudaInstallAppveyor.cmd +++ b/cudaInstallAppveyor.cmd @@ -1,18 +1,19 @@ @echo off -echo Downloading CUDA toolkit 8 -appveyor DownloadFile https://developer.nvidia.com/compute/cuda/8.0/prod/local_installers/cuda_8.0.44_windows-exe -FileName cuda_8.0.44_windows.exe -echo Installing CUDA toolkit 8 -cuda_8.0.44_windows.exe -s compiler_8.0 ^ - cublas_8.0 ^ - cublas_dev_8.0 ^ - cudart_8.0 ^ - curand_8.0 ^ - curand_dev_8.0 +echo Downloading CUDA toolkit 9 +appveyor DownloadFile https://developer.nvidia.com/compute/cuda/9.1/Prod/local_installers/cuda_9.1.85_windows -FileName cuda_9.1.85_windows.exe +dir +echo Installing CUDA toolkit 9 +cuda_9.1.85_windows.exe -s nvcc_9.1 ^ + cublas_9.1 ^ + cublas_dev_9.1 ^ + cudart_9.1 ^ + curand_9.1 ^ + curand_dev_9.1 -echo CUDA toolkit 8 installed +echo CUDA toolkit 9 installed dir "%ProgramFiles%" -set PATH=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v8.0\bin;%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v8.0\libnvvp;%PATH% +set PATH=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v9.1\bin;%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v9.1\libnvvp;%PATH% nvcc -V \ No newline at end of file From c6a00965fdce4f48cb6157c7835c30d3a78a3837 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Wed, 30 May 2018 21:04:47 +0200 Subject: [PATCH 018/285] [ci] trying more options --- appveyor.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/appveyor.yml b/appveyor.yml index 6c880f41..fed203bb 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -18,7 +18,7 @@ install: before_build: - md build - cd build - - cmake -G "Visual Studio 14 2015 Win64" -DPopSift_BUILD_EXAMPLES:BOOL=OFF -DCMAKE_BUILD_TYPE=%configuration% -DCMAKE_TOOLCHAIN_FILE=c:/tools/vcpkg/scripts/buildsystems/vcpkg.cmake .. 
+ - cmake -G "Visual Studio 14 2015" -A x64 -T v140,host=x64 -DPopSift_BUILD_EXAMPLES:BOOL=OFF -DCMAKE_BUILD_TYPE=%configuration% -DCMAKE_TOOLCHAIN_FILE=c:/tools/vcpkg/scripts/buildsystems/vcpkg.cmake .. - ls -l build: From 60711623c26acfdb633354b008380df2febd7534 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Fri, 1 Jun 2018 18:31:42 +0200 Subject: [PATCH 019/285] [ci] trying out verbosity: detailed --- appveyor.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/appveyor.yml b/appveyor.yml index fed203bb..2725f4f7 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -22,6 +22,7 @@ before_build: - ls -l build: + verbosity: detailed project: $(APPVEYOR_BUILD_FOLDER)\build\PopSift.sln parallel: true From 861d8c72742c61c6691cd0ce688e76cddd0386e1 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Sat, 2 Jun 2018 21:29:26 +0200 Subject: [PATCH 020/285] [ci] using system boost 1.66 --- appveyor.yml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index 2725f4f7..f22bfc64 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -12,13 +12,14 @@ configuration: install: - cmd: >- call cudaInstallAppveyor.cmd - - vcpkg update - - vcpkg install --featurepackages boost-system:x64-windows boost-program-options:x64-windows boost-thread:x64-windows boost-filesystem:x64-windows +# - vcpkg update +# - vcpkg install --featurepackages boost-system:x64-windows boost-program-options:x64-windows boost-thread:x64-windows boost-filesystem:x64-windows before_build: - md build - cd build - - cmake -G "Visual Studio 14 2015" -A x64 -T v140,host=x64 -DPopSift_BUILD_EXAMPLES:BOOL=OFF -DCMAKE_BUILD_TYPE=%configuration% -DCMAKE_TOOLCHAIN_FILE=c:/tools/vcpkg/scripts/buildsystems/vcpkg.cmake .. +# - cmake -G "Visual Studio 14 2015" -A x64 -T v140,host=x64 -DPopSift_BUILD_EXAMPLES:BOOL=OFF -DCMAKE_BUILD_TYPE=%configuration% -DCMAKE_TOOLCHAIN_FILE=c:/tools/vcpkg/scripts/buildsystems/vcpkg.cmake .. 
+ - cmake -G "Visual Studio 14 2015" -A x64 -T v140,host=x64 -DPopSift_BUILD_EXAMPLES:BOOL=OFF -DCMAKE_BUILD_TYPE=%configuration% -DBOOST_ROOT:PATH=C:\Libraries\boost_1_66_0 .. - ls -l build: From 3259bbb875f9bc36ca0a2c509ca2a716e2df7278 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Sat, 2 Jun 2018 22:09:18 +0200 Subject: [PATCH 021/285] [ci] using system boost 1.67 --- appveyor.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index f22bfc64..85bf5df0 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -7,7 +7,6 @@ platform: configuration: - Release - - Debug install: - cmd: >- @@ -19,7 +18,7 @@ before_build: - md build - cd build # - cmake -G "Visual Studio 14 2015" -A x64 -T v140,host=x64 -DPopSift_BUILD_EXAMPLES:BOOL=OFF -DCMAKE_BUILD_TYPE=%configuration% -DCMAKE_TOOLCHAIN_FILE=c:/tools/vcpkg/scripts/buildsystems/vcpkg.cmake .. - - cmake -G "Visual Studio 14 2015" -A x64 -T v140,host=x64 -DPopSift_BUILD_EXAMPLES:BOOL=OFF -DCMAKE_BUILD_TYPE=%configuration% -DBOOST_ROOT:PATH=C:\Libraries\boost_1_66_0 .. + - cmake -G "Visual Studio 14 2015" -A x64 -T v140,host=x64 -DPopSift_BUILD_EXAMPLES:BOOL=OFF -DCMAKE_BUILD_TYPE=%configuration% -DBOOST_ROOT:PATH=C:\Libraries\boost_1_67_0 .. 
- ls -l build: From c3ca3e6b748decb7fbadb83a998c09943530efc5 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Mon, 4 Jun 2018 11:25:58 +0200 Subject: [PATCH 022/285] [ci] testing example --- appveyor.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index 85bf5df0..29010adf 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -11,14 +11,15 @@ configuration: install: - cmd: >- call cudaInstallAppveyor.cmd -# - vcpkg update + - vcpkg update # - vcpkg install --featurepackages boost-system:x64-windows boost-program-options:x64-windows boost-thread:x64-windows boost-filesystem:x64-windows + - vcpkg install --featurepackages devil:x64-windows before_build: - md build - cd build # - cmake -G "Visual Studio 14 2015" -A x64 -T v140,host=x64 -DPopSift_BUILD_EXAMPLES:BOOL=OFF -DCMAKE_BUILD_TYPE=%configuration% -DCMAKE_TOOLCHAIN_FILE=c:/tools/vcpkg/scripts/buildsystems/vcpkg.cmake .. - - cmake -G "Visual Studio 14 2015" -A x64 -T v140,host=x64 -DPopSift_BUILD_EXAMPLES:BOOL=OFF -DCMAKE_BUILD_TYPE=%configuration% -DBOOST_ROOT:PATH=C:\Libraries\boost_1_67_0 .. + - cmake -G "Visual Studio 14 2015" -A x64 -T v140,host=x64 -DPopSift_BUILD_EXAMPLES:BOOL=ON -DCMAKE_BUILD_TYPE=%configuration% -DBOOST_ROOT:PATH=C:\Libraries\boost_1_67_0 .. - ls -l build: From 066ff0672553f2fc1d999aab550cdd4211830785 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Mon, 4 Jun 2018 11:51:51 +0200 Subject: [PATCH 023/285] [ci] testing building examples --- appveyor.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/appveyor.yml b/appveyor.yml index 29010adf..4a73dc58 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -19,7 +19,7 @@ before_build: - md build - cd build # - cmake -G "Visual Studio 14 2015" -A x64 -T v140,host=x64 -DPopSift_BUILD_EXAMPLES:BOOL=OFF -DCMAKE_BUILD_TYPE=%configuration% -DCMAKE_TOOLCHAIN_FILE=c:/tools/vcpkg/scripts/buildsystems/vcpkg.cmake .. 
- - cmake -G "Visual Studio 14 2015" -A x64 -T v140,host=x64 -DPopSift_BUILD_EXAMPLES:BOOL=ON -DCMAKE_BUILD_TYPE=%configuration% -DBOOST_ROOT:PATH=C:\Libraries\boost_1_67_0 .. + - cmake -G "Visual Studio 14 2015" -A x64 -T v140,host=x64 -DPopSift_BUILD_EXAMPLES:BOOL=ON -DCMAKE_BUILD_TYPE=%configuration% -DBOOST_ROOT:PATH=C:\Libraries\boost_1_66_0 -DCMAKE_TOOLCHAIN_FILE=c:/tools/vcpkg/scripts/buildsystems/vcpkg.cmake .. - ls -l build: From fce62409356591995d077e3b98047e78656728ba Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Tue, 5 Jun 2018 23:41:02 +0200 Subject: [PATCH 024/285] [ci] testing without devil --- appveyor.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index 4a73dc58..484402fe 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -11,9 +11,9 @@ configuration: install: - cmd: >- call cudaInstallAppveyor.cmd - - vcpkg update +# - vcpkg update # - vcpkg install --featurepackages boost-system:x64-windows boost-program-options:x64-windows boost-thread:x64-windows boost-filesystem:x64-windows - - vcpkg install --featurepackages devil:x64-windows +# - vcpkg install --featurepackages devil:x64-windows before_build: - md build From c1a3f6378473c558f1ee691b01336944c1ecf16f Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Tue, 5 Jun 2018 23:46:13 +0200 Subject: [PATCH 025/285] [cmake] Devil is not required --- src/application/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/application/CMakeLists.txt b/src/application/CMakeLists.txt index 468b6437..67dae6eb 100755 --- a/src/application/CMakeLists.txt +++ b/src/application/CMakeLists.txt @@ -13,7 +13,7 @@ else() endif() find_package(Boost 1.53.0 REQUIRED COMPONENTS program_options system filesystem) -find_package(DevIL REQUIRED COMPONENTS IL ILU) # yields IL_FOUND, IL_LIBRARIES, IL_INCLUDE_DIR +find_package(DevIL COMPONENTS IL ILU) # yields IL_FOUND, IL_LIBRARIES, IL_INCLUDE_DIR set(PD_INCLUDE_DIRS 
${Boost_INCLUDE_DIRS}) set(PD_LINK_LIBS ${Boost_LIBRARIES} ${CUDA_CUDADEVRT_LIBRARY}) From a96f46fa0f31c0b6e3bd7b9965bd9db2ae304d23 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Wed, 6 Jun 2018 00:30:25 +0200 Subject: [PATCH 026/285] [application] simpler code --- src/application/main.cpp | 17 +++++++++++------ src/application/match.cpp | 17 +++++++++++------ 2 files changed, 22 insertions(+), 12 deletions(-) diff --git a/src/application/main.cpp b/src/application/main.cpp index 7f0e7861..edd01301 100755 --- a/src/application/main.cpp +++ b/src/application/main.cpp @@ -155,12 +155,17 @@ static void collectFilenames( list& inputFiles, const boost::filesystem: std::copy( boost::filesystem::directory_iterator( inputFile ), boost::filesystem::directory_iterator(), std::back_inserter(vec) ); - for( auto it = vec.begin(); it!=vec.end(); it++ ) { - if( boost::filesystem::is_regular_file( *it ) ) { - string s( it->c_str() ); - inputFiles.push_back( s ); - } else if( boost::filesystem::is_directory( *it ) ) { - collectFilenames( inputFiles, *it ); + for (const auto& currPath : vec) + { + if( boost::filesystem::is_regular_file(currPath) ) + { +// string s(it.c_str() ); +// inputFiles.push_back( s ); + inputFiles.push_back( currPath.string() ); + } + else if( boost::filesystem::is_directory(currPath) ) + { + collectFilenames( inputFiles, currPath); } } } diff --git a/src/application/match.cpp b/src/application/match.cpp index 9849b92a..6e68fc55 100755 --- a/src/application/match.cpp +++ b/src/application/match.cpp @@ -151,12 +151,17 @@ static void collectFilenames( list& inputFiles, const boost::filesystem: std::copy( boost::filesystem::directory_iterator( inputFile ), boost::filesystem::directory_iterator(), std::back_inserter(vec) ); - for( auto it = vec.begin(); it!=vec.end(); it++ ) { - if( boost::filesystem::is_regular_file( *it ) ) { - string s( it->c_str() ); - inputFiles.push_back( s ); - } else if( boost::filesystem::is_directory( *it ) ) { - 
collectFilenames( inputFiles, *it ); + for (const auto& currPath : vec) + { + if( boost::filesystem::is_regular_file(currPath) ) + { +// string s(it.c_str() ); +// inputFiles.push_back( s ); + inputFiles.push_back( currPath.string() ); + + } else if( boost::filesystem::is_directory(currPath) ) + { + collectFilenames( inputFiles, currPath); } } } From c44fb15736dd8907736cc17237c6c6dfa3c72973 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Wed, 6 Jun 2018 00:51:06 +0200 Subject: [PATCH 027/285] [doc] add badge --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index b0650783..46c26a3d 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,8 @@ make install Continuous integration: - [![Build Status](https://travis-ci.org/alicevision/popsift.svg?branch=master)](https://travis-ci.org/alicevision/popsift) master branch. - [![Build Status](https://travis-ci.org/alicevision/popsift.svg?branch=develop)](https://travis-ci.org/alicevision/popsift) develop branch. +- [![Build status](https://ci.appveyor.com/api/projects/status/rsm5269hs288c2ji/branch/develop?svg=true)](https://ci.appveyor.com/project/AliceVision/popsift/branch/develop) + develop branch. 
From da115f40a7ccb3f6aec0efa0363a79387801963c Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Wed, 6 Jun 2018 00:52:21 +0200 Subject: [PATCH 028/285] [cmake] improved fallback message for devil --- src/application/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/application/CMakeLists.txt b/src/application/CMakeLists.txt index 67dae6eb..bc4b9fdc 100755 --- a/src/application/CMakeLists.txt +++ b/src/application/CMakeLists.txt @@ -24,7 +24,7 @@ if(IL_FOUND OR DevIL_FOUND) list(APPEND PD_INCLUDE_DIRS ${IL_INCLUDE_DIR}) list(APPEND PD_LINK_LIBS ${IL_LIBRARIES} ${ILU_LIBRARIES}) else() - message(STATUS "DevIL not found") + message(WARNING "DevIL not found -- Falling back to pgmread") set(PD_COMPILE_OPTIONS "" ) endif() From e104f5fa43a094d9285dff0bc4c46d4f76edb222 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Wed, 6 Jun 2018 01:02:21 +0200 Subject: [PATCH 029/285] [applications] cleaning, removed commented code --- src/application/main.cpp | 2 -- src/application/match.cpp | 5 ++--- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/src/application/main.cpp b/src/application/main.cpp index edd01301..ad9d2574 100755 --- a/src/application/main.cpp +++ b/src/application/main.cpp @@ -159,8 +159,6 @@ static void collectFilenames( list& inputFiles, const boost::filesystem: { if( boost::filesystem::is_regular_file(currPath) ) { -// string s(it.c_str() ); -// inputFiles.push_back( s ); inputFiles.push_back( currPath.string() ); } else if( boost::filesystem::is_directory(currPath) ) diff --git a/src/application/match.cpp b/src/application/match.cpp index 6e68fc55..c18e2f48 100755 --- a/src/application/match.cpp +++ b/src/application/match.cpp @@ -155,11 +155,10 @@ static void collectFilenames( list& inputFiles, const boost::filesystem: { if( boost::filesystem::is_regular_file(currPath) ) { -// string s(it.c_str() ); -// inputFiles.push_back( s ); inputFiles.push_back( currPath.string() ); - } else if( 
boost::filesystem::is_directory(currPath) ) + } + else if( boost::filesystem::is_directory(currPath) ) { collectFilenames( inputFiles, currPath); } From 8d6769f62145b88dc23b4f04cff3de3a1cd33722 Mon Sep 17 00:00:00 2001 From: Fabien Castan Date: Tue, 12 Jun 2018 16:18:41 +0200 Subject: [PATCH 030/285] refactoring: use cpp instead of cuda file cuda is not necessary for this file --- src/CMakeLists.txt | 2 +- src/popsift/{popsift.cu => popsift.cpp} | 5 ----- 2 files changed, 1 insertion(+), 6 deletions(-) rename src/popsift/{popsift.cu => popsift.cpp} (98%) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index eb84e2cc..b2677276 100755 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -3,7 +3,7 @@ set(LIBRARY_OUTPUT_PATH ${PROJECT_BINARY_DIR}) CUDA_INCLUDE_DIRECTORIES(${Boost_INCLUDE_DIRS} ${CMAKE_CURRENT_BINARY_DIR}/popsift) CUDA_ADD_LIBRARY(popsift - popsift/popsift.cu popsift/popsift.h + popsift/popsift.cpp popsift/popsift.h popsift/features.cu popsift/features.h popsift/sift_constants.cu popsift/sift_constants.h popsift/sift_conf.cu popsift/sift_conf.h diff --git a/src/popsift/popsift.cu b/src/popsift/popsift.cpp similarity index 98% rename from src/popsift/popsift.cu rename to src/popsift/popsift.cpp index 3f0cf043..76c52e84 100755 --- a/src/popsift/popsift.cu +++ b/src/popsift/popsift.cpp @@ -7,14 +7,9 @@ */ #include -#include "sift_constants.h" #include "popsift.h" #include "gauss_filter.h" -#include "common/write_plane_2d.h" #include "sift_pyramid.h" -#include "sift_extremum.h" -#include "common/assist.h" -#include "features.h" using namespace std; From 5a3d054b65cbd8fb3fe175d13722a5dd29209d86 Mon Sep 17 00:00:00 2001 From: Fabien Castan Date: Tue, 12 Jun 2018 16:38:29 +0200 Subject: [PATCH 031/285] refactoring: ensures that define variables exist If an include is missing, the compilation will fail instead of having some preprocessor variables becoming undefined. So instead of relying on ifdef/ifndef, we rely on 0/1 values. 
We use "function define" to ensure that there is no incorrect ifdef/ifndef usage. --- CMakeLists.txt | 12 ++++++++---- src/popsift/common/assist.h | 2 +- src/popsift/s_desc_norm_l2.h | 2 +- src/popsift/s_filtergrid.cu | 2 +- src/popsift/sift_config.h.in | 9 ++++++--- 5 files changed, 17 insertions(+), 10 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 05da06b6..443cca71 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -129,19 +129,23 @@ if( ( CUDA_VERSION VERSION_EQUAL "7.5" ) OR ( CUDA_VERSION VERSION_GREATER "7.5" endif() endif() -if(PopSift_USE_NORMF) - if( CUDA_VERSION VERSION_GREATER "7.4") - set(HAVE_NORMF 1) - endif() +if(PopSift_USE_NORMF AND CUDA_VERSION VERSION_GREATER "7.4") + set(HAVE_NORMF 1) +else() + set(HAVE_NORMF 0) endif() if( ( CUDA_VERSION VERSION_EQUAL "9.0" ) OR ( CUDA_VERSION VERSION_GREATER "9.0") ) set(HAVE_SHFL_DOWN_SYNC 1) +else() + set(HAVE_SHFL_DOWN_SYNC 0) endif() if(NOT PopSift_USE_GRID_FILTER) message(STATUS "Disabling grid filter compilation") set(DISABLE_GRID_FILTER 1) +else() + set(DISABLE_GRID_FILTER 0) endif() # library required for CUDA dynamic parallelism, forgotten by CMake 3.4 diff --git a/src/popsift/common/assist.h b/src/popsift/common/assist.h index 678b94e1..b72d6516 100644 --- a/src/popsift/common/assist.h +++ b/src/popsift/common/assist.h @@ -26,7 +26,7 @@ std::ostream& operator<<( std::ostream& ostr, const dim3& p ); /* * Assistance with compatibility-breaking builtin function changes */ -#ifdef HAVE_SHFL_DOWN_SYNC +#if POPSIFT_IS_DEFINED(POPSIFT_HAVE_SHFL_DOWN_SYNC) template __device__ inline T shuffle ( T variable, int src ) { return __shfl_sync ( 0xffffffff, variable, src ); } template __device__ inline T shuffle_up ( T variable, int delta ) { return __shfl_up_sync ( 0xffffffff, variable, delta ); } template __device__ inline T shuffle_down( T variable, int delta ) { return __shfl_down_sync( 0xffffffff, variable, delta ); } diff --git a/src/popsift/s_desc_norm_l2.h b/src/popsift/s_desc_norm_l2.h 
index cf2f3f6c..f974aa33 100644 --- a/src/popsift/s_desc_norm_l2.h +++ b/src/popsift/s_desc_norm_l2.h @@ -50,7 +50,7 @@ void NormalizeL2::normalize( const float* src_desc, float* dst_desc, const bool float4 descr; descr = ptr4[threadIdx.x]; -#ifdef HAVE_NORMF +#if POPSIFT_IS_DEFINED(POPSIFT_HAVE_NORMF) // normf() is an elegant function: sqrt(sum_0^127{v^2}) // It exists from CUDA 7.5 but the trouble with CUB on the GTX 980 Ti forces // us to with CUDA 7.0 right now diff --git a/src/popsift/s_filtergrid.cu b/src/popsift/s_filtergrid.cu index 5d65d822..f655d160 100644 --- a/src/popsift/s_filtergrid.cu +++ b/src/popsift/s_filtergrid.cu @@ -18,7 +18,7 @@ using namespace std; -#if not defined(DISABLE_GRID_FILTER) +#if not POPSIFT_IS_DEFINED(POPSIFT_DISABLE_GRID_FILTER) #include #include diff --git a/src/popsift/sift_config.h.in b/src/popsift/sift_config.h.in index 85db3740..1337d771 100644 --- a/src/popsift/sift_config.h.in +++ b/src/popsift/sift_config.h.in @@ -1,6 +1,9 @@ #pragma once -#cmakedefine HAVE_SHFL_DOWN_SYNC @HAVE_SHFL_DOWN_SYNC@ -#cmakedefine HAVE_NORMF @HAVE_NORMF@ -#cmakedefine DISABLE_GRID_FILTER @DISABLE_GRID_FILTER@ +#define POPSIFT_IS_DEFINED(F) F() == 1 + +#define POPSIFT_HAVE_SHFL_DOWN_SYNC() @HAVE_SHFL_DOWN_SYNC@ +#define POPSIFT_HAVE_NORMF() @HAVE_NORMF@ +#define POPSIFT_DISABLE_GRID_FILTER() @DISABLE_GRID_FILTER@ + From dc1402e35314e3735b4082b431dbeed643ba949d Mon Sep 17 00:00:00 2001 From: Fabien Castan Date: Tue, 12 Jun 2018 16:44:51 +0200 Subject: [PATCH 032/285] [appveyor] use boost from vcpkg --- appveyor.yml | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index 484402fe..cc2de880 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -11,15 +11,16 @@ configuration: install: - cmd: >- call cudaInstallAppveyor.cmd -# - vcpkg update -# - vcpkg install --featurepackages boost-system:x64-windows boost-program-options:x64-windows boost-thread:x64-windows boost-filesystem:x64-windows -# - vcpkg 
install --featurepackages devil:x64-windows + - vcpkg upgrade --no-dry-run + - vcpkg install + boost-system boost-program-options boost-thread boost-filesystem + --triplet %PLATFORM%-windows + # devil before_build: - md build - cd build -# - cmake -G "Visual Studio 14 2015" -A x64 -T v140,host=x64 -DPopSift_BUILD_EXAMPLES:BOOL=OFF -DCMAKE_BUILD_TYPE=%configuration% -DCMAKE_TOOLCHAIN_FILE=c:/tools/vcpkg/scripts/buildsystems/vcpkg.cmake .. - - cmake -G "Visual Studio 14 2015" -A x64 -T v140,host=x64 -DPopSift_BUILD_EXAMPLES:BOOL=ON -DCMAKE_BUILD_TYPE=%configuration% -DBOOST_ROOT:PATH=C:\Libraries\boost_1_66_0 -DCMAKE_TOOLCHAIN_FILE=c:/tools/vcpkg/scripts/buildsystems/vcpkg.cmake .. + - cmake -G "Visual Studio 14 2015" -A x64 -T v140,host=x64 -DPopSift_BUILD_EXAMPLES:BOOL=OFF -DCMAKE_BUILD_TYPE=%configuration% -DCMAKE_TOOLCHAIN_FILE=c:/tools/vcpkg/scripts/buildsystems/vcpkg.cmake .. - ls -l build: @@ -28,4 +29,4 @@ build: parallel: true cache: - c:\tools\vcpkg\installed\ \ No newline at end of file + c:\tools\vcpkg\installed\ From 27518e525afb872b25415e20b8e258971042bf46 Mon Sep 17 00:00:00 2001 From: Fabien Castan Date: Tue, 12 Jun 2018 18:41:28 +0200 Subject: [PATCH 033/285] minor: remove unused variables --- src/popsift/popsift.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/popsift/popsift.cpp b/src/popsift/popsift.cpp index 76c52e84..bb1cc5eb 100755 --- a/src/popsift/popsift.cpp +++ b/src/popsift/popsift.cpp @@ -105,8 +105,7 @@ bool PopSift::private_init( int w, int h ) } if( _config.octaves < 0 ) { - int oct = _config.octaves; - oct = max(int (floor( logf( (float)min( w, h ) ) + int oct = max(int (floor( logf( (float)min( w, h ) ) / logf( 2.0f ) ) - 3.0f + scaleFactor ), 1); _config.octaves = oct; } @@ -201,11 +200,9 @@ void PopSift::extractDownloadLoop( ) bool log_to_file = ( _config.getLogMode() == popsift::Config::All ); if( log_to_file ) { - int octaves = p._pyramid->getNumOctaves(); - + // int octaves = 
p._pyramid->getNumOctaves(); // for( int o=0; odownload_descriptors( _config, o ); } - - int levels = p._pyramid->getNumLevels(); + // int levels = p._pyramid->getNumLevels(); p._pyramid->download_and_save_array( "pyramid" ); p._pyramid->save_descriptors( _config, features, "pyramid" ); From 65a5eeb0436be3fab8add54484f2c4833a73a04f Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Wed, 13 Jun 2018 23:44:45 +0200 Subject: [PATCH 034/285] [common] using std::thread::id --- src/popsift/common/assist.h | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/popsift/common/assist.h b/src/popsift/common/assist.h index 678b94e1..727afd73 100644 --- a/src/popsift/common/assist.h +++ b/src/popsift/common/assist.h @@ -13,7 +13,7 @@ #include #else #include -#include // for pthread_self +#include #endif #include "sift_config.h" @@ -80,13 +80,9 @@ float readTex( cudaTextureObject_t tex, float x, float y ) return tex2D( tex, x+0.5f, y+0.5f ); } -static size_t getCurrentThreadId() +inline std::thread::id getCurrentThreadId() { -#ifdef _WIN32 - return GetCurrentThreadId(); -#else - return pthread_self(); -#endif + return std::this_thread::get_id(); } /********************************************************************************* @@ -106,6 +102,13 @@ static inline unsigned int microhash( int val ) ^ ( ( val & ( 0xf << 28 ) ) >> 28 ) ); return ret; } + +static inline unsigned int microhash( const std::thread::id& id ) +{ + std::hash hasher; + return microhash( hasher(id) ); +} + #define DERR std::cerr << std::hex << popsift::microhash(getCurrentThreadId()) << std::dec << " " From c2d4dc7b2db0dedbcdef685b3316ebde8d7474ab Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Thu, 14 Jun 2018 14:29:27 +0200 Subject: [PATCH 035/285] [common] fix missing header windows --- src/popsift/common/assist.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/popsift/common/assist.h b/src/popsift/common/assist.h index 03315026..d00f280e 
100644 --- a/src/popsift/common/assist.h +++ b/src/popsift/common/assist.h @@ -9,11 +9,11 @@ #include #include +#include #ifdef _WIN32 #include #else #include -#include #endif #include "sift_config.h" From b6aca69b3dc1f5fff72d675d5c9efaf7581e4051 Mon Sep 17 00:00:00 2001 From: Carsten Griwodz Date: Wed, 20 Jun 2018 09:26:28 +0200 Subject: [PATCH 036/285] [bugfix] remove using std to avoid collision --- src/popsift/s_filtergrid.cu | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/popsift/s_filtergrid.cu b/src/popsift/s_filtergrid.cu index f655d160..c724a36a 100644 --- a/src/popsift/s_filtergrid.cu +++ b/src/popsift/s_filtergrid.cu @@ -16,8 +16,6 @@ #define nvtxRangePop() #endif -using namespace std; - #if not POPSIFT_IS_DEFINED(POPSIFT_DISABLE_GRID_FILTER) #include From c7d61a00887263c86d50515cfe6e875acf4dd9a2 Mon Sep 17 00:00:00 2001 From: Fabien Castan Date: Tue, 26 Jun 2018 17:36:38 +0200 Subject: [PATCH 037/285] [cmake] cuda: use cuda shared runtime when building popsift as shared library --- CMakeLists.txt | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 443cca71..38e63ac7 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -47,6 +47,19 @@ if(WIN32) link_directories(Boost_LIBRARRY_DIR_RELEASE) endif(WIN32) +if(BUILD_SHARED_LIBS) + message(STATUS "BUILD_SHARED_LIBS ON") + # Need to declare CUDA_USE_STATIC_CUDA_RUNTIME as an option to ensure that it is not overwritten in FindCUDA. 
+ option(CUDA_USE_STATIC_CUDA_RUNTIME "Use the static version of the CUDA runtime library if available" OFF) + set(CUDA_USE_STATIC_CUDA_RUNTIME OFF) + # Workaround to force deactivation of cuda static runtime for cmake < 3.10 + set(CUDA_cudart_static_LIBRARY 0) +else() + message(STATUS "BUILD_SHARED_LIBS OFF") + option(CUDA_USE_STATIC_CUDA_RUNTIME "Use the static version of the CUDA runtime library if available" ON) + set(CUDA_USE_STATIC_CUDA_RUNTIME ON) +endif() + find_package(CUDA 7.0 REQUIRED) if(NOT CUDA_FOUND) From 8584fa164e6a0684d11a5e55d00f0d6b186558f1 Mon Sep 17 00:00:00 2001 From: Fabien Castan Date: Tue, 26 Jun 2018 17:37:20 +0200 Subject: [PATCH 038/285] [cmake] use FindCUDA provided by cmake --- src/cmake/FindCUDA.cmake | 1796 -------------------------- src/cmake/FindCUDA/make2cmake.cmake | 93 -- src/cmake/FindCUDA/parse_cubin.cmake | 110 -- src/cmake/FindCUDA/run_nvcc.cmake | 288 ----- 4 files changed, 2287 deletions(-) delete mode 100755 src/cmake/FindCUDA.cmake delete mode 100755 src/cmake/FindCUDA/make2cmake.cmake delete mode 100755 src/cmake/FindCUDA/parse_cubin.cmake delete mode 100755 src/cmake/FindCUDA/run_nvcc.cmake diff --git a/src/cmake/FindCUDA.cmake b/src/cmake/FindCUDA.cmake deleted file mode 100755 index e9a2505f..00000000 --- a/src/cmake/FindCUDA.cmake +++ /dev/null @@ -1,1796 +0,0 @@ -#.rst: -# FindCUDA -# -------- -# -# Tools for building CUDA C files: libraries and build dependencies. -# -# This script locates the NVIDIA CUDA C tools. It should work on linux, -# windows, and mac and should be reasonably up to date with CUDA C -# releases. -# -# This script makes use of the standard find_package arguments of -# , REQUIRED and QUIET. CUDA_FOUND will report if an -# acceptable version of CUDA was found. -# -# The script will prompt the user to specify CUDA_TOOLKIT_ROOT_DIR if -# the prefix cannot be determined by the location of nvcc in the system -# path and REQUIRED is specified to find_package(). 
To use a different -# installed version of the toolkit set the environment variable -# CUDA_BIN_PATH before running cmake (e.g. -# CUDA_BIN_PATH=/usr/local/cuda1.0 instead of the default -# /usr/local/cuda) or set CUDA_TOOLKIT_ROOT_DIR after configuring. If -# you change the value of CUDA_TOOLKIT_ROOT_DIR, various components that -# depend on the path will be relocated. -# -# It might be necessary to set CUDA_TOOLKIT_ROOT_DIR manually on certain -# platforms, or to use a cuda runtime not installed in the default -# location. In newer versions of the toolkit the cuda library is -# included with the graphics driver- be sure that the driver version -# matches what is needed by the cuda runtime version. -# -# The following variables affect the behavior of the macros in the -# script (in alphebetical order). Note that any of these flags can be -# changed multiple times in the same directory before calling -# CUDA_ADD_EXECUTABLE, CUDA_ADD_LIBRARY, CUDA_COMPILE, CUDA_COMPILE_PTX -# or CUDA_WRAP_SRCS. -# -# :: -# -# CUDA_64_BIT_DEVICE_CODE (Default matches host bit size) -# -- Set to ON to compile for 64 bit device code, OFF for 32 bit device code. -# Note that making this different from the host code when generating object -# or C files from CUDA code just won't work, because size_t gets defined by -# nvcc in the generated source. If you compile to PTX and then load the -# file yourself, you can mix bit sizes between device and host. -# -# -# -# :: -# -# CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE (Default ON) -# -- Set to ON if you want the custom build rule to be attached to the source -# file in Visual Studio. Turn OFF if you add the same cuda file to multiple -# targets. -# -# -# -# :: -# -# This allows the user to build the target from the CUDA file; however, bad -# things can happen if the CUDA source file is added to multiple targets. 
-# When performing parallel builds it is possible for the custom build -# command to be run more than once and in parallel causing cryptic build -# errors. VS runs the rules for every source file in the target, and a -# source can have only one rule no matter how many projects it is added to. -# When the rule is run from multiple targets race conditions can occur on -# the generated file. Eventually everything will get built, but if the user -# is unaware of this behavior, there may be confusion. It would be nice if -# this script could detect the reuse of source files across multiple targets -# and turn the option off for the user, but no good solution could be found. -# -# -# -# :: -# -# CUDA_BUILD_CUBIN (Default OFF) -# -- Set to ON to enable and extra compilation pass with the -cubin option in -# Device mode. The output is parsed and register, shared memory usage is -# printed during build. -# -# -# -# :: -# -# CUDA_BUILD_EMULATION (Default OFF for device mode) -# -- Set to ON for Emulation mode. -D_DEVICEEMU is defined for CUDA C files -# when CUDA_BUILD_EMULATION is TRUE. -# -# -# -# :: -# -# CUDA_GENERATED_OUTPUT_DIR (Default CMAKE_CURRENT_BINARY_DIR) -# -- Set to the path you wish to have the generated files placed. If it is -# blank output files will be placed in CMAKE_CURRENT_BINARY_DIR. -# Intermediate files will always be placed in -# CMAKE_CURRENT_BINARY_DIR/CMakeFiles. -# -# -# -# :: -# -# CUDA_HOST_COMPILATION_CPP (Default ON) -# -- Set to OFF for C compilation of host code. -# -# -# -# :: -# -# CUDA_HOST_COMPILER (Default CMAKE_C_COMPILER, $(VCInstallDir)/bin for VS) -# -- Set the host compiler to be used by nvcc. Ignored if -ccbin or -# --compiler-bindir is already present in the CUDA_NVCC_FLAGS or -# CUDA_NVCC_FLAGS_ variables. For Visual Studio targets -# $(VCInstallDir)/bin is a special value that expands out to the path when -# the command is run from withing VS. 
-# -# -# -# :: -# -# CUDA_NVCC_FLAGS -# CUDA_NVCC_FLAGS_ -# -- Additional NVCC command line arguments. NOTE: multiple arguments must be -# semi-colon delimited (e.g. --compiler-options;-Wall) -# -# -# -# :: -# -# CUDA_PROPAGATE_HOST_FLAGS (Default ON) -# -- Set to ON to propagate CMAKE_{C,CXX}_FLAGS and their configuration -# dependent counterparts (e.g. CMAKE_C_FLAGS_DEBUG) automatically to the -# host compiler through nvcc's -Xcompiler flag. This helps make the -# generated host code match the rest of the system better. Sometimes -# certain flags give nvcc problems, and this will help you turn the flag -# propagation off. This does not affect the flags supplied directly to nvcc -# via CUDA_NVCC_FLAGS or through the OPTION flags specified through -# CUDA_ADD_LIBRARY, CUDA_ADD_EXECUTABLE, or CUDA_WRAP_SRCS. Flags used for -# shared library compilation are not affected by this flag. -# -# -# -# :: -# -# CUDA_SEPARABLE_COMPILATION (Default OFF) -# -- If set this will enable separable compilation for all CUDA runtime object -# files. If used outside of CUDA_ADD_EXECUTABLE and CUDA_ADD_LIBRARY -# (e.g. calling CUDA_WRAP_SRCS directly), -# CUDA_COMPUTE_SEPARABLE_COMPILATION_OBJECT_FILE_NAME and -# CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS should be called. -# -# -# -# :: -# -# CUDA_VERBOSE_BUILD (Default OFF) -# -- Set to ON to see all the commands used when building the CUDA file. When -# using a Makefile generator the value defaults to VERBOSE (run make -# VERBOSE=1 to see output), although setting CUDA_VERBOSE_BUILD to ON will -# always print the output. -# -# -# -# The script creates the following macros (in alphebetical order): -# -# :: -# -# CUDA_ADD_CUFFT_TO_TARGET( cuda_target ) -# -- Adds the cufft library to the target (can be any target). Handles whether -# you are in emulation mode or not. -# -# -# -# :: -# -# CUDA_ADD_CUBLAS_TO_TARGET( cuda_target ) -# -- Adds the cublas library to the target (can be any target). 
Handles -# whether you are in emulation mode or not. -# -# -# -# :: -# -# CUDA_ADD_EXECUTABLE( cuda_target file0 file1 ... -# [WIN32] [MACOSX_BUNDLE] [EXCLUDE_FROM_ALL] [OPTIONS ...] ) -# -- Creates an executable "cuda_target" which is made up of the files -# specified. All of the non CUDA C files are compiled using the standard -# build rules specified by CMAKE and the cuda files are compiled to object -# files using nvcc and the host compiler. In addition CUDA_INCLUDE_DIRS is -# added automatically to include_directories(). Some standard CMake target -# calls can be used on the target after calling this macro -# (e.g. set_target_properties and target_link_libraries), but setting -# properties that adjust compilation flags will not affect code compiled by -# nvcc. Such flags should be modified before calling CUDA_ADD_EXECUTABLE, -# CUDA_ADD_LIBRARY or CUDA_WRAP_SRCS. -# -# -# -# :: -# -# CUDA_ADD_LIBRARY( cuda_target file0 file1 ... -# [STATIC | SHARED | MODULE] [EXCLUDE_FROM_ALL] [OPTIONS ...] ) -# -- Same as CUDA_ADD_EXECUTABLE except that a library is created. -# -# -# -# :: -# -# CUDA_BUILD_CLEAN_TARGET() -# -- Creates a convience target that deletes all the dependency files -# generated. You should make clean after running this target to ensure the -# dependency files get regenerated. -# -# -# -# :: -# -# CUDA_COMPILE( generated_files file0 file1 ... [STATIC | SHARED | MODULE] -# [OPTIONS ...] ) -# -- Returns a list of generated files from the input source files to be used -# with ADD_LIBRARY or ADD_EXECUTABLE. -# -# -# -# :: -# -# CUDA_COMPILE_PTX( generated_files file0 file1 ... [OPTIONS ...] ) -# -- Returns a list of PTX files generated from the input source files. -# -# -# -# :: -# -# CUDA_COMPUTE_SEPARABLE_COMPILATION_OBJECT_FILE_NAME( output_file_var -# cuda_target -# object_files ) -# -- Compute the name of the intermediate link file used for separable -# compilation. This file name is typically passed into -# CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS. 
output_file_var is produced -# based on cuda_target the list of objects files that need separable -# compilation as specified by object_files. If the object_files list is -# empty, then output_file_var will be empty. This function is called -# automatically for CUDA_ADD_LIBRARY and CUDA_ADD_EXECUTABLE. Note that -# this is a function and not a macro. -# -# -# -# :: -# -# CUDA_INCLUDE_DIRECTORIES( path0 path1 ... ) -# -- Sets the directories that should be passed to nvcc -# (e.g. nvcc -Ipath0 -Ipath1 ... ). These paths usually contain other .cu -# files. -# -# -# -# -# -# :: -# -# CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS( output_file_var cuda_target -# nvcc_flags object_files) -# -# -# -# :: -# -# -- Generates the link object required by separable compilation from the given -# object files. This is called automatically for CUDA_ADD_EXECUTABLE and -# CUDA_ADD_LIBRARY, but can be called manually when using CUDA_WRAP_SRCS -# directly. When called from CUDA_ADD_LIBRARY or CUDA_ADD_EXECUTABLE the -# nvcc_flags passed in are the same as the flags passed in via the OPTIONS -# argument. The only nvcc flag added automatically is the bitness flag as -# specified by CUDA_64_BIT_DEVICE_CODE. Note that this is a function -# instead of a macro. -# -# -# -# :: -# -# CUDA_WRAP_SRCS ( cuda_target format generated_files file0 file1 ... -# [STATIC | SHARED | MODULE] [OPTIONS ...] ) -# -- This is where all the magic happens. CUDA_ADD_EXECUTABLE, -# CUDA_ADD_LIBRARY, CUDA_COMPILE, and CUDA_COMPILE_PTX all call this -# function under the hood. -# -# -# -# :: -# -# Given the list of files (file0 file1 ... fileN) this macro generates -# custom commands that generate either PTX or linkable objects (use "PTX" or -# "OBJ" for the format argument to switch). Files that don't end with .cu -# or have the HEADER_FILE_ONLY property are ignored. -# -# -# -# :: -# -# The arguments passed in after OPTIONS are extra command line options to -# give to nvcc. 
You can also specify per configuration options by -# specifying the name of the configuration followed by the options. General -# options must preceed configuration specific options. Not all -# configurations need to be specified, only the ones provided will be used. -# -# -# -# :: -# -# OPTIONS -DFLAG=2 "-DFLAG_OTHER=space in flag" -# DEBUG -g -# RELEASE --use_fast_math -# RELWITHDEBINFO --use_fast_math;-g -# MINSIZEREL --use_fast_math -# -# -# -# :: -# -# For certain configurations (namely VS generating object files with -# CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE set to ON), no generated file will -# be produced for the given cuda file. This is because when you add the -# cuda file to Visual Studio it knows that this file produces an object file -# and will link in the resulting object file automatically. -# -# -# -# :: -# -# This script will also generate a separate cmake script that is used at -# build time to invoke nvcc. This is for several reasons. -# -# -# -# :: -# -# 1. nvcc can return negative numbers as return values which confuses -# Visual Studio into thinking that the command succeeded. The script now -# checks the error codes and produces errors when there was a problem. -# -# -# -# :: -# -# 2. nvcc has been known to not delete incomplete results when it -# encounters problems. This confuses build systems into thinking the -# target was generated when in fact an unusable file exists. The script -# now deletes the output files if there was an error. -# -# -# -# :: -# -# 3. By putting all the options that affect the build into a file and then -# make the build rule dependent on the file, the output files will be -# regenerated when the options change. -# -# -# -# :: -# -# This script also looks at optional arguments STATIC, SHARED, or MODULE to -# determine when to target the object compilation for a shared library. -# BUILD_SHARED_LIBS is ignored in CUDA_WRAP_SRCS, but it is respected in -# CUDA_ADD_LIBRARY. 
On some systems special flags are added for building -# objects intended for shared libraries. A preprocessor macro, -# _EXPORTS is defined when a shared library compilation is -# detected. -# -# -# -# :: -# -# Flags passed into add_definitions with -D or /D are passed along to nvcc. -# -# -# -# The script defines the following variables: -# -# :: -# -# CUDA_VERSION_MAJOR -- The major version of cuda as reported by nvcc. -# CUDA_VERSION_MINOR -- The minor version. -# CUDA_VERSION -# CUDA_VERSION_STRING -- CUDA_VERSION_MAJOR.CUDA_VERSION_MINOR -# -# -# -# :: -# -# CUDA_TOOLKIT_ROOT_DIR -- Path to the CUDA Toolkit (defined if not set). -# CUDA_SDK_ROOT_DIR -- Path to the CUDA SDK. Use this to find files in the -# SDK. This script will not directly support finding -# specific libraries or headers, as that isn't -# supported by NVIDIA. If you want to change -# libraries when the path changes see the -# FindCUDA.cmake script for an example of how to clear -# these variables. There are also examples of how to -# use the CUDA_SDK_ROOT_DIR to locate headers or -# libraries, if you so choose (at your own risk). -# CUDA_INCLUDE_DIRS -- Include directory for cuda headers. Added automatically -# for CUDA_ADD_EXECUTABLE and CUDA_ADD_LIBRARY. -# CUDA_LIBRARIES -- Cuda RT library. -# CUDA_CUFFT_LIBRARIES -- Device or emulation library for the Cuda FFT -# implementation (alternative to: -# CUDA_ADD_CUFFT_TO_TARGET macro) -# CUDA_CUBLAS_LIBRARIES -- Device or emulation library for the Cuda BLAS -# implementation (alterative to: -# CUDA_ADD_CUBLAS_TO_TARGET macro). -# CUDA_cupti_LIBRARY -- CUDA Profiling Tools Interface library. -# Only available for CUDA version 4.0+. -# CUDA_curand_LIBRARY -- CUDA Random Number Generation library. -# Only available for CUDA version 3.2+. -# CUDA_cusparse_LIBRARY -- CUDA Sparse Matrix library. -# Only available for CUDA version 3.2+. -# CUDA_npp_LIBRARY -- NVIDIA Performance Primitives library. -# Only available for CUDA version 4.0+. 
-# CUDA_nppc_LIBRARY -- NVIDIA Performance Primitives library (core). -# Only available for CUDA version 5.5+. -# CUDA_nppi_LIBRARY -- NVIDIA Performance Primitives library (image processing). -# Only available for CUDA version 5.5+. -# CUDA_npps_LIBRARY -- NVIDIA Performance Primitives library (signal processing). -# Only available for CUDA version 5.5+. -# CUDA_nvcuvenc_LIBRARY -- CUDA Video Encoder library. -# Only available for CUDA version 3.2+. -# Windows only. -# CUDA_nvcuvid_LIBRARY -- CUDA Video Decoder library. -# Only available for CUDA version 3.2+. -# Windows only. -# -# -# -# -# -# :: -# -# James Bigler, NVIDIA Corp (nvidia.com - jbigler) -# Abe Stephens, SCI Institute -- http://www.sci.utah.edu/~abe/FindCuda.html -# -# -# -# :: -# -# Copyright (c) 2008 - 2009 NVIDIA Corporation. All rights reserved. -# -# -# -# :: -# -# Copyright (c) 2007-2009 -# Scientific Computing and Imaging Institute, University of Utah -# -# -# -# :: -# -# This code is licensed under the MIT License. See the FindCUDA.cmake script -# for the text of the license. - -# The MIT License -# -# License for the specific language governing rights and limitations under -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included -# in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. -# -############################################################################### - -# FindCUDA.cmake - -# We need to have at least this version to support the VERSION_LESS argument to 'if' (2.6.2) and unset (2.6.3) -cmake_policy(PUSH) -cmake_minimum_required(VERSION 2.6.3) -cmake_policy(POP) - -# This macro helps us find the location of helper files we will need the full path to -macro(CUDA_FIND_HELPER_FILE _name _extension) - set(_full_name "${_name}.${_extension}") - # CMAKE_CURRENT_LIST_FILE contains the full path to the file currently being - # processed. Using this variable, we can pull out the current path, and - # provide a way to get access to the other files we need local to here. - get_filename_component(CMAKE_CURRENT_LIST_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH) - set(CUDA_${_name} "${CMAKE_CURRENT_LIST_DIR}/FindCUDA/${_full_name}") - if(NOT EXISTS "${CUDA_${_name}}") - set(error_message "${_full_name} not found in ${CMAKE_CURRENT_LIST_DIR}/FindCUDA") - if(CUDA_FIND_REQUIRED) - message(FATAL_ERROR "${error_message}") - else() - if(NOT CUDA_FIND_QUIETLY) - message(STATUS "${error_message}") - endif() - endif() - endif() - # Set this variable as internal, so the user isn't bugged with it. - set(CUDA_${_name} ${CUDA_${_name}} CACHE INTERNAL "Location of ${_full_name}" FORCE) -endmacro() - -##################################################################### -## CUDA_INCLUDE_NVCC_DEPENDENCIES -## - -# So we want to try and include the dependency file if it exists. If -# it doesn't exist then we need to create an empty one, so we can -# include it. - -# If it does exist, then we need to check to see if all the files it -# depends on exist. 
If they don't then we should clear the dependency -# file and regenerate it later. This covers the case where a header -# file has disappeared or moved. - -macro(CUDA_INCLUDE_NVCC_DEPENDENCIES dependency_file) - set(CUDA_NVCC_DEPEND) - set(CUDA_NVCC_DEPEND_REGENERATE FALSE) - - - # Include the dependency file. Create it first if it doesn't exist . The - # INCLUDE puts a dependency that will force CMake to rerun and bring in the - # new info when it changes. DO NOT REMOVE THIS (as I did and spent a few - # hours figuring out why it didn't work. - if(NOT EXISTS ${dependency_file}) - file(WRITE ${dependency_file} "#FindCUDA.cmake generated file. Do not edit.\n") - endif() - # Always include this file to force CMake to run again next - # invocation and rebuild the dependencies. - #message("including dependency_file = ${dependency_file}") - include(${dependency_file}) - - # Now we need to verify the existence of all the included files - # here. If they aren't there we need to just blank this variable and - # make the file regenerate again. -# if(DEFINED CUDA_NVCC_DEPEND) -# message("CUDA_NVCC_DEPEND set") -# else() -# message("CUDA_NVCC_DEPEND NOT set") -# endif() - if(CUDA_NVCC_DEPEND) - #message("CUDA_NVCC_DEPEND found") - foreach(f ${CUDA_NVCC_DEPEND}) - # message("searching for ${f}") - if(NOT EXISTS ${f}) - #message("file ${f} not found") - set(CUDA_NVCC_DEPEND_REGENERATE TRUE) - endif() - endforeach() - else() - #message("CUDA_NVCC_DEPEND false") - # No dependencies, so regenerate the file. - set(CUDA_NVCC_DEPEND_REGENERATE TRUE) - endif() - - #message("CUDA_NVCC_DEPEND_REGENERATE = ${CUDA_NVCC_DEPEND_REGENERATE}") - # No incoming dependencies, so we need to generate them. Make the - # output depend on the dependency file itself, which should cause the - # rule to re-run. 
- if(CUDA_NVCC_DEPEND_REGENERATE) - set(CUDA_NVCC_DEPEND ${dependency_file}) - #message("Generating an empty dependency_file: ${dependency_file}") - file(WRITE ${dependency_file} "#FindCUDA.cmake generated file. Do not edit.\n") - endif() - -endmacro() - -############################################################################### -############################################################################### -# Setup variables' defaults -############################################################################### -############################################################################### - -# Allow the user to specify if the device code is supposed to be 32 or 64 bit. -if(CMAKE_SIZEOF_VOID_P EQUAL 8) - set(CUDA_64_BIT_DEVICE_CODE_DEFAULT ON) -else() - set(CUDA_64_BIT_DEVICE_CODE_DEFAULT OFF) -endif() -option(CUDA_64_BIT_DEVICE_CODE "Compile device code in 64 bit mode" ${CUDA_64_BIT_DEVICE_CODE_DEFAULT}) - -# Attach the build rule to the source file in VS. This option -option(CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE "Attach the build rule to the CUDA source file. Enable only when the CUDA source file is added to at most one target." ON) - -# Prints out extra information about the cuda file during compilation -option(CUDA_BUILD_CUBIN "Generate and parse .cubin files in Device mode." OFF) - -# Set whether we are using emulation or device mode. -option(CUDA_BUILD_EMULATION "Build in Emulation mode" OFF) - -# Where to put the generated output. -set(CUDA_GENERATED_OUTPUT_DIR "" CACHE PATH "Directory to put all the output files. If blank it will default to the CMAKE_CURRENT_BINARY_DIR") - -# Parse HOST_COMPILATION mode. 
-option(CUDA_HOST_COMPILATION_CPP "Generated file extension" ON) - -# Extra user settable flags -set(CUDA_NVCC_FLAGS "" CACHE STRING "Semi-colon delimit multiple arguments.") - -if(CMAKE_GENERATOR MATCHES "Visual Studio") - set(CUDA_HOST_COMPILER "$(VCInstallDir)bin" CACHE FILEPATH "Host side compiler used by NVCC") -else() - set(CUDA_HOST_COMPILER "${CMAKE_C_COMPILER}" CACHE FILEPATH "Host side compiler used by NVCC") -endif() - -# Propagate the host flags to the host compiler via -Xcompiler -option(CUDA_PROPAGATE_HOST_FLAGS "Propage C/CXX_FLAGS and friends to the host compiler via -Xcompile" ON) - -# Enable CUDA_SEPARABLE_COMPILATION -option(CUDA_SEPARABLE_COMPILATION "Compile CUDA objects with separable compilation enabled. Requires CUDA 5.0+" OFF) - -# Specifies whether the commands used when compiling the .cu file will be printed out. -option(CUDA_VERBOSE_BUILD "Print out the commands run while compiling the CUDA source file. With the Makefile generator this defaults to VERBOSE variable specified on the command line, but can be forced on with this option." OFF) - -mark_as_advanced( - CUDA_64_BIT_DEVICE_CODE - CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE - CUDA_GENERATED_OUTPUT_DIR - CUDA_HOST_COMPILATION_CPP - CUDA_NVCC_FLAGS - CUDA_PROPAGATE_HOST_FLAGS - ) - -# Makefile and similar generators don't define CMAKE_CONFIGURATION_TYPES, so we -# need to add another entry for the CMAKE_BUILD_TYPE. We also need to add the -# standerd set of 4 build types (Debug, MinSizeRel, Release, and RelWithDebInfo) -# for completeness. We need run this loop in order to accomodate the addition -# of extra configuration types. Duplicate entries will be removed by -# REMOVE_DUPLICATES. 
-set(CUDA_configuration_types ${CMAKE_CONFIGURATION_TYPES} ${CMAKE_BUILD_TYPE} Debug MinSizeRel Release RelWithDebInfo) -list(REMOVE_DUPLICATES CUDA_configuration_types) -foreach(config ${CUDA_configuration_types}) - string(TOUPPER ${config} config_upper) - set(CUDA_NVCC_FLAGS_${config_upper} "" CACHE STRING "Semi-colon delimit multiple arguments.") - mark_as_advanced(CUDA_NVCC_FLAGS_${config_upper}) -endforeach() - -############################################################################### -############################################################################### -# Locate CUDA, Set Build Type, etc. -############################################################################### -############################################################################### - -macro(cuda_unset_include_and_libraries) - unset(CUDA_TOOLKIT_INCLUDE CACHE) - unset(CUDA_CUDART_LIBRARY CACHE) - unset(CUDA_CUDA_LIBRARY CACHE) - # Make sure you run this before you unset CUDA_VERSION. - if(CUDA_VERSION VERSION_EQUAL "3.0") - # This only existed in the 3.0 version of the CUDA toolkit - unset(CUDA_CUDARTEMU_LIBRARY CACHE) - endif() - unset(CUDA_cupti_LIBRARY CACHE) - unset(CUDA_cublas_LIBRARY CACHE) - unset(CUDA_cublasemu_LIBRARY CACHE) - unset(CUDA_cufft_LIBRARY CACHE) - unset(CUDA_cufftemu_LIBRARY CACHE) - unset(CUDA_curand_LIBRARY CACHE) - unset(CUDA_cusparse_LIBRARY CACHE) - unset(CUDA_npp_LIBRARY CACHE) - unset(CUDA_nppc_LIBRARY CACHE) - unset(CUDA_nppi_LIBRARY CACHE) - unset(CUDA_npps_LIBRARY CACHE) - unset(CUDA_nvcuvenc_LIBRARY CACHE) - unset(CUDA_nvcuvid_LIBRARY CACHE) -endmacro() - -# Check to see if the CUDA_TOOLKIT_ROOT_DIR and CUDA_SDK_ROOT_DIR have changed, -# if they have then clear the cache variables, so that will be detected again. 
-if(NOT "${CUDA_TOOLKIT_ROOT_DIR}" STREQUAL "${CUDA_TOOLKIT_ROOT_DIR_INTERNAL}") - unset(CUDA_TOOLKIT_TARGET_DIR CACHE) - unset(CUDA_NVCC_EXECUTABLE CACHE) - unset(CUDA_VERSION CACHE) - cuda_unset_include_and_libraries() -endif() - -if(NOT "${CUDA_TOOLKIT_TARGET_DIR}" STREQUAL "${CUDA_TOOLKIT_TARGET_DIR_INTERNAL}") - cuda_unset_include_and_libraries() -endif() - -if(NOT "${CUDA_SDK_ROOT_DIR}" STREQUAL "${CUDA_SDK_ROOT_DIR_INTERNAL}") - # No specific variables to catch. Use this kind of code before calling - # find_package(CUDA) to clean up any variables that may depend on this path. - - # unset(MY_SPECIAL_CUDA_SDK_INCLUDE_DIR CACHE) - # unset(MY_SPECIAL_CUDA_SDK_LIBRARY CACHE) -endif() - -# Search for the cuda distribution. -if(NOT CUDA_TOOLKIT_ROOT_DIR) - - # Search in the CUDA_BIN_PATH first. - find_path(CUDA_TOOLKIT_ROOT_DIR - NAMES nvcc nvcc.exe - PATHS - ENV CUDA_PATH - ENV CUDA_BIN_PATH - PATH_SUFFIXES bin bin64 - DOC "Toolkit location." - NO_DEFAULT_PATH - ) - # Now search default paths - find_path(CUDA_TOOLKIT_ROOT_DIR - NAMES nvcc nvcc.exe - PATHS /usr/local/bin - /usr/local/cuda/bin - DOC "Toolkit location." - ) - - if (CUDA_TOOLKIT_ROOT_DIR) - string(REGEX REPLACE "[/\\\\]?bin[64]*[/\\\\]?$" "" CUDA_TOOLKIT_ROOT_DIR ${CUDA_TOOLKIT_ROOT_DIR}) - # We need to force this back into the cache. - set(CUDA_TOOLKIT_ROOT_DIR ${CUDA_TOOLKIT_ROOT_DIR} CACHE PATH "Toolkit location." FORCE) - endif() - if (NOT EXISTS ${CUDA_TOOLKIT_ROOT_DIR}) - if(CUDA_FIND_REQUIRED) - message(FATAL_ERROR "Specify CUDA_TOOLKIT_ROOT_DIR") - elseif(NOT CUDA_FIND_QUIETLY) - message("CUDA_TOOLKIT_ROOT_DIR not found or specified") - endif() - endif () -endif () - -# CUDA_NVCC_EXECUTABLE -find_program(CUDA_NVCC_EXECUTABLE - NAMES nvcc - PATHS "${CUDA_TOOLKIT_ROOT_DIR}" - ENV CUDA_PATH - ENV CUDA_BIN_PATH - PATH_SUFFIXES bin bin64 - NO_DEFAULT_PATH - ) -# Search default search paths, after we search our own set of paths. 
-find_program(CUDA_NVCC_EXECUTABLE nvcc) -mark_as_advanced(CUDA_NVCC_EXECUTABLE) - -if(CUDA_NVCC_EXECUTABLE AND NOT CUDA_VERSION) - # Compute the version. - execute_process (COMMAND ${CUDA_NVCC_EXECUTABLE} "--version" OUTPUT_VARIABLE NVCC_OUT) - string(REGEX REPLACE ".*release ([0-9]+)\\.([0-9]+).*" "\\1" CUDA_VERSION_MAJOR ${NVCC_OUT}) - string(REGEX REPLACE ".*release ([0-9]+)\\.([0-9]+).*" "\\2" CUDA_VERSION_MINOR ${NVCC_OUT}) - set(CUDA_VERSION "${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}" CACHE STRING "Version of CUDA as computed from nvcc.") - mark_as_advanced(CUDA_VERSION) -else() - # Need to set these based off of the cached value - string(REGEX REPLACE "([0-9]+)\\.([0-9]+).*" "\\1" CUDA_VERSION_MAJOR "${CUDA_VERSION}") - string(REGEX REPLACE "([0-9]+)\\.([0-9]+).*" "\\2" CUDA_VERSION_MINOR "${CUDA_VERSION}") -endif() - -# Always set this convenience variable -set(CUDA_VERSION_STRING "${CUDA_VERSION}") - -# Support for arm cross compilation with CUDA 5.5 -set(__cuda_toolkit_target_dir_initial "${CUDA_TOOLKIT_ROOT_DIR}") -if(CUDA_VERSION VERSION_GREATER "5.0" AND CMAKE_CROSSCOMPILING AND ${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm") - if(ANDROID AND EXISTS "${CUDA_TOOLKIT_ROOT_DIR}/targets/armv7-linux-androideabi") - set(__cuda_toolkit_target_dir_initial "${CUDA_TOOLKIT_ROOT_DIR}/targets/armv7-linux-androideabi") - elseif(EXISTS "${CUDA_TOOLKIT_ROOT_DIR}/targets/armv7-linux-gnueabihf") - set(__cuda_toolkit_target_dir_initial "${CUDA_TOOLKIT_ROOT_DIR}/targets/armv7-linux-gnueabihf") - endif() -endif() -set(CUDA_TOOLKIT_TARGET_DIR "${__cuda_toolkit_target_dir_initial}" CACHE PATH "Toolkit target location.") -mark_as_advanced(CUDA_TOOLKIT_TARGET_DIR) - -# Target CPU architecture -if(CUDA_VERSION VERSION_GREATER "5.0" AND CMAKE_CROSSCOMPILING AND ${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm") - set(_cuda_target_cpu_arch_initial "ARM") -else() - set(_cuda_target_cpu_arch_initial "") -endif() -set(CUDA_TARGET_CPU_ARCH ${_cuda_target_cpu_arch_initial} CACHE STRING "Specify 
the name of the class of CPU architecture for which the input files must be compiled.") -mark_as_advanced(CUDA_TARGET_CPU_ARCH) - -# CUDA_TOOLKIT_INCLUDE -find_path(CUDA_TOOLKIT_INCLUDE - device_functions.h # Header included in toolkit - PATHS "${CUDA_TOOLKIT_TARGET_DIR}" "${CUDA_TOOLKIT_ROOT_DIR}" - ENV CUDA_PATH - ENV CUDA_INC_PATH - PATH_SUFFIXES include - NO_DEFAULT_PATH - ) -# Search default search paths, after we search our own set of paths. -find_path(CUDA_TOOLKIT_INCLUDE device_functions.h) -mark_as_advanced(CUDA_TOOLKIT_INCLUDE) - -# Set the user list of include dir to nothing to initialize it. -set (CUDA_NVCC_INCLUDE_ARGS_USER "") -set (CUDA_INCLUDE_DIRS ${CUDA_TOOLKIT_INCLUDE}) - -macro(cuda_find_library_local_first_with_path_ext _var _names _doc _path_ext ) - if(CMAKE_SIZEOF_VOID_P EQUAL 8) - # CUDA 3.2+ on Windows moved the library directories, so we need the new - # and old paths. - set(_cuda_64bit_lib_dir "${_path_ext}lib/x64" "${_path_ext}lib64" "${_path_ext}libx64" ) - endif() - # CUDA 3.2+ on Windows moved the library directories, so we need to new - # (lib/Win32) and the old path (lib). - find_library(${_var} - NAMES ${_names} - PATHS "${CUDA_TOOLKIT_TARGET_DIR}" "${CUDA_TOOLKIT_ROOT_DIR}" - ENV CUDA_PATH - ENV CUDA_LIB_PATH - PATH_SUFFIXES ${_cuda_64bit_lib_dir} "${_path_ext}lib/Win32" "${_path_ext}lib" "${_path_ext}libWin32" - DOC ${_doc} - NO_DEFAULT_PATH - ) - # Search default search paths, after we search our own set of paths. 
- find_library(${_var} - NAMES ${_names} - PATHS "/usr/lib/nvidia-current" - DOC ${_doc} - ) -endmacro() - -macro(cuda_find_library_local_first _var _names _doc) - cuda_find_library_local_first_with_path_ext( "${_var}" "${_names}" "${_doc}" "" ) -endmacro() - -macro(find_library_local_first _var _names _doc ) - cuda_find_library_local_first( "${_var}" "${_names}" "${_doc}" "" ) -endmacro() - - -# CUDA_LIBRARIES -cuda_find_library_local_first(CUDA_CUDART_LIBRARY cudart "\"cudart\" library") -if(CUDA_VERSION VERSION_EQUAL "3.0") - # The cudartemu library only existed for the 3.0 version of CUDA. - cuda_find_library_local_first(CUDA_CUDARTEMU_LIBRARY cudartemu "\"cudartemu\" library") - mark_as_advanced( - CUDA_CUDARTEMU_LIBRARY - ) -endif() - -# CUPTI library showed up in cuda toolkit 4.0 -if(NOT CUDA_VERSION VERSION_LESS "4.0") - cuda_find_library_local_first_with_path_ext(CUDA_cupti_LIBRARY cupti "\"cupti\" library" "extras/CUPTI/") - mark_as_advanced(CUDA_cupti_LIBRARY) -endif() - -# If we are using emulation mode and we found the cudartemu library then use -# that one instead of cudart. -if(CUDA_BUILD_EMULATION AND CUDA_CUDARTEMU_LIBRARY) - set(CUDA_LIBRARIES ${CUDA_CUDARTEMU_LIBRARY}) -else() - set(CUDA_LIBRARIES ${CUDA_CUDART_LIBRARY}) -endif() -# if(APPLE) -# # We need to add the path to cudart to the linker using rpath, since the -# # library name for the cuda libraries is prepended with @rpath. -# if(CUDA_BUILD_EMULATION AND CUDA_CUDARTEMU_LIBRARY) -# get_filename_component(_cuda_path_to_cudart "${CUDA_CUDARTEMU_LIBRARY}" PATH) -# else() -# get_filename_component(_cuda_path_to_cudart "${CUDA_CUDART_LIBRARY}" PATH) -# endif() -# if(_cuda_path_to_cudart) -# list(APPEND CUDA_LIBRARIES -Wl,-rpath "-Wl,${_cuda_path_to_cudart}") -# endif() -# endif() - -# 1.1 toolkit on linux doesn't appear to have a separate library on -# some platforms. 
-cuda_find_library_local_first(CUDA_CUDA_LIBRARY cuda "\"cuda\" library (older versions only).") - -mark_as_advanced( - CUDA_CUDA_LIBRARY - CUDA_CUDART_LIBRARY - ) - -####################### -# Look for some of the toolkit helper libraries -macro(FIND_CUDA_HELPER_LIBS _name) - cuda_find_library_local_first(CUDA_${_name}_LIBRARY ${_name} "\"${_name}\" library") - mark_as_advanced(CUDA_${_name}_LIBRARY) -endmacro() - -####################### -# Disable emulation for v3.1 onward -if(CUDA_VERSION VERSION_GREATER "3.0") - if(CUDA_BUILD_EMULATION) - message(FATAL_ERROR "CUDA_BUILD_EMULATION is not supported in version 3.1 and onwards. You must disable it to proceed. You have version ${CUDA_VERSION}.") - endif() -endif() - -# Search for additional CUDA toolkit libraries. -if(CUDA_VERSION VERSION_LESS "3.1") - # Emulation libraries aren't available in version 3.1 onward. - find_cuda_helper_libs(cufftemu) - find_cuda_helper_libs(cublasemu) -endif() -find_cuda_helper_libs(cufft) -find_cuda_helper_libs(cublas) -if(NOT CUDA_VERSION VERSION_LESS "3.2") - # cusparse showed up in version 3.2 - find_cuda_helper_libs(cusparse) - find_cuda_helper_libs(curand) - if (WIN32) - find_cuda_helper_libs(nvcuvenc) - find_cuda_helper_libs(nvcuvid) - endif() -endif() -if(CUDA_VERSION VERSION_GREATER "5.0") - # In CUDA 5.5 NPP was splitted onto 3 separate libraries. - find_cuda_helper_libs(nppc) - find_cuda_helper_libs(nppi) - find_cuda_helper_libs(npps) - set(CUDA_npp_LIBRARY "${CUDA_nppc_LIBRARY};${CUDA_nppi_LIBRARY};${CUDA_npps_LIBRARY}") -elseif(NOT CUDA_VERSION VERSION_LESS "4.0") - find_cuda_helper_libs(npp) -endif() - -if (CUDA_BUILD_EMULATION) - set(CUDA_CUFFT_LIBRARIES ${CUDA_cufftemu_LIBRARY}) - set(CUDA_CUBLAS_LIBRARIES ${CUDA_cublasemu_LIBRARY}) -else() - set(CUDA_CUFFT_LIBRARIES ${CUDA_cufft_LIBRARY}) - set(CUDA_CUBLAS_LIBRARIES ${CUDA_cublas_LIBRARY}) -endif() - -######################## -# Look for the SDK stuff. 
As of CUDA 3.0 NVSDKCUDA_ROOT has been replaced with -# NVSDKCOMPUTE_ROOT with the old CUDA C contents moved into the C subdirectory -find_path(CUDA_SDK_ROOT_DIR common/inc/cutil.h - HINTS - "$ENV{NVSDKCOMPUTE_ROOT}/C" - ENV NVSDKCUDA_ROOT - "[HKEY_LOCAL_MACHINE\\SOFTWARE\\NVIDIA Corporation\\Installed Products\\NVIDIA SDK 10\\Compute;InstallDir]" - PATHS - "/Developer/GPU\ Computing/C" - ) - -# Keep the CUDA_SDK_ROOT_DIR first in order to be able to override the -# environment variables. -set(CUDA_SDK_SEARCH_PATH - "${CUDA_SDK_ROOT_DIR}" - "${CUDA_TOOLKIT_ROOT_DIR}/local/NVSDK0.2" - "${CUDA_TOOLKIT_ROOT_DIR}/NVSDK0.2" - "${CUDA_TOOLKIT_ROOT_DIR}/NV_CUDA_SDK" - "$ENV{HOME}/NVIDIA_CUDA_SDK" - "$ENV{HOME}/NVIDIA_CUDA_SDK_MACOSX" - "/Developer/CUDA" - ) - -# Example of how to find an include file from the CUDA_SDK_ROOT_DIR - -# find_path(CUDA_CUT_INCLUDE_DIR -# cutil.h -# PATHS ${CUDA_SDK_SEARCH_PATH} -# PATH_SUFFIXES "common/inc" -# DOC "Location of cutil.h" -# NO_DEFAULT_PATH -# ) -# # Now search system paths -# find_path(CUDA_CUT_INCLUDE_DIR cutil.h DOC "Location of cutil.h") - -# mark_as_advanced(CUDA_CUT_INCLUDE_DIR) - - -# Example of how to find a library in the CUDA_SDK_ROOT_DIR - -# # cutil library is called cutil64 for 64 bit builds on windows. We don't want -# # to get these confused, so we are setting the name based on the word size of -# # the build. 
- -# if(CMAKE_SIZEOF_VOID_P EQUAL 8) -# set(cuda_cutil_name cutil64) -# else() -# set(cuda_cutil_name cutil32) -# endif() - -# find_library(CUDA_CUT_LIBRARY -# NAMES cutil ${cuda_cutil_name} -# PATHS ${CUDA_SDK_SEARCH_PATH} -# # The new version of the sdk shows up in common/lib, but the old one is in lib -# PATH_SUFFIXES "common/lib" "lib" -# DOC "Location of cutil library" -# NO_DEFAULT_PATH -# ) -# # Now search system paths -# find_library(CUDA_CUT_LIBRARY NAMES cutil ${cuda_cutil_name} DOC "Location of cutil library") -# mark_as_advanced(CUDA_CUT_LIBRARY) -# set(CUDA_CUT_LIBRARIES ${CUDA_CUT_LIBRARY}) - - - -############################# -# Check for required components -set(CUDA_FOUND TRUE) - -set(CUDA_TOOLKIT_ROOT_DIR_INTERNAL "${CUDA_TOOLKIT_ROOT_DIR}" CACHE INTERNAL - "This is the value of the last time CUDA_TOOLKIT_ROOT_DIR was set successfully." FORCE) -set(CUDA_TOOLKIT_TARGET_DIR_INTERNAL "${CUDA_TOOLKIT_TARGET_DIR}" CACHE INTERNAL - "This is the value of the last time CUDA_TOOLKIT_TARGET_DIR was set successfully." FORCE) -set(CUDA_SDK_ROOT_DIR_INTERNAL "${CUDA_SDK_ROOT_DIR}" CACHE INTERNAL - "This is the value of the last time CUDA_SDK_ROOT_DIR was set successfully." FORCE) - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(CUDA - REQUIRED_VARS - CUDA_TOOLKIT_ROOT_DIR - CUDA_NVCC_EXECUTABLE - CUDA_INCLUDE_DIRS - CUDA_CUDART_LIBRARY - VERSION_VAR - CUDA_VERSION - ) - - - -############################################################################### -############################################################################### -# Macros -############################################################################### -############################################################################### - -############################################################################### -# Add include directories to pass to the nvcc command. 
-macro(CUDA_INCLUDE_DIRECTORIES) - foreach(dir ${ARGN}) - list(APPEND CUDA_NVCC_INCLUDE_ARGS_USER -I${dir}) - endforeach() -endmacro() - - -############################################################################## -cuda_find_helper_file(parse_cubin cmake) -cuda_find_helper_file(make2cmake cmake) -cuda_find_helper_file(run_nvcc cmake) - -############################################################################## -# Separate the OPTIONS out from the sources -# -macro(CUDA_GET_SOURCES_AND_OPTIONS _sources _cmake_options _options) - set( ${_sources} ) - set( ${_cmake_options} ) - set( ${_options} ) - set( _found_options FALSE ) - foreach(arg ${ARGN}) - if(arg STREQUAL "OPTIONS") - set( _found_options TRUE ) - elseif( - arg STREQUAL "WIN32" OR - arg STREQUAL "MACOSX_BUNDLE" OR - arg STREQUAL "EXCLUDE_FROM_ALL" OR - arg STREQUAL "STATIC" OR - arg STREQUAL "SHARED" OR - arg STREQUAL "MODULE" - ) - list(APPEND ${_cmake_options} ${arg}) - else() - if ( _found_options ) - list(APPEND ${_options} ${arg}) - else() - # Assume this is a file - list(APPEND ${_sources} ${arg}) - endif() - endif() - endforeach() -endmacro() - -############################################################################## -# Parse the OPTIONS from ARGN and set the variables prefixed by _option_prefix -# -macro(CUDA_PARSE_NVCC_OPTIONS _option_prefix) - set( _found_config ) - foreach(arg ${ARGN}) - # Determine if we are dealing with a perconfiguration flag - foreach(config ${CUDA_configuration_types}) - string(TOUPPER ${config} config_upper) - if (arg STREQUAL "${config_upper}") - set( _found_config _${arg}) - # Set arg to nothing to keep it from being processed further - set( arg ) - endif() - endforeach() - - if ( arg ) - list(APPEND ${_option_prefix}${_found_config} "${arg}") - endif() - endforeach() -endmacro() - -############################################################################## -# Helper to add the include directory for CUDA only once -function(CUDA_ADD_CUDA_INCLUDE_ONCE) - 
get_directory_property(_include_directories INCLUDE_DIRECTORIES) - set(_add TRUE) - if(_include_directories) - foreach(dir ${_include_directories}) - if("${dir}" STREQUAL "${CUDA_INCLUDE_DIRS}") - set(_add FALSE) - endif() - endforeach() - endif() - if(_add) - include_directories(${CUDA_INCLUDE_DIRS}) - endif() -endfunction() - -function(CUDA_BUILD_SHARED_LIBRARY shared_flag) - set(cmake_args ${ARGN}) - # If SHARED, MODULE, or STATIC aren't already in the list of arguments, then - # add SHARED or STATIC based on the value of BUILD_SHARED_LIBS. - list(FIND cmake_args SHARED _cuda_found_SHARED) - list(FIND cmake_args MODULE _cuda_found_MODULE) - list(FIND cmake_args STATIC _cuda_found_STATIC) - if( _cuda_found_SHARED GREATER -1 OR - _cuda_found_MODULE GREATER -1 OR - _cuda_found_STATIC GREATER -1) - set(_cuda_build_shared_libs) - else() - if (BUILD_SHARED_LIBS) - set(_cuda_build_shared_libs SHARED) - else() - set(_cuda_build_shared_libs STATIC) - endif() - endif() - set(${shared_flag} ${_cuda_build_shared_libs} PARENT_SCOPE) -endfunction() - -############################################################################## -# Helper to avoid clashes of files with the same basename but different paths. -# This doesn't attempt to do exactly what CMake internals do, which is to only -# add this path when there is a conflict, since by the time a second collision -# in names is detected it's already too late to fix the first one. For -# consistency sake the relative path will be added to all files. -function(CUDA_COMPUTE_BUILD_PATH path build_path) - #message("CUDA_COMPUTE_BUILD_PATH([${path}] ${build_path})") - # Only deal with CMake style paths from here on out - file(TO_CMAKE_PATH "${path}" bpath) - if (IS_ABSOLUTE "${bpath}") - # Absolute paths are generally unnessary, especially if something like - # file(GLOB_RECURSE) is used to pick up the files. 
- - string(FIND "${bpath}" "${CMAKE_CURRENT_BINARY_DIR}" _binary_dir_pos) - if (_binary_dir_pos EQUAL 0) - file(RELATIVE_PATH bpath "${CMAKE_CURRENT_BINARY_DIR}" "${bpath}") - else() - file(RELATIVE_PATH bpath "${CMAKE_CURRENT_SOURCE_DIR}" "${bpath}") - endif() - endif() - - # This recipie is from cmLocalGenerator::CreateSafeUniqueObjectFileName in the - # CMake source. - - # Remove leading / - string(REGEX REPLACE "^[/]+" "" bpath "${bpath}") - # Avoid absolute paths by removing ':' - string(REPLACE ":" "_" bpath "${bpath}") - # Avoid relative paths that go up the tree - string(REPLACE "../" "__/" bpath "${bpath}") - # Avoid spaces - string(REPLACE " " "_" bpath "${bpath}") - - # Strip off the filename. I wait until here to do it, since removin the - # basename can make a path that looked like path/../basename turn into - # path/.. (notice the trailing slash). - get_filename_component(bpath "${bpath}" PATH) - - set(${build_path} "${bpath}" PARENT_SCOPE) - #message("${build_path} = ${bpath}") -endfunction() - -############################################################################## -# This helper macro populates the following variables and setups up custom -# commands and targets to invoke the nvcc compiler to generate C or PTX source -# dependent upon the format parameter. The compiler is invoked once with -M -# to generate a dependency file and a second time with -cuda or -ptx to generate -# a .cpp or .ptx file. -# INPUT: -# cuda_target - Target name -# format - PTX or OBJ -# FILE1 .. FILEN - The remaining arguments are the sources to be wrapped. 
-# OPTIONS - Extra options to NVCC -# OUTPUT: -# generated_files - List of generated files -############################################################################## -############################################################################## - -macro(CUDA_WRAP_SRCS cuda_target format generated_files) - - # If CMake doesn't support separable compilation, complain - if(CUDA_SEPARABLE_COMPILATION AND CMAKE_VERSION VERSION_LESS "2.8.10.1") - message(SEND_ERROR "CUDA_SEPARABLE_COMPILATION isn't supported for CMake versions less than 2.8.10.1") - endif() - - # Set up all the command line flags here, so that they can be overridden on a per target basis. - - set(nvcc_flags "") - - # Emulation if the card isn't present. - if (CUDA_BUILD_EMULATION) - # Emulation. - set(nvcc_flags ${nvcc_flags} --device-emulation -D_DEVICEEMU -g) - else() - # Device mode. No flags necessary. - endif() - - if(CUDA_HOST_COMPILATION_CPP) - set(CUDA_C_OR_CXX CXX) - else() - if(CUDA_VERSION VERSION_LESS "3.0") - set(nvcc_flags ${nvcc_flags} --host-compilation C) - else() - message(WARNING "--host-compilation flag is deprecated in CUDA version >= 3.0. Removing --host-compilation C flag" ) - endif() - set(CUDA_C_OR_CXX C) - endif() - - set(generated_extension ${CMAKE_${CUDA_C_OR_CXX}_OUTPUT_EXTENSION}) - - if(CUDA_64_BIT_DEVICE_CODE) - set(nvcc_flags ${nvcc_flags} -m64) - else() - set(nvcc_flags ${nvcc_flags} -m32) - endif() - - if(CUDA_TARGET_CPU_ARCH) - set(nvcc_flags ${nvcc_flags} "--target-cpu-architecture=${CUDA_TARGET_CPU_ARCH}") - endif() - - # This needs to be passed in at this stage, because VS needs to fill out the - # value of VCInstallDir from within VS. Note that CCBIN is only used if - # -ccbin or --compiler-bindir isn't used and CUDA_HOST_COMPILER matches - # $(VCInstallDir)/bin. 
- if(CMAKE_GENERATOR MATCHES "Visual Studio") - set(ccbin_flags -D "\"CCBIN:PATH=$(VCInstallDir)bin\"" ) - else() - set(ccbin_flags) - endif() - - # Figure out which configure we will use and pass that in as an argument to - # the script. We need to defer the decision until compilation time, because - # for VS projects we won't know if we are making a debug or release build - # until build time. - if(CMAKE_GENERATOR MATCHES "Visual Studio") - set( CUDA_build_configuration "$(ConfigurationName)" ) - else() - set( CUDA_build_configuration "${CMAKE_BUILD_TYPE}") - endif() - - # Initialize our list of includes with the user ones followed by the CUDA system ones. - set(CUDA_NVCC_INCLUDE_ARGS ${CUDA_NVCC_INCLUDE_ARGS_USER} "-I${CUDA_INCLUDE_DIRS}") - # Get the include directories for this directory and use them for our nvcc command. - # Remove duplicate entries which may be present since include_directories - # in CMake >= 2.8.8 does not remove them. - get_directory_property(CUDA_NVCC_INCLUDE_DIRECTORIES INCLUDE_DIRECTORIES) - list(REMOVE_DUPLICATES CUDA_NVCC_INCLUDE_DIRECTORIES) - if(CUDA_NVCC_INCLUDE_DIRECTORIES) - foreach(dir ${CUDA_NVCC_INCLUDE_DIRECTORIES}) - list(APPEND CUDA_NVCC_INCLUDE_ARGS -I${dir}) - endforeach() - endif() - - # Reset these variables - set(CUDA_WRAP_OPTION_NVCC_FLAGS) - foreach(config ${CUDA_configuration_types}) - string(TOUPPER ${config} config_upper) - set(CUDA_WRAP_OPTION_NVCC_FLAGS_${config_upper}) - endforeach() - - CUDA_GET_SOURCES_AND_OPTIONS(_cuda_wrap_sources _cuda_wrap_cmake_options _cuda_wrap_options ${ARGN}) - CUDA_PARSE_NVCC_OPTIONS(CUDA_WRAP_OPTION_NVCC_FLAGS ${_cuda_wrap_options}) - - # Figure out if we are building a shared library. BUILD_SHARED_LIBS is - # respected in CUDA_ADD_LIBRARY. 
- set(_cuda_build_shared_libs FALSE) - # SHARED, MODULE - list(FIND _cuda_wrap_cmake_options SHARED _cuda_found_SHARED) - list(FIND _cuda_wrap_cmake_options MODULE _cuda_found_MODULE) - if(_cuda_found_SHARED GREATER -1 OR _cuda_found_MODULE GREATER -1) - set(_cuda_build_shared_libs TRUE) - endif() - # STATIC - list(FIND _cuda_wrap_cmake_options STATIC _cuda_found_STATIC) - if(_cuda_found_STATIC GREATER -1) - set(_cuda_build_shared_libs FALSE) - endif() - - # CUDA_HOST_FLAGS - if(_cuda_build_shared_libs) - # If we are setting up code for a shared library, then we need to add extra flags for - # compiling objects for shared libraries. - set(CUDA_HOST_SHARED_FLAGS ${CMAKE_SHARED_LIBRARY_${CUDA_C_OR_CXX}_FLAGS}) - else() - set(CUDA_HOST_SHARED_FLAGS) - endif() - # Only add the CMAKE_{C,CXX}_FLAGS if we are propagating host flags. We - # always need to set the SHARED_FLAGS, though. - if(CUDA_PROPAGATE_HOST_FLAGS) - set(_cuda_host_flags "set(CMAKE_HOST_FLAGS ${CMAKE_${CUDA_C_OR_CXX}_FLAGS} ${CUDA_HOST_SHARED_FLAGS})") - else() - set(_cuda_host_flags "set(CMAKE_HOST_FLAGS ${CUDA_HOST_SHARED_FLAGS})") - endif() - - set(_cuda_nvcc_flags_config "# Build specific configuration flags") - # Loop over all the configuration types to generate appropriate flags for run_nvcc.cmake - foreach(config ${CUDA_configuration_types}) - string(TOUPPER ${config} config_upper) - # CMAKE_FLAGS are strings and not lists. By not putting quotes around CMAKE_FLAGS - # we convert the strings to lists (like we want). 
- - if(CUDA_PROPAGATE_HOST_FLAGS) - # nvcc chokes on -g3 in versions previous to 3.0, so replace it with -g - set(_cuda_fix_g3 FALSE) - - if(CMAKE_COMPILER_IS_GNUCC) - if (CUDA_VERSION VERSION_LESS "3.0" OR - CUDA_VERSION VERSION_EQUAL "4.1" OR - CUDA_VERSION VERSION_EQUAL "4.2" - ) - set(_cuda_fix_g3 TRUE) - endif() - endif() - if(_cuda_fix_g3) - string(REPLACE "-g3" "-g" _cuda_C_FLAGS "${CMAKE_${CUDA_C_OR_CXX}_FLAGS_${config_upper}}") - else() - set(_cuda_C_FLAGS "${CMAKE_${CUDA_C_OR_CXX}_FLAGS_${config_upper}}") - endif() - - set(_cuda_host_flags "${_cuda_host_flags}\nset(CMAKE_HOST_FLAGS_${config_upper} ${_cuda_C_FLAGS})") - endif() - - # Note that if we ever want CUDA_NVCC_FLAGS_ to be string (instead of a list - # like it is currently), we can remove the quotes around the - # ${CUDA_NVCC_FLAGS_${config_upper}} variable like the CMAKE_HOST_FLAGS_ variable. - set(_cuda_nvcc_flags_config "${_cuda_nvcc_flags_config}\nset(CUDA_NVCC_FLAGS_${config_upper} ${CUDA_NVCC_FLAGS_${config_upper}} ;; ${CUDA_WRAP_OPTION_NVCC_FLAGS_${config_upper}})") - endforeach() - - # Get the list of definitions from the directory property - get_directory_property(CUDA_NVCC_DEFINITIONS COMPILE_DEFINITIONS) - if(CUDA_NVCC_DEFINITIONS) - foreach(_definition ${CUDA_NVCC_DEFINITIONS}) - list(APPEND nvcc_flags "-D${_definition}") - endforeach() - endif() - - if(_cuda_build_shared_libs) - list(APPEND nvcc_flags "-D${cuda_target}_EXPORTS") - endif() - - # Reset the output variable - set(_cuda_wrap_generated_files "") - - # Iterate over the macro arguments and create custom - # commands for all the .cu files. - foreach(file ${ARGN}) - # Ignore any file marked as a HEADER_FILE_ONLY - get_source_file_property(_is_header ${file} HEADER_FILE_ONLY) - if(${file} MATCHES ".*\\.cu$" AND NOT _is_header) - - # Allow per source file overrides of the format. 
- get_source_file_property(_cuda_source_format ${file} CUDA_SOURCE_PROPERTY_FORMAT) - if(NOT _cuda_source_format) - set(_cuda_source_format ${format}) - endif() - - if( ${_cuda_source_format} MATCHES "PTX" ) - set( compile_to_ptx ON ) - elseif( ${_cuda_source_format} MATCHES "OBJ") - set( compile_to_ptx OFF ) - else() - message( FATAL_ERROR "Invalid format flag passed to CUDA_WRAP_SRCS for file '${file}': '${_cuda_source_format}'. Use OBJ or PTX.") - endif() - - - if(compile_to_ptx) - # Don't use any of the host compilation flags for PTX targets. - set(CUDA_HOST_FLAGS) - set(CUDA_NVCC_FLAGS_CONFIG) - else() - set(CUDA_HOST_FLAGS ${_cuda_host_flags}) - set(CUDA_NVCC_FLAGS_CONFIG ${_cuda_nvcc_flags_config}) - endif() - - # Determine output directory - cuda_compute_build_path("${file}" cuda_build_path) - set(cuda_compile_intermediate_directory "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${cuda_target}.dir/${cuda_build_path}") - if(CUDA_GENERATED_OUTPUT_DIR) - set(cuda_compile_output_dir "${CUDA_GENERATED_OUTPUT_DIR}") - else() - if ( compile_to_ptx ) - set(cuda_compile_output_dir "${CMAKE_CURRENT_BINARY_DIR}") - else() - set(cuda_compile_output_dir "${cuda_compile_intermediate_directory}") - endif() - endif() - - # Add a custom target to generate a c or ptx file. ###################### - - get_filename_component( basename ${file} NAME ) - if( compile_to_ptx ) - set(generated_file_path "${cuda_compile_output_dir}") - set(generated_file_basename "${cuda_target}_generated_${basename}.ptx") - set(format_flag "-ptx") - file(MAKE_DIRECTORY "${cuda_compile_output_dir}") - else() - set(generated_file_path "${cuda_compile_output_dir}/${CMAKE_CFG_INTDIR}") - set(generated_file_basename "${cuda_target}_generated_${basename}${generated_extension}") - if(CUDA_SEPARABLE_COMPILATION) - set(format_flag "-dc") - else() - set(format_flag "-c") - endif() - endif() - - # Set all of our file names. 
Make sure that whatever filenames that have - # generated_file_path in them get passed in through as a command line - # argument, so that the ${CMAKE_CFG_INTDIR} gets expanded at run time - # instead of configure time. - set(generated_file "${generated_file_path}/${generated_file_basename}") - set(cmake_dependency_file "${cuda_compile_intermediate_directory}/${generated_file_basename}.depend") - set(NVCC_generated_dependency_file "${cuda_compile_intermediate_directory}/${generated_file_basename}.NVCC-depend") - set(generated_cubin_file "${generated_file_path}/${generated_file_basename}.cubin.txt") - set(custom_target_script "${cuda_compile_intermediate_directory}/${generated_file_basename}.cmake") - - # Setup properties for obj files: - if( NOT compile_to_ptx ) - set_source_files_properties("${generated_file}" - PROPERTIES - EXTERNAL_OBJECT true # This is an object file not to be compiled, but only be linked. - ) - endif() - - # Don't add CMAKE_CURRENT_SOURCE_DIR if the path is already an absolute path. - get_filename_component(file_path "${file}" PATH) - if(IS_ABSOLUTE "${file_path}") - set(source_file "${file}") - else() - set(source_file "${CMAKE_CURRENT_SOURCE_DIR}/${file}") - endif() - - if( NOT compile_to_ptx AND CUDA_SEPARABLE_COMPILATION) - list(APPEND ${cuda_target}_SEPARABLE_COMPILATION_OBJECTS "${generated_file}") - endif() - - # Bring in the dependencies. 
Creates a variable CUDA_NVCC_DEPEND ####### - cuda_include_nvcc_dependencies(${cmake_dependency_file}) - - # Convience string for output ########################################### - if(CUDA_BUILD_EMULATION) - set(cuda_build_type "Emulation") - else() - set(cuda_build_type "Device") - endif() - - # Build the NVCC made dependency file ################################### - set(build_cubin OFF) - if ( NOT CUDA_BUILD_EMULATION AND CUDA_BUILD_CUBIN ) - if ( NOT compile_to_ptx ) - set ( build_cubin ON ) - endif() - endif() - - # Configure the build script - configure_file("${CUDA_run_nvcc}" "${custom_target_script}" @ONLY) - - # So if a user specifies the same cuda file as input more than once, you - # can have bad things happen with dependencies. Here we check an option - # to see if this is the behavior they want. - if(CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE) - set(main_dep MAIN_DEPENDENCY ${source_file}) - else() - set(main_dep DEPENDS ${source_file}) - endif() - - if(CUDA_VERBOSE_BUILD) - set(verbose_output ON) - elseif(CMAKE_GENERATOR MATCHES "Makefiles") - set(verbose_output "$(VERBOSE)") - else() - set(verbose_output OFF) - endif() - - # Create up the comment string - file(RELATIVE_PATH generated_file_relative_path "${CMAKE_BINARY_DIR}" "${generated_file}") - if(compile_to_ptx) - set(cuda_build_comment_string "Building NVCC ptx file ${generated_file_relative_path}") - else() - set(cuda_build_comment_string "Building NVCC (${cuda_build_type}) object ${generated_file_relative_path}") - endif() - - # Build the generated file and dependency file ########################## - add_custom_command( - OUTPUT ${generated_file} - # These output files depend on the source_file and the contents of cmake_dependency_file - ${main_dep} - DEPENDS ${CUDA_NVCC_DEPEND} - DEPENDS ${custom_target_script} - # Make sure the output directory exists before trying to write to it. 
- COMMAND ${CMAKE_COMMAND} -E make_directory "${generated_file_path}" - COMMAND ${CMAKE_COMMAND} ARGS - -D verbose:BOOL=${verbose_output} - ${ccbin_flags} - -D build_configuration:STRING=${CUDA_build_configuration} - -D "generated_file:STRING=${generated_file}" - -D "generated_cubin_file:STRING=${generated_cubin_file}" - -P "${custom_target_script}" - WORKING_DIRECTORY "${cuda_compile_intermediate_directory}" - COMMENT "${cuda_build_comment_string}" - ) - - # Make sure the build system knows the file is generated. - set_source_files_properties(${generated_file} PROPERTIES GENERATED TRUE) - - list(APPEND _cuda_wrap_generated_files ${generated_file}) - - # Add the other files that we want cmake to clean on a cleanup ########## - list(APPEND CUDA_ADDITIONAL_CLEAN_FILES "${cmake_dependency_file}") - list(REMOVE_DUPLICATES CUDA_ADDITIONAL_CLEAN_FILES) - set(CUDA_ADDITIONAL_CLEAN_FILES ${CUDA_ADDITIONAL_CLEAN_FILES} CACHE INTERNAL "List of intermediate files that are part of the cuda dependency scanning.") - - endif() - endforeach() - - # Set the return parameter - set(${generated_files} ${_cuda_wrap_generated_files}) -endmacro() - -function(_cuda_get_important_host_flags important_flags flag_string) - if(CMAKE_GENERATOR MATCHES "Visual Studio") - string(REGEX MATCHALL "/M[DT][d]?" 
flags ${flag_string}) - list(APPEND ${important_flags} ${flags}) - else() - string(REGEX MATCHALL "-fPIC" flags ${flag_string}) - list(APPEND ${important_flags} ${flags}) - endif() - set(${important_flags} ${${important_flags}} PARENT_SCOPE) -endfunction() - -############################################################################### -############################################################################### -# Separable Compilation Link -############################################################################### -############################################################################### - -# Compute the filename to be used by CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS -function(CUDA_COMPUTE_SEPARABLE_COMPILATION_OBJECT_FILE_NAME output_file_var cuda_target object_files) - if (object_files) - set(generated_extension ${CMAKE_${CUDA_C_OR_CXX}_OUTPUT_EXTENSION}) - set(output_file "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${cuda_target}.dir/${CMAKE_CFG_INTDIR}/${cuda_target}_intermediate_link${generated_extension}") - else() - set(output_file) - endif() - - set(${output_file_var} "${output_file}" PARENT_SCOPE) -endfunction() - -# Setup the build rule for the separable compilation intermediate link file. -function(CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS output_file cuda_target options object_files) - if (object_files) - - set_source_files_properties("${output_file}" - PROPERTIES - EXTERNAL_OBJECT TRUE # This is an object file not to be compiled, but only - # be linked. - GENERATED TRUE # This file is generated during the build - ) - - # For now we are ignoring all the configuration specific flags. - set(nvcc_flags) - CUDA_PARSE_NVCC_OPTIONS(nvcc_flags ${options}) - if(CUDA_64_BIT_DEVICE_CODE) - list(APPEND nvcc_flags -m64) - else() - list(APPEND nvcc_flags -m32) - endif() - # If -ccbin, --compiler-bindir has been specified, don't do anything. Otherwise add it here. 
- list( FIND nvcc_flags "-ccbin" ccbin_found0 ) - list( FIND nvcc_flags "--compiler-bindir" ccbin_found1 ) - if( ccbin_found0 LESS 0 AND ccbin_found1 LESS 0 ) - list(APPEND nvcc_flags -ccbin "\"${CUDA_HOST_COMPILER}\"") - endif() - set(flags) - foreach(config ${CUDA_configuration_types}) - string(TOUPPER ${config} config_upper) - set(important_host_flags) - _cuda_get_important_host_flags(important_host_flags ${CMAKE_${CUDA_C_OR_CXX}_FLAGS_${config_upper}}) - foreach(f ${important_host_flags}) - list(APPEND flags $<$:-Xcompiler> $<$:${f}>) - endforeach() - endforeach() - file(RELATIVE_PATH output_file_relative_path "${CMAKE_BINARY_DIR}" "${output_file}") - - # Some generators don't handle the multiple levels of custom command - # dependencies correctly (obj1 depends on file1, obj2 depends on obj1), so - # we work around that issue by compiling the intermediate link object as a - # pre-link custom command in that situation. - set(do_obj_build_rule TRUE) - if (MSVC_VERSION GREATER 1599) - # VS 2010 and 2012 have this problem. If future versions fix this issue, - # it should still work, it just won't be as nice as the other method. 
- set(do_obj_build_rule FALSE) - endif() - - if (do_obj_build_rule) - add_custom_command( - OUTPUT ${output_file} - DEPENDS ${object_files} - COMMAND ${CUDA_NVCC_EXECUTABLE} ${nvcc_flags} -dlink ${object_files} -o ${output_file} - ${flags} - COMMENT "Building NVCC intermediate link file ${output_file_relative_path}" - ) - else() - add_custom_command( - TARGET ${cuda_target} - PRE_LINK - COMMAND ${CMAKE_COMMAND} -E echo "Building NVCC intermediate link file ${output_file_relative_path}" - COMMAND ${CUDA_NVCC_EXECUTABLE} ${nvcc_flags} ${flags} -dlink ${object_files} -o "${output_file}" - ) - endif() - endif() -endfunction() - -############################################################################### -############################################################################### -# ADD LIBRARY -############################################################################### -############################################################################### -macro(CUDA_ADD_LIBRARY cuda_target) - - CUDA_ADD_CUDA_INCLUDE_ONCE() - - # Separate the sources from the options - CUDA_GET_SOURCES_AND_OPTIONS(_sources _cmake_options _options ${ARGN}) - CUDA_BUILD_SHARED_LIBRARY(_cuda_shared_flag ${ARGN}) - # Create custom commands and targets for each file. - CUDA_WRAP_SRCS( ${cuda_target} OBJ _generated_files ${_sources} - ${_cmake_options} ${_cuda_shared_flag} - OPTIONS ${_options} ) - - # Compute the file name of the intermedate link file used for separable - # compilation. - CUDA_COMPUTE_SEPARABLE_COMPILATION_OBJECT_FILE_NAME(link_file ${cuda_target} "${${cuda_target}_SEPARABLE_COMPILATION_OBJECTS}") - - # Add the library. - add_library(${cuda_target} ${_cmake_options} - ${_generated_files} - ${_sources} - ${link_file} - ) - - # Add a link phase for the separable compilation if it has been enabled. If - # it has been enabled then the ${cuda_target}_SEPARABLE_COMPILATION_OBJECTS - # variable will have been defined. 
- CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS("${link_file}" ${cuda_target} "${_options}" "${${cuda_target}_SEPARABLE_COMPILATION_OBJECTS}") - - target_link_libraries(${cuda_target} - ${CUDA_LIBRARIES} - ) - - # We need to set the linker language based on what the expected generated file - # would be. CUDA_C_OR_CXX is computed based on CUDA_HOST_COMPILATION_CPP. - set_target_properties(${cuda_target} - PROPERTIES - LINKER_LANGUAGE ${CUDA_C_OR_CXX} - ) - -endmacro() - - -############################################################################### -############################################################################### -# ADD EXECUTABLE -############################################################################### -############################################################################### -macro(CUDA_ADD_EXECUTABLE cuda_target) - - CUDA_ADD_CUDA_INCLUDE_ONCE() - - # Separate the sources from the options - CUDA_GET_SOURCES_AND_OPTIONS(_sources _cmake_options _options ${ARGN}) - # Create custom commands and targets for each file. - CUDA_WRAP_SRCS( ${cuda_target} OBJ _generated_files ${_sources} OPTIONS ${_options} ) - - # Compute the file name of the intermedate link file used for separable - # compilation. - CUDA_COMPUTE_SEPARABLE_COMPILATION_OBJECT_FILE_NAME(link_file ${cuda_target} "${${cuda_target}_SEPARABLE_COMPILATION_OBJECTS}") - - # Add the library. - add_executable(${cuda_target} ${_cmake_options} - ${_generated_files} - ${_sources} - ${link_file} - ) - - # Add a link phase for the separable compilation if it has been enabled. If - # it has been enabled then the ${cuda_target}_SEPARABLE_COMPILATION_OBJECTS - # variable will have been defined. - CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS("${link_file}" ${cuda_target} "${_options}" "${${cuda_target}_SEPARABLE_COMPILATION_OBJECTS}") - - target_link_libraries(${cuda_target} - ${CUDA_LIBRARIES} - ) - - # We need to set the linker language based on what the expected generated file - # would be. 
CUDA_C_OR_CXX is computed based on CUDA_HOST_COMPILATION_CPP. - set_target_properties(${cuda_target} - PROPERTIES - LINKER_LANGUAGE ${CUDA_C_OR_CXX} - ) - -endmacro() - - -############################################################################### -############################################################################### -# CUDA COMPILE -############################################################################### -############################################################################### -macro(CUDA_COMPILE generated_files) - - # Separate the sources from the options - CUDA_GET_SOURCES_AND_OPTIONS(_sources _cmake_options _options ${ARGN}) - # Create custom commands and targets for each file. - CUDA_WRAP_SRCS( cuda_compile OBJ _generated_files ${_sources} ${_cmake_options} - OPTIONS ${_options} ) - - set( ${generated_files} ${_generated_files}) - -endmacro() - - -############################################################################### -############################################################################### -# CUDA COMPILE PTX -############################################################################### -############################################################################### -macro(CUDA_COMPILE_PTX generated_files) - - # Separate the sources from the options - CUDA_GET_SOURCES_AND_OPTIONS(_sources _cmake_options _options ${ARGN}) - # Create custom commands and targets for each file. 
- CUDA_WRAP_SRCS( cuda_compile_ptx PTX _generated_files ${_sources} ${_cmake_options} - OPTIONS ${_options} ) - - set( ${generated_files} ${_generated_files}) - -endmacro() - -############################################################################### -############################################################################### -# CUDA ADD CUFFT TO TARGET -############################################################################### -############################################################################### -macro(CUDA_ADD_CUFFT_TO_TARGET target) - if (CUDA_BUILD_EMULATION) - target_link_libraries(${target} ${CUDA_cufftemu_LIBRARY}) - else() - target_link_libraries(${target} ${CUDA_cufft_LIBRARY}) - endif() -endmacro() - -############################################################################### -############################################################################### -# CUDA ADD CUBLAS TO TARGET -############################################################################### -############################################################################### -macro(CUDA_ADD_CUBLAS_TO_TARGET target) - if (CUDA_BUILD_EMULATION) - target_link_libraries(${target} ${CUDA_cublasemu_LIBRARY}) - else() - target_link_libraries(${target} ${CUDA_cublas_LIBRARY}) - endif() -endmacro() - -############################################################################### -############################################################################### -# CUDA BUILD CLEAN TARGET -############################################################################### -############################################################################### -macro(CUDA_BUILD_CLEAN_TARGET) - # Call this after you add all your CUDA targets, and you will get a convience - # target. You should also make clean after running this target to get the - # build system to generate all the code again. 
- - set(cuda_clean_target_name clean_cuda_depends) - if (CMAKE_GENERATOR MATCHES "Visual Studio") - string(TOUPPER ${cuda_clean_target_name} cuda_clean_target_name) - endif() - add_custom_target(${cuda_clean_target_name} - COMMAND ${CMAKE_COMMAND} -E remove ${CUDA_ADDITIONAL_CLEAN_FILES}) - - # Clear out the variable, so the next time we configure it will be empty. - # This is useful so that the files won't persist in the list after targets - # have been removed. - set(CUDA_ADDITIONAL_CLEAN_FILES "" CACHE INTERNAL "List of intermediate files that are part of the cuda dependency scanning.") -endmacro() diff --git a/src/cmake/FindCUDA/make2cmake.cmake b/src/cmake/FindCUDA/make2cmake.cmake deleted file mode 100755 index 1b53d177..00000000 --- a/src/cmake/FindCUDA/make2cmake.cmake +++ /dev/null @@ -1,93 +0,0 @@ -# James Bigler, NVIDIA Corp (nvidia.com - jbigler) -# Abe Stephens, SCI Institute -- http://www.sci.utah.edu/~abe/FindCuda.html -# -# Copyright (c) 2008 - 2009 NVIDIA Corporation. All rights reserved. -# -# Copyright (c) 2007-2009 -# Scientific Computing and Imaging Institute, University of Utah -# -# This code is licensed under the MIT License. See the FindCUDA.cmake script -# for the text of the license. - -# The MIT License -# -# License for the specific language governing rights and limitations under -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included -# in all copies or substantial portions of the Software. 
-# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. -# - -####################################################################### -# This converts a file written in makefile syntax into one that can be included -# by CMake. - -file(READ ${input_file} depend_text) - -if (${depend_text} MATCHES ".+") - - # message("FOUND DEPENDS") - - # Remember, four backslashes is escaped to one backslash in the string. - string(REGEX REPLACE "\\\\ " " " depend_text ${depend_text}) - - # This works for the nvcc -M generated dependency files. - string(REGEX REPLACE "^.* : " "" depend_text ${depend_text}) - string(REGEX REPLACE "[ \\\\]*\n" ";" depend_text ${depend_text}) - - set(dependency_list "") - - foreach(file ${depend_text}) - - string(REGEX REPLACE "^ +" "" file ${file}) - - # OK, now if we had a UNC path, nvcc has a tendency to only output the first '/' - # instead of '//'. Here we will test to see if the file exists, if it doesn't then - # try to prepend another '/' to the path and test again. If it still fails remove the - # path. - - if(NOT EXISTS "${file}") - if (EXISTS "/${file}") - set(file "/${file}") - else() - message(WARNING " Removing non-existent dependency file: ${file}") - set(file "") - endif() - endif() - - if(NOT IS_DIRECTORY "${file}") - # If softlinks start to matter, we should change this to REALPATH. For now we need - # to flatten paths, because nvcc can generate stuff like /bin/../include instead of - # just /include. 
- get_filename_component(file_absolute "${file}" ABSOLUTE) - list(APPEND dependency_list "${file_absolute}") - endif() - - endforeach() - -else() - # message("FOUND NO DEPENDS") -endif() - -# Remove the duplicate entries and sort them. -list(REMOVE_DUPLICATES dependency_list) -list(SORT dependency_list) - -foreach(file ${dependency_list}) - set(cuda_nvcc_depend "${cuda_nvcc_depend} \"${file}\"\n") -endforeach() - -file(WRITE ${output_file} "# Generated by: make2cmake.cmake\nSET(CUDA_NVCC_DEPEND\n ${cuda_nvcc_depend})\n\n") diff --git a/src/cmake/FindCUDA/parse_cubin.cmake b/src/cmake/FindCUDA/parse_cubin.cmake deleted file mode 100755 index e1905cfc..00000000 --- a/src/cmake/FindCUDA/parse_cubin.cmake +++ /dev/null @@ -1,110 +0,0 @@ -# James Bigler, NVIDIA Corp (nvidia.com - jbigler) -# Abe Stephens, SCI Institute -- http://www.sci.utah.edu/~abe/FindCuda.html -# -# Copyright (c) 2008 - 2009 NVIDIA Corporation. All rights reserved. -# -# Copyright (c) 2007-2009 -# Scientific Computing and Imaging Institute, University of Utah -# -# This code is licensed under the MIT License. See the FindCUDA.cmake script -# for the text of the license. - -# The MIT License -# -# License for the specific language governing rights and limitations under -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included -# in all copies or substantial portions of the Software. 
-# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. -# - -####################################################################### -# Parses a .cubin file produced by nvcc and reports statistics about the file. - - -file(READ ${input_file} file_text) - -if (${file_text} MATCHES ".+") - - # Remember, four backslashes is escaped to one backslash in the string. - string(REGEX REPLACE ";" "\\\\;" file_text ${file_text}) - string(REGEX REPLACE "\ncode" ";code" file_text ${file_text}) - - list(LENGTH file_text len) - - foreach(line ${file_text}) - - # Only look at "code { }" blocks. - if(line MATCHES "^code") - - # Break into individual lines. - string(REGEX REPLACE "\n" ";" line ${line}) - - foreach(entry ${line}) - - # Extract kernel names. - if (${entry} MATCHES "[^g]name = ([^ ]+)") - string(REGEX REPLACE ".* = ([^ ]+)" "\\1" entry ${entry}) - - # Check to see if the kernel name starts with "_" - set(skip FALSE) - # if (${entry} MATCHES "^_") - # Skip the rest of this block. 
- # message("Skipping ${entry}") - # set(skip TRUE) - # else () - message("Kernel: ${entry}") - # endif () - - endif() - - # Skip the rest of the block if necessary - if(NOT skip) - - # Registers - if (${entry} MATCHES "reg([ ]+)=([ ]+)([^ ]+)") - string(REGEX REPLACE ".*([ ]+)=([ ]+)([^ ]+)" "\\3" entry ${entry}) - message("Registers: ${entry}") - endif() - - # Local memory - if (${entry} MATCHES "lmem([ ]+)=([ ]+)([^ ]+)") - string(REGEX REPLACE ".*([ ]+)=([ ]+)([^ ]+)" "\\3" entry ${entry}) - message("Local: ${entry}") - endif() - - # Shared memory - if (${entry} MATCHES "smem([ ]+)=([ ]+)([^ ]+)") - string(REGEX REPLACE ".*([ ]+)=([ ]+)([^ ]+)" "\\3" entry ${entry}) - message("Shared: ${entry}") - endif() - - if (${entry} MATCHES "^}") - message("") - endif() - - endif() - - - endforeach() - - endif() - - endforeach() - -else() - # message("FOUND NO DEPENDS") -endif() diff --git a/src/cmake/FindCUDA/run_nvcc.cmake b/src/cmake/FindCUDA/run_nvcc.cmake deleted file mode 100755 index f0aac848..00000000 --- a/src/cmake/FindCUDA/run_nvcc.cmake +++ /dev/null @@ -1,288 +0,0 @@ -# James Bigler, NVIDIA Corp (nvidia.com - jbigler) -# -# Copyright (c) 2008 - 2009 NVIDIA Corporation. All rights reserved. -# -# This code is licensed under the MIT License. See the FindCUDA.cmake script -# for the text of the license. 
- -# The MIT License -# -# License for the specific language governing rights and limitations under -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included -# in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - - -########################################################################## -# This file runs the nvcc commands to produce the desired output file along with -# the dependency file needed by CMake to compute dependencies. In addition the -# file checks the output of each command and if the command fails it deletes the -# output files. - -# Input variables -# -# verbose:BOOL=<> OFF: Be as quiet as possible (default) -# ON : Describe each step -# -# build_configuration:STRING=<> Typically one of Debug, MinSizeRel, Release, or -# RelWithDebInfo, but it should match one of the -# entries in CUDA_HOST_FLAGS. This is the build -# configuration used when compiling the code. If -# blank or unspecified Debug is assumed as this is -# what CMake does. -# -# generated_file:STRING=<> File to generate. 
This argument must be passed in. -# -# generated_cubin_file:STRING=<> File to generate. This argument must be passed -# in if build_cubin is true. - -if(NOT generated_file) - message(FATAL_ERROR "You must specify generated_file on the command line") -endif() - -# Set these up as variables to make reading the generated file easier -set(CMAKE_COMMAND "@CMAKE_COMMAND@") # path -set(source_file "@source_file@") # path -set(NVCC_generated_dependency_file "@NVCC_generated_dependency_file@") # path -set(cmake_dependency_file "@cmake_dependency_file@") # path -set(CUDA_make2cmake "@CUDA_make2cmake@") # path -set(CUDA_parse_cubin "@CUDA_parse_cubin@") # path -set(build_cubin @build_cubin@) # bool -set(CUDA_HOST_COMPILER "@CUDA_HOST_COMPILER@") # bool -# We won't actually use these variables for now, but we need to set this, in -# order to force this file to be run again if it changes. -set(generated_file_path "@generated_file_path@") # path -set(generated_file_internal "@generated_file@") # path -set(generated_cubin_file_internal "@generated_cubin_file@") # path - -set(CUDA_NVCC_EXECUTABLE "@CUDA_NVCC_EXECUTABLE@") # path -set(CUDA_NVCC_FLAGS @CUDA_NVCC_FLAGS@ ;; @CUDA_WRAP_OPTION_NVCC_FLAGS@) # list -@CUDA_NVCC_FLAGS_CONFIG@ -set(nvcc_flags @nvcc_flags@) # list -set(CUDA_NVCC_INCLUDE_ARGS "@CUDA_NVCC_INCLUDE_ARGS@") # list (needs to be in quotes to handle spaces properly). -set(format_flag "@format_flag@") # string - -if(build_cubin AND NOT generated_cubin_file) - message(FATAL_ERROR "You must specify generated_cubin_file on the command line") -endif() - -# This is the list of host compilation flags. It C or CXX should already have -# been chosen by FindCUDA.cmake. -@CUDA_HOST_FLAGS@ - -# Take the compiler flags and package them up to be sent to the compiler via -Xcompiler -set(nvcc_host_compiler_flags "") -# If we weren't given a build_configuration, use Debug. 
-if(NOT build_configuration) - set(build_configuration Debug) -endif() -string(TOUPPER "${build_configuration}" build_configuration) -#message("CUDA_NVCC_HOST_COMPILER_FLAGS = ${CUDA_NVCC_HOST_COMPILER_FLAGS}") -foreach(flag ${CMAKE_HOST_FLAGS} ${CMAKE_HOST_FLAGS_${build_configuration}}) - # Extra quotes are added around each flag to help nvcc parse out flags with spaces. - set(nvcc_host_compiler_flags "${nvcc_host_compiler_flags},\"${flag}\"") -endforeach() -if (nvcc_host_compiler_flags) - set(nvcc_host_compiler_flags "-Xcompiler" ${nvcc_host_compiler_flags}) -endif() -#message("nvcc_host_compiler_flags = \"${nvcc_host_compiler_flags}\"") -# Add the build specific configuration flags -list(APPEND CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS_${build_configuration}}) - -# Any -ccbin existing in CUDA_NVCC_FLAGS gets highest priority -list( FIND CUDA_NVCC_FLAGS "-ccbin" ccbin_found0 ) -list( FIND CUDA_NVCC_FLAGS "--compiler-bindir" ccbin_found1 ) -if( ccbin_found0 LESS 0 AND ccbin_found1 LESS 0 ) - if (CUDA_HOST_COMPILER STREQUAL "$(VCInstallDir)bin" AND DEFINED CCBIN) - set(CCBIN -ccbin "${CCBIN}") - else() - set(CCBIN -ccbin "${CUDA_HOST_COMPILER}") - endif() -endif() - -# cuda_execute_process - Executes a command with optional command echo and status message. -# -# status - Status message to print if verbose is true -# command - COMMAND argument from the usual execute_process argument structure -# ARGN - Remaining arguments are the command with arguments -# -# CUDA_result - return value from running the command -# -# Make this a macro instead of a function, so that things like RESULT_VARIABLE -# and other return variables are present after executing the process. -macro(cuda_execute_process status command) - set(_command ${command}) - if(NOT _command STREQUAL "COMMAND") - message(FATAL_ERROR "Malformed call to cuda_execute_process. Missing COMMAND as second argument. 
(command = ${command})") - endif() - if(verbose) - execute_process(COMMAND "${CMAKE_COMMAND}" -E echo -- ${status}) - # Now we need to build up our command string. We are accounting for quotes - # and spaces, anything else is left up to the user to fix if they want to - # copy and paste a runnable command line. - set(cuda_execute_process_string) - foreach(arg ${ARGN}) - # If there are quotes, excape them, so they come through. - string(REPLACE "\"" "\\\"" arg ${arg}) - # Args with spaces need quotes around them to get them to be parsed as a single argument. - if(arg MATCHES " ") - list(APPEND cuda_execute_process_string "\"${arg}\"") - else() - list(APPEND cuda_execute_process_string ${arg}) - endif() - endforeach() - # Echo the command - execute_process(COMMAND ${CMAKE_COMMAND} -E echo ${cuda_execute_process_string}) - endif() - # Run the command - execute_process(COMMAND ${ARGN} RESULT_VARIABLE CUDA_result ) -endmacro() - -# Delete the target file -cuda_execute_process( - "Removing ${generated_file}" - COMMAND "${CMAKE_COMMAND}" -E remove "${generated_file}" - ) - -# For CUDA 2.3 and below, -G -M doesn't work, so remove the -G flag -# for dependency generation and hope for the best. -set(depends_CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS}") -set(CUDA_VERSION @CUDA_VERSION@) -if(CUDA_VERSION VERSION_LESS "3.0") - cmake_policy(PUSH) - # CMake policy 0007 NEW states that empty list elements are not - # ignored. I'm just setting it to avoid the warning that's printed. - cmake_policy(SET CMP0007 NEW) - # Note that this will remove all occurances of -G. - list(REMOVE_ITEM depends_CUDA_NVCC_FLAGS "-G") - cmake_policy(POP) -endif() - -# nvcc doesn't define __CUDACC__ for some reason when generating dependency files. This -# can cause incorrect dependencies when #including files based on this macro which is -# defined in the generating passes of nvcc invokation. We will go ahead and manually -# define this for now until a future version fixes this bug. 
-set(CUDACC_DEFINE -D__CUDACC__) - -# Generate the dependency file -cuda_execute_process( - "Generating dependency file: ${NVCC_generated_dependency_file}" - COMMAND "${CUDA_NVCC_EXECUTABLE}" - -M - ${CUDACC_DEFINE} - "${source_file}" - -o "${NVCC_generated_dependency_file}" - ${CCBIN} - ${nvcc_flags} - ${nvcc_host_compiler_flags} - ${depends_CUDA_NVCC_FLAGS} - -DNVCC - ${CUDA_NVCC_INCLUDE_ARGS} - ) - -if(CUDA_result) - message(FATAL_ERROR "Error generating ${generated_file}") -endif() - -# Generate the cmake readable dependency file to a temp file. Don't put the -# quotes just around the filenames for the input_file and output_file variables. -# CMake will pass the quotes through and not be able to find the file. -cuda_execute_process( - "Generating temporary cmake readable file: ${cmake_dependency_file}.tmp" - COMMAND "${CMAKE_COMMAND}" - -D "input_file:FILEPATH=${NVCC_generated_dependency_file}" - -D "output_file:FILEPATH=${cmake_dependency_file}.tmp" - -P "${CUDA_make2cmake}" - ) - -if(CUDA_result) - message(FATAL_ERROR "Error generating ${generated_file}") -endif() - -# Copy the file if it is different -cuda_execute_process( - "Copy if different ${cmake_dependency_file}.tmp to ${cmake_dependency_file}" - COMMAND "${CMAKE_COMMAND}" -E copy_if_different "${cmake_dependency_file}.tmp" "${cmake_dependency_file}" - ) - -if(CUDA_result) - message(FATAL_ERROR "Error generating ${generated_file}") -endif() - -# Delete the temporary file -cuda_execute_process( - "Removing ${cmake_dependency_file}.tmp and ${NVCC_generated_dependency_file}" - COMMAND "${CMAKE_COMMAND}" -E remove "${cmake_dependency_file}.tmp" "${NVCC_generated_dependency_file}" - ) - -if(CUDA_result) - message(FATAL_ERROR "Error generating ${generated_file}") -endif() - -# Generate the code -cuda_execute_process( - "Generating ${generated_file}" - COMMAND "${CUDA_NVCC_EXECUTABLE}" - "${source_file}" - ${format_flag} -o "${generated_file}" - ${CCBIN} - ${nvcc_flags} - ${nvcc_host_compiler_flags} - 
${CUDA_NVCC_FLAGS} - -DNVCC - ${CUDA_NVCC_INCLUDE_ARGS} - ) - -if(CUDA_result) - # Since nvcc can sometimes leave half done files make sure that we delete the output file. - cuda_execute_process( - "Removing ${generated_file}" - COMMAND "${CMAKE_COMMAND}" -E remove "${generated_file}" - ) - message(FATAL_ERROR "Error generating file ${generated_file}") -else() - if(verbose) - message("Generated ${generated_file} successfully.") - endif() -endif() - -# Cubin resource report commands. -if( build_cubin ) - # Run with -cubin to produce resource usage report. - cuda_execute_process( - "Generating ${generated_cubin_file}" - COMMAND "${CUDA_NVCC_EXECUTABLE}" - "${source_file}" - ${CUDA_NVCC_FLAGS} - ${nvcc_flags} - ${CCBIN} - ${nvcc_host_compiler_flags} - -DNVCC - -cubin - -o "${generated_cubin_file}" - ${CUDA_NVCC_INCLUDE_ARGS} - ) - - # Execute the parser script. - cuda_execute_process( - "Executing the parser script" - COMMAND "${CMAKE_COMMAND}" - -D "input_file:STRING=${generated_cubin_file}" - -P "${CUDA_parse_cubin}" - ) - -endif() From 33fdacbe029b1a428c890eea1597091b1616b478 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Thu, 28 Jun 2018 14:05:36 +0200 Subject: [PATCH 039/285] [docker] add a first version --- Dockerfile | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 Dockerfile diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 00000000..df7c6415 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,30 @@ +ARG CUDA_TAG=9.2-devel +FROM nvidia/cuda:$CUDA_TAG + +ARG PYTHON + +# OS/Version (FILE): cat /etc/issue.net +# Cuda version (ENV): $CUDA_VERSION + +# System update +RUN apt-get clean && apt-get update && apt-get install -y \ + build-essential \ + cmake \ + git \ + wget \ + unzip \ + yasm \ + pkg-config \ + libtool \ + nasm \ + automake \ + libpng12-dev \ + libjpeg-turbo8-dev \ + libdevil-dev \ + libboost-filesystem-dev \ + libboost-system-dev \ + libboost-program-options-dev \ + libboost-thread-dev + +ADD . 
/opt/popsift +RUN cd /opt/popsift && mkdir build && cd build && cmake .. && make install -j8 From fae6d24cbc5e1f350c5a4ff5688078985572ba92 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Thu, 28 Jun 2018 14:13:29 +0200 Subject: [PATCH 040/285] [docker] doc --- Dockerfile | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/Dockerfile b/Dockerfile index df7c6415..5195f5bc 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,3 +1,12 @@ +# use CUDA_TAG to select the image version to use +# see https://hub.docker.com/r/nvidia/cuda/ +# +# For example to create a ubuntu 16.04 with cuda 8.0 for development, use +# docker build --build-arg CUDA_TAG=8.0-devel --tag popsift . +# +# then execute wiht nvidia docker (https://github.com/nvidia/nvidia-docker/wiki/Installation-(version-2.0)) +# docker run -it --runtime=nvidia popsift + ARG CUDA_TAG=9.2-devel FROM nvidia/cuda:$CUDA_TAG From ac670c190ce3c6e0352f7b5884c8ed788684cc91 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Thu, 28 Jun 2018 14:13:41 +0200 Subject: [PATCH 041/285] [docker] unused --- Dockerfile | 2 -- 1 file changed, 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 5195f5bc..23c0c403 100644 --- a/Dockerfile +++ b/Dockerfile @@ -10,8 +10,6 @@ ARG CUDA_TAG=9.2-devel FROM nvidia/cuda:$CUDA_TAG -ARG PYTHON - # OS/Version (FILE): cat /etc/issue.net # Cuda version (ENV): $CUDA_VERSION From 1b892b24f19227bb38348ab714e1d3db6d5b4e6b Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Fri, 29 Jun 2018 15:13:51 +0200 Subject: [PATCH 042/285] [docker] fix typo --- Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 23c0c403..25ace02a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,10 +1,10 @@ # use CUDA_TAG to select the image version to use # see https://hub.docker.com/r/nvidia/cuda/ # -# For example to create a ubuntu 16.04 with cuda 8.0 for development, use +# For example, to create a ubuntu 16.04 with cuda 8.0 for development, use # docker 
build --build-arg CUDA_TAG=8.0-devel --tag popsift . # -# then execute wiht nvidia docker (https://github.com/nvidia/nvidia-docker/wiki/Installation-(version-2.0)) +# then execute with nvidia docker (https://github.com/nvidia/nvidia-docker/wiki/Installation-(version-2.0)) # docker run -it --runtime=nvidia popsift ARG CUDA_TAG=9.2-devel From 496940f42a9b65d620e1519377979313a0d2d5cb Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Fri, 29 Jun 2018 15:14:26 +0200 Subject: [PATCH 043/285] [docker] added maintainer MAINTAINER is deprecated --- Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Dockerfile b/Dockerfile index 25ace02a..964c7d5d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,6 +7,7 @@ # then execute with nvidia docker (https://github.com/nvidia/nvidia-docker/wiki/Installation-(version-2.0)) # docker run -it --runtime=nvidia popsift +LABEL maintainer="AliceVision Team alicevision@googlegroups.com" ARG CUDA_TAG=9.2-devel FROM nvidia/cuda:$CUDA_TAG From 2f9fbc204aac28d80588306670e4eef58e940d48 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Fri, 29 Jun 2018 15:14:57 +0200 Subject: [PATCH 044/285] [docker] do not install recommends --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 964c7d5d..d86d4a2f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -15,7 +15,7 @@ FROM nvidia/cuda:$CUDA_TAG # Cuda version (ENV): $CUDA_VERSION # System update -RUN apt-get clean && apt-get update && apt-get install -y \ +RUN apt-get clean && apt-get update && apt-get install -y --no-install-recommends\ build-essential \ cmake \ git \ From 2cc22b359f570eba57b64a6136c4309aabaeb323 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Fri, 29 Jun 2018 15:15:20 +0200 Subject: [PATCH 045/285] [docker] clean after aptget --- Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index d86d4a2f..c551f969 100644 --- a/Dockerfile +++ b/Dockerfile @@ -32,7 +32,8 @@ RUN 
apt-get clean && apt-get update && apt-get install -y --no-install-recommend libboost-filesystem-dev \ libboost-system-dev \ libboost-program-options-dev \ - libboost-thread-dev + libboost-thread-dev \ + && rm -rf /var/lib/apt/lists/* ADD . /opt/popsift RUN cd /opt/popsift && mkdir build && cd build && cmake .. && make install -j8 From d636aedbb51279b946e08ebc19207a30e47540cf Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Fri, 29 Jun 2018 15:15:56 +0200 Subject: [PATCH 046/285] [docker] use COPY instead of ADD --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index c551f969..836850f8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -35,5 +35,5 @@ RUN apt-get clean && apt-get update && apt-get install -y --no-install-recommend libboost-thread-dev \ && rm -rf /var/lib/apt/lists/* -ADD . /opt/popsift RUN cd /opt/popsift && mkdir build && cd build && cmake .. && make install -j8 +COPY . /opt/popsift From bb37032c50cefe86b59e8f596e4d3ad051e62b1f Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Fri, 29 Jun 2018 15:16:27 +0200 Subject: [PATCH 047/285] [docker] use WORKDIR --- Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 836850f8..3a330731 100644 --- a/Dockerfile +++ b/Dockerfile @@ -35,5 +35,6 @@ RUN apt-get clean && apt-get update && apt-get install -y --no-install-recommend libboost-thread-dev \ && rm -rf /var/lib/apt/lists/* -RUN cd /opt/popsift && mkdir build && cd build && cmake .. && make install -j8 COPY . /opt/popsift +WORKDIR /opt/popsift +RUN mkdir build && cd build && cmake .. 
&& make install -j From 2d4f93f4ab76daf5ec2acfc7467d68761f8689ff Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Fri, 29 Jun 2018 15:37:04 +0200 Subject: [PATCH 048/285] [docker] first lines cannot be comments apparently --- Dockerfile | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index 3a330731..c9de5066 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,3 +1,7 @@ +ARG CUDA_TAG=9.2-devel +FROM nvidia/cuda:$CUDA_TAG +LABEL maintainer="AliceVision Team alicevision@googlegroups.com" + # use CUDA_TAG to select the image version to use # see https://hub.docker.com/r/nvidia/cuda/ # @@ -7,9 +11,6 @@ # then execute with nvidia docker (https://github.com/nvidia/nvidia-docker/wiki/Installation-(version-2.0)) # docker run -it --runtime=nvidia popsift -LABEL maintainer="AliceVision Team alicevision@googlegroups.com" -ARG CUDA_TAG=9.2-devel -FROM nvidia/cuda:$CUDA_TAG # OS/Version (FILE): cat /etc/issue.net # Cuda version (ENV): $CUDA_VERSION From b2ee6f42d8e233763056a8881d0f11a42e997aeb Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Fri, 29 Jun 2018 15:53:10 +0200 Subject: [PATCH 049/285] [docker] explicit Release mode --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index c9de5066..d1ad9762 100644 --- a/Dockerfile +++ b/Dockerfile @@ -38,4 +38,4 @@ RUN apt-get clean && apt-get update && apt-get install -y --no-install-recommend COPY . /opt/popsift WORKDIR /opt/popsift -RUN mkdir build && cd build && cmake .. && make install -j +RUN mkdir build && cd build && cmake .. 
-DCMAKE_BUILD_TYPE=Release && make install -j From f7386d4e7157dd6dfa38dee39043dd0f7abf0a1e Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Mon, 2 Jul 2018 17:00:53 +0200 Subject: [PATCH 050/285] [docker] tabs and workdir --- Dockerfile | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/Dockerfile b/Dockerfile index d1ad9762..70fa3090 100644 --- a/Dockerfile +++ b/Dockerfile @@ -17,18 +17,18 @@ LABEL maintainer="AliceVision Team alicevision@googlegroups.com" # System update RUN apt-get clean && apt-get update && apt-get install -y --no-install-recommends\ - build-essential \ - cmake \ - git \ - wget \ - unzip \ - yasm \ - pkg-config \ - libtool \ - nasm \ - automake \ - libpng12-dev \ - libjpeg-turbo8-dev \ + build-essential \ + cmake \ + git \ + wget \ + unzip \ + yasm \ + pkg-config \ + libtool \ + nasm \ + automake \ + libpng12-dev \ + libjpeg-turbo8-dev \ libdevil-dev \ libboost-filesystem-dev \ libboost-system-dev \ @@ -37,5 +37,5 @@ RUN apt-get clean && apt-get update && apt-get install -y --no-install-recommend && rm -rf /var/lib/apt/lists/* COPY . /opt/popsift -WORKDIR /opt/popsift -RUN mkdir build && cd build && cmake .. -DCMAKE_BUILD_TYPE=Release && make install -j +WORKDIR /opt/popsift/build +RUN cmake .. 
-DCMAKE_BUILD_TYPE=Release && make install -j From e7f15c4ec1ecbb1d5ea4f820f3a71d05fe4af8e2 Mon Sep 17 00:00:00 2001 From: Fabien Castan Date: Mon, 6 Aug 2018 16:48:17 +0200 Subject: [PATCH 051/285] [cmake] add cublas dependencies To avoid `undefined reference to __fatbinwrap_XXXX` --- src/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b2677276..d636cbb4 100755 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -58,7 +58,7 @@ set_target_properties(popsift PROPERTIES DEBUG_POSTFIX "d") # cannot use PRIVATE here as there is a bug in FindCUDA and CUDA_ADD_LIBRARY # https://gitlab.kitware.com/cmake/cmake/issues/16097 -target_link_libraries(popsift ${Boost_LIBRARIES} ${CUDA_CUDADEVRT_LIBRARY}) +target_link_libraries(popsift ${Boost_LIBRARIES} ${CUDA_CUDADEVRT_LIBRARY} ${CUDA_CUBLAS_LIBRARIES}) # EXPORTING THE LIBRARY From c388b2e8845e85d75b4211b462cafd5150993cc5 Mon Sep 17 00:00:00 2001 From: Carsten Griwodz Date: Thu, 18 Oct 2018 14:08:26 +0200 Subject: [PATCH 052/285] [win32] Bugfix freeing page-aligned memory on Win32 --- src/popsift/common/assist.h | 9 +++++++++ src/popsift/common/plane_2d.cu | 12 ++++-------- src/popsift/features.cu | 4 ++-- 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/src/popsift/common/assist.h b/src/popsift/common/assist.h index d00f280e..5106fee2 100644 --- a/src/popsift/common/assist.h +++ b/src/popsift/common/assist.h @@ -139,4 +139,13 @@ static void* memalign(size_t alignment, size_t size) #endif } +static void memalign_free( void* ptr ) +{ +#ifdef _WIN32 + _aligned_free( ptr ); +#else + free( ptr ); +#endif +} + }; // namespace popsift diff --git a/src/popsift/common/plane_2d.cu b/src/popsift/common/plane_2d.cu index 9e09570e..f5dd56e4 100644 --- a/src/popsift/common/plane_2d.cu +++ b/src/popsift/common/plane_2d.cu @@ -101,20 +101,16 @@ void PlaneBase::freeHost2D( void* data, PlaneMapMode m ) { if (!data) return; - if (m == CudaAllocated) { 
+ else if (m == CudaAllocated) { cudaFreeHost(data); return; } - if (m == Unaligned) { + else if (m == Unaligned) { free(data); return; } - if (m == PageAligned) { -#ifdef _WIN32 - _aligned_free(data); -#else - free(data); -#endif + else if (m == PageAligned) { + memalign_free( data ); return; } assert(!"Invalid PlaneMapMode"); diff --git a/src/popsift/features.cu b/src/popsift/features.cu index eb694522..c634023f 100755 --- a/src/popsift/features.cu +++ b/src/popsift/features.cu @@ -51,8 +51,8 @@ FeaturesHost::FeaturesHost( int num_ext, int num_ori ) FeaturesHost::~FeaturesHost( ) { - free( _ext ); - free( _ori ); + memalign_free( _ext ); + memalign_free( _ori ); } void FeaturesHost::reset( int num_ext, int num_ori ) From 792854c538da1693064f587116818bf5b649e026 Mon Sep 17 00:00:00 2001 From: Yann Lanthony Date: Thu, 29 Nov 2018 17:07:19 +0100 Subject: [PATCH 053/285] [build] auto-build export symbols for shared libs on Windows otherwise, no .lib file is generated --- CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 38e63ac7..72514f25 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -54,6 +54,8 @@ if(BUILD_SHARED_LIBS) set(CUDA_USE_STATIC_CUDA_RUNTIME OFF) # Workaround to force deactivation of cuda static runtime for cmake < 3.10 set(CUDA_cudart_static_LIBRARY 0) + # Auto-build dll exports on Windows + set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) else() message(STATUS "BUILD_SHARED_LIBS OFF") option(CUDA_USE_STATIC_CUDA_RUNTIME "Use the static version of the CUDA runtime library if available" ON) From 08e96a62783ade7576f405a3fb4724dfd7e164ba Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Fri, 5 Apr 2019 16:12:39 +0200 Subject: [PATCH 054/285] [doc] add paper citation --- README.md | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/README.md b/README.md index 46c26a3d..172638f3 100644 --- a/README.md +++ b/README.md @@ -80,6 +80,28 @@ License PopSift is licensed under [MPL v2 
license](LICENSE.md). However, SIFT is patented in the US and perhaps other countries, and this license does not release users of this code from any requirements that may arise from such patents. +Cite Us +-------- + +If you use PopSift for your publication, please cite us as: +```bibtex +@inproceedings{Griwodz2018Popsift, + author = {Griwodz, Carsten and Calvet, Lilian and Halvorsen, P{\aa}l}, + title = {Popsift: A Faithful SIFT Implementation for Real-time Applications}, + booktitle = {Proceedings of the 9th {ACM} Multimedia Systems Conference}, + series = {MMSys '18}, + year = {2018}, + isbn = {978-1-4503-5192-8}, + location = {Amsterdam, Netherlands}, + pages = {415--420}, + numpages = {6}, + doi = {10.1145/3204949.3208136}, + acmid = {3208136}, + publisher = {ACM}, + address = {New York, NY, USA}, +} +``` + Authors ------- From bd2945bfaec26a92fab5ded011ecceeecee762bd Mon Sep 17 00:00:00 2001 From: Yann Lanthony Date: Mon, 8 Apr 2019 17:50:53 +0200 Subject: [PATCH 055/285] [plane_2d] update step for host buffers after resetDimensions Host (CPU) Plane2D references contiguous memory and doesn't have pitch. Its step must be updated when it changes dimensions. * split resetDimensions into resetDimensionsHost and resetDimensionsDev * resetDimensionsHost: reference contiguous memory => update Plane2D's step to width * elemSize() * resetDimensionsDev: reference pitched memory => let the step untouched --- src/popsift/common/plane_2d.h | 38 +++++++++++++++++++++++++++-------- src/popsift/s_image.cu | 20 +++++++++--------- 2 files changed, 40 insertions(+), 18 deletions(-) diff --git a/src/popsift/common/plane_2d.h b/src/popsift/common/plane_2d.h index dd978e42..72a65bee 100644 --- a/src/popsift/common/plane_2d.h +++ b/src/popsift/common/plane_2d.h @@ -13,6 +13,7 @@ #include #include #include +#include #define PLANE2D_CUDA_OP_DEBUG @@ -285,10 +286,18 @@ template class Plane2D : public PitchPlane2D } /** Overwrite the width and height information. 
Useful if smaller - * planes should be loaded into larger preallocated planes + * planes should be loaded into larger preallocated host planes * without actually allocating again, but dangerous. + * @warning: step is updated (host side) */ - __host__ void resetDimensions( int w, int h ); + __host__ void resetDimensionsHost( int w, int h ); + + /** Overwrite the width and height information. Useful if smaller + * planes should be loaded into larger preallocated device planes + * without actually allocating again, but dangerous. + * @warning: step is not updated (device side) + */ + __host__ void resetDimensionsDev( int w, int h ); /** cuda memcpy from this (plane allocated on host) to * parameter (plane allocated on device) */ @@ -350,16 +359,29 @@ template class Plane2D : public PitchPlane2D template __host__ -void Plane2D::resetDimensions( int w, int h ) +void Plane2D::resetDimensionsHost( int w, int h ) { - if( w*sizeof(T) > this->getPitch() ) { - std::cerr << __FILE__ << ":" << __LINE__ << std::endl - << " Error: trying to reinterpret plane width to " << w << " units a " << sizeof(T) << " bytes, " - "only " << this->getPitch() << " bytes allocated" << std::endl; - exit( -1 ); + this->_cols = w; + this->_rows = h; + // on the host side, memory is contiguous (no padding) => step must be updated to match data + this->step = w * this->elemSize(); +} + +template +__host__ +void Plane2D::resetDimensionsDev( int w, int h ) +{ + // validate pitch + if( w * this->elemSize() > this->getPitch() ) { + std::stringstream err; + err << __FILE__ << ":" << __LINE__ << std::endl + << " Error: trying to reinterpret plane width to " << w << " units a " << sizeof(T) << " bytes, " + << "only " << this->getPitch() << " bytes allocated"; + throw std::runtime_error(err.str()); } this->_cols = w; this->_rows = h; + // on the device side, memory is NOT contiguous (may be padded) => step can not be changed without reallocation } template diff --git a/src/popsift/s_image.cu 
b/src/popsift/s_image.cu index 8970b5cf..90232e0e 100755 --- a/src/popsift/s_image.cu +++ b/src/popsift/s_image.cu @@ -74,7 +74,7 @@ void Image::load( void* input ) * is in CUDA-allocated pinned host memory, which makes the H2D copy * much faster. */ - memcpy( _input_image_h.data, input, _w*_h ); + memcpy( _input_image_h.data, input, _w*_h ); // assume that host Plane2D has no pitch _input_image_h.memcpyToDevice( _input_image_d ); } @@ -94,8 +94,8 @@ void Image::resetDimensions( int w, int h ) _h = h; if( w <= _max_w && h <= _max_h ) { - _input_image_h.resetDimensions( w, h ); - _input_image_d.resetDimensions( w, h ); + _input_image_h.resetDimensionsHost( w, h ); + _input_image_d.resetDimensionsDev( w, h ); destroyTexture( ); createTexture( ); @@ -108,8 +108,8 @@ void Image::resetDimensions( int w, int h ) _input_image_d.freeDev( ); _input_image_h.allocHost( _max_w, _max_h, popsift::CudaAllocated ); _input_image_d.allocDev( _max_w, _max_h ); - _input_image_h.resetDimensions( w, h ); - _input_image_d.resetDimensions( w, h ); + _input_image_h.resetDimensionsHost( w, h ); + _input_image_d.resetDimensionsDev( w, h ); destroyTexture( ); createTexture( ); @@ -198,7 +198,7 @@ void ImageFloat::load( void* input ) * is in CUDA-allocated pinned host memory, which makes the H2D copy * much faster. 
*/ - memcpy( _input_image_h.data, input, _w*_h*sizeof(float) ); + memcpy( _input_image_h.data, input, _w*_h*sizeof(float) ); // assume that host Plane2D has no pitch _input_image_h.memcpyToDevice( _input_image_d ); } @@ -218,8 +218,8 @@ void ImageFloat::resetDimensions( int w, int h ) _h = h; if( w <= _max_w && h <= _max_h ) { - _input_image_h.resetDimensions( w, h ); - _input_image_d.resetDimensions( w, h ); + _input_image_h.resetDimensionsHost( w, h ); + _input_image_d.resetDimensionsDev( w, h ); destroyTexture( ); createTexture( ); @@ -232,8 +232,8 @@ void ImageFloat::resetDimensions( int w, int h ) _input_image_d.freeDev( ); _input_image_h.allocHost( _max_w, _max_h, popsift::CudaAllocated ); _input_image_d.allocDev( _max_w, _max_h ); - _input_image_h.resetDimensions( w, h ); - _input_image_d.resetDimensions( w, h ); + _input_image_h.resetDimensionsHost( w, h ); + _input_image_d.resetDimensionsDev( w, h ); destroyTexture( ); createTexture( ); From 4784719be7d9c021b2fdf3c0e7ccb161f779c664 Mon Sep 17 00:00:00 2001 From: Yann Lanthony Date: Mon, 8 Apr 2019 19:18:39 +0200 Subject: [PATCH 056/285] [plane_2D] rename step to _pitchInBytes --- src/popsift/common/plane_2d.h | 54 ++++++++++++++++++----------------- src/popsift/s_image.cu | 4 +-- src/popsift/s_image.h | 6 ++-- 3 files changed, 33 insertions(+), 31 deletions(-) diff --git a/src/popsift/common/plane_2d.h b/src/popsift/common/plane_2d.h index 72a65bee..42fe1c6d 100644 --- a/src/popsift/common/plane_2d.h +++ b/src/popsift/common/plane_2d.h @@ -14,6 +14,7 @@ #include #include #include +#include #define PLANE2D_CUDA_OP_DEBUG @@ -105,13 +106,11 @@ template struct PlaneT : public PlaneBase template struct PitchPlane2D : public PlaneT { - int step; // this is the pitch width in bytes!!! 
- __host__ __device__ - PitchPlane2D( ) : step(0) { } + PitchPlane2D( ) : _pitchInBytes(0) { } __host__ __device__ - PitchPlane2D( T* d, int s ) : PlaneT(d) , step(s) { } + PitchPlane2D( T* d, int s ) : PlaneT(d) , _pitchInBytes(s) { } /** cuda memcpy from this (plane allocated on host) to * parameter (plane allocated on device) */ @@ -142,16 +141,16 @@ template struct PitchPlane2D : public PlaneT short cols, short rows, cudaStream_t stream ); __host__ __device__ inline const T* ptr( int y ) const { - return (const T*)( (const char*)this->data + y * step ); + return (const T*)( (const char*)this->data + y * _pitchInBytes ); } __host__ __device__ inline T* ptr( int y ) { - return (T*)( (char*)this->data + y * step ); + return (T*)( (char*)this->data + y * _pitchInBytes ); } __host__ inline void allocDev( int w, int h ) { size_t pitch; this->data = (T*)PlaneBase::allocDev2D( pitch, w, h, this->elemSize() ); - this->step = pitch; + this->_pitchInBytes = pitch; } __host__ inline void freeDev( ) { @@ -162,14 +161,17 @@ template struct PitchPlane2D : public PlaneT __host__ inline void allocHost( int w, int h, PlaneMapMode mode ) { this->data = (T*)PlaneBase::allocHost2D( w, h, this->elemSize(), mode ); - this->step = w * this->elemSize(); + this->_pitchInBytes = w * this->elemSize(); } __host__ inline void freeHost( PlaneMapMode mode ) { PlaneBase::freeHost2D( this->data, mode ); } __host__ __device__ - inline short getPitch( ) const { return step; } + inline short getPitchInBytes( ) const { return _pitchInBytes; } + +protected: + int _pitchInBytes; // pitch width in bytes }; /************************************************************* @@ -182,8 +184,8 @@ template __host__ inline void PitchPlane2D::memcpyToDevice( PitchPlane2D& devPlane, short cols, short rows ) { - PlaneBase::memcpyToDevice( devPlane.data, devPlane.step, - this->data, this->step, + PlaneBase::memcpyToDevice( devPlane.data, devPlane._pitchInBytes, + this->data, this->_pitchInBytes, cols, rows, 
sizeof(T) ); } @@ -192,8 +194,8 @@ template __host__ inline void PitchPlane2D::memcpyToDevice( PitchPlane2D& devPlane, short cols, short rows, cudaStream_t stream ) { - PlaneBase::memcpyToDevice( devPlane.data, devPlane.step, - this->data, this->step, + PlaneBase::memcpyToDevice( devPlane.data, devPlane._pitchInBytes, + this->data, this->_pitchInBytes, cols, rows, sizeof(T), stream ); @@ -217,8 +219,8 @@ template __host__ inline void PitchPlane2D::memcpyFromDevice( PitchPlane2D& devPlane, short cols, short rows ) { - PlaneBase::memcpyToHost( this->data, this->step, - devPlane.data, devPlane.step, + PlaneBase::memcpyToHost( this->data, this->_pitchInBytes, + devPlane.data, devPlane._pitchInBytes, cols, rows, sizeof(T) ); } @@ -227,8 +229,8 @@ template __host__ inline void PitchPlane2D::memcpyFromDevice( PitchPlane2D& devPlane, short cols, short rows, cudaStream_t stream ) { - PlaneBase::memcpyToHost( this->data, this->step, - devPlane.data, devPlane.step, + PlaneBase::memcpyToHost( this->data, this->_pitchInBytes, + devPlane.data, devPlane._pitchInBytes, cols, rows, sizeof(T), stream ); @@ -276,7 +278,7 @@ template class Plane2D : public PitchPlane2D template __host__ __device__ explicit Plane2D( const Plane2D& orig ) - : PitchPlane2D( (T*)orig.data, orig.step ) + : PitchPlane2D( (T*)orig.data, orig._pitchInBytes ) , _rows( orig.getRows() ) { // careful computation: cols is a short @@ -288,14 +290,14 @@ template class Plane2D : public PitchPlane2D /** Overwrite the width and height information. Useful if smaller * planes should be loaded into larger preallocated host planes * without actually allocating again, but dangerous. - * @warning: step is updated (host side) + * @warning: pitch is updated (host side) */ __host__ void resetDimensionsHost( int w, int h ); /** Overwrite the width and height information. Useful if smaller * planes should be loaded into larger preallocated device planes * without actually allocating again, but dangerous. 
- * @warning: step is not updated (device side) + * @warning: pitch is not updated (device side) */ __host__ void resetDimensionsDev( int w, int h ); @@ -336,7 +338,7 @@ template class Plane2D : public PitchPlane2D __host__ __device__ inline short getHeight( ) const { return _rows; } __host__ __device__ - inline short getByteSize( ) const { return this->step*_rows; } + inline short getByteSize( ) const { return this->_pitchInBytes*_rows; } __host__ inline void allocDev( int w, int h ) { _cols = w; @@ -363,8 +365,8 @@ void Plane2D::resetDimensionsHost( int w, int h ) { this->_cols = w; this->_rows = h; - // on the host side, memory is contiguous (no padding) => step must be updated to match data - this->step = w * this->elemSize(); + // on the host side, memory is contiguous (no padding) => pitch must be updated to match data + this->_pitchInBytes = w * this->elemSize(); } template @@ -372,16 +374,16 @@ __host__ void Plane2D::resetDimensionsDev( int w, int h ) { // validate pitch - if( w * this->elemSize() > this->getPitch() ) { + if( w * this->elemSize() > this->getPitchInBytes() ) { std::stringstream err; err << __FILE__ << ":" << __LINE__ << std::endl << " Error: trying to reinterpret plane width to " << w << " units a " << sizeof(T) << " bytes, " - << "only " << this->getPitch() << " bytes allocated"; + << "only " << this->getPitchInBytes() << " bytes allocated"; throw std::runtime_error(err.str()); } this->_cols = w; this->_rows = h; - // on the device side, memory is NOT contiguous (may be padded) => step can not be changed without reallocation + // on the device side, memory is NOT contiguous (CUDA may add padding) => pitch can not be changed without reallocation } template diff --git a/src/popsift/s_image.cu b/src/popsift/s_image.cu index 90232e0e..fe902d4d 100755 --- a/src/popsift/s_image.cu +++ b/src/popsift/s_image.cu @@ -159,7 +159,7 @@ void Image::createTexture( ) _input_image_resDesc.res.pitch2D.desc.z = 0; _input_image_resDesc.res.pitch2D.desc.w = 0; 
assert( _input_image_d.elemSize() == 1 ); - _input_image_resDesc.res.pitch2D.pitchInBytes = _input_image_d.step; + _input_image_resDesc.res.pitch2D.pitchInBytes = _input_image_d.getPitchInBytes(); _input_image_resDesc.res.pitch2D.width = _input_image_d.getCols(); _input_image_resDesc.res.pitch2D.height = _input_image_d.getRows(); @@ -283,7 +283,7 @@ void ImageFloat::createTexture( ) _input_image_resDesc.res.pitch2D.desc.z = 0; _input_image_resDesc.res.pitch2D.desc.w = 0; assert( _input_image_d.elemSize() == 4 ); - _input_image_resDesc.res.pitch2D.pitchInBytes = _input_image_d.step; /* the step in Plane2D is in bytes */ + _input_image_resDesc.res.pitch2D.pitchInBytes = _input_image_d.getPitchInBytes(); _input_image_resDesc.res.pitch2D.width = _input_image_d.getCols(); _input_image_resDesc.res.pitch2D.height = _input_image_d.getRows(); diff --git a/src/popsift/s_image.h b/src/popsift/s_image.h index dec15a29..e908e691 100755 --- a/src/popsift/s_image.h +++ b/src/popsift/s_image.h @@ -26,7 +26,7 @@ struct ImageBase virtual ~ImageBase( ); - /** Reallocation that takes care of pitch/step when new dimensions + /** Reallocation that takes care of pitch when new dimensions * are smaller and actually reallocation when they are bigger. */ virtual void resetDimensions( int w, int h ) = 0; @@ -76,7 +76,7 @@ struct Image : public ImageBase virtual ~Image( ); - /** Reallocation that takes care of pitch/step when new dimensions + /** Reallocation that takes care of pitch when new dimensions * are smaller and actually reallocation when they are bigger. */ virtual void resetDimensions( int w, int h ); @@ -116,7 +116,7 @@ struct ImageFloat : public ImageBase virtual ~ImageFloat( ); - /** Reallocation that takes care of pitch/step when new dimensions + /** Reallocation that takes care of pitch when new dimensions * are smaller and actually reallocation when they are bigger. 
*/ virtual void resetDimensions( int w, int h ); From 4602d02ec99da84da1063d769df07c5223bace11 Mon Sep 17 00:00:00 2001 From: Stefan de Konink Date: Thu, 1 Aug 2019 15:02:02 +0200 Subject: [PATCH 057/285] Add required dependencies for building --- README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/README.md b/README.md index 172638f3..cd34eeae 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,14 @@ PopSift PopSift is an implementation of the SIFT algorithm in CUDA. PopSift tries to stick as closely as possible to David Lowe's famous paper (Lowe, D. G. (2004). Distinctive Image Features from Scale-Invariant Keypoints. International Journal of Computer Vision, 60(2), 91–110. doi:10.1023/B:VISI.0000029664.99615.94), while extracting features from an image in real-time at least on an NVidia GTX 980 Ti GPU. +Dependencies +------------ + +Most of the dependencies can be installed from the common repositories (apt, yum etc): + +Boost >= 1.55 ([atomic, chrono, date-time, system, thread]-dev) +CUDA >= 7.0 +DevIL (libdevil-dev) Build ----- From 7ee2abca424e247c9689bf260ad3e8e22b6641d2 Mon Sep 17 00:00:00 2001 From: Stefan de Konink Date: Thu, 1 Aug 2019 16:18:19 +0200 Subject: [PATCH 058/285] Make explicit that DevIL is only required for the application. 
--- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index cd34eeae..6498e009 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ Most of the dependencies can be installed from the common repositories (apt, yum Boost >= 1.55 ([atomic, chrono, date-time, system, thread]-dev) CUDA >= 7.0 -DevIL (libdevil-dev) +DevIL (libdevil-dev) (only required for the application) Build ----- From d7d25d0d46089c5d495ca280eb7280db6a695897 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Mon, 9 Dec 2019 10:09:07 +0100 Subject: [PATCH 059/285] [cmake] fix for Boost 1.70 --- src/application/CMakeLists.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/application/CMakeLists.txt b/src/application/CMakeLists.txt index bc4b9fdc..84ef6e65 100755 --- a/src/application/CMakeLists.txt +++ b/src/application/CMakeLists.txt @@ -15,6 +15,10 @@ endif() find_package(Boost 1.53.0 REQUIRED COMPONENTS program_options system filesystem) find_package(DevIL COMPONENTS IL ILU) # yields IL_FOUND, IL_LIBRARIES, IL_INCLUDE_DIR +set(Boost_INCLUDE_DIRS "") +set(Boost_LIBRARIES "") +find_package(Boost 1.53.0 REQUIRED COMPONENTS filesystem program_options) + set(PD_INCLUDE_DIRS ${Boost_INCLUDE_DIRS}) set(PD_LINK_LIBS ${Boost_LIBRARIES} ${CUDA_CUDADEVRT_LIBRARY}) From de5311ef4fcd8b43127f06bebfc1d22d1b02d6e5 Mon Sep 17 00:00:00 2001 From: Carsten Griwodz Date: Tue, 17 Dec 2019 11:17:06 +0100 Subject: [PATCH 060/285] [bugfix] fix incorrect use of syncthreads inside a conditional loop --- src/popsift/s_desc_loop.cu | 79 ++++++++++++++++++++------------------ 1 file changed, 41 insertions(+), 38 deletions(-) diff --git a/src/popsift/s_desc_loop.cu b/src/popsift/s_desc_loop.cu index e6491a95..8581bc1d 100644 --- a/src/popsift/s_desc_loop.cu +++ b/src/popsift/s_desc_loop.cu @@ -76,46 +76,49 @@ void ext_desc_loop_sub( const float ang, float dpt[9] = { 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f }; - for( int i = threadIdx.x; i < loops; 
i+=blockDim.x ) + for( int i = threadIdx.x; popsift::any(i < loops); i+=blockDim.x ) { - const int ii = i / wx + ymin; - const int jj = i % wx + xmin; - - const float2 d = make_float2( jj - ptx, ii - pty ); - - // const float nx = crsbp * dx + srsbp * dy; - // const float ny = crsbp * dy - srsbp * dx; - const float2 n = make_float2( ::fmaf( crsbp, d.x, srsbp * d.y ), - ::fmaf( crsbp, d.y, -srsbp * d.x ) ); - const float2 nn = abs(n); - if (nn.x < 1.0f && nn.y < 1.0f) { - float mod; - float th; - get_gradiant( mod, th, jj, ii, layer_tex, level ); - - const float2 dn = n + offsetpt; - const float ww = __expf( -scalbnf(dn.x*dn.x + dn.y*dn.y, -3)); - // const float ww = __expf(-0.125f * (dnx*dnx + dny*dny)); // speedup ! - const float2 w = make_float2( 1.0f - nn.x, + if( i < loops ) + { + const int ii = i / wx + ymin; + const int jj = i % wx + xmin; + + const float2 d = make_float2( jj - ptx, ii - pty ); + + // const float nx = crsbp * dx + srsbp * dy; + // const float ny = crsbp * dy - srsbp * dx; + const float2 n = make_float2( ::fmaf( crsbp, d.x, srsbp * d.y ), + ::fmaf( crsbp, d.y, -srsbp * d.x ) ); + const float2 nn = abs(n); + if (nn.x < 1.0f && nn.y < 1.0f) { + float mod; + float th; + get_gradiant( mod, th, jj, ii, layer_tex, level ); + + const float2 dn = n + offsetpt; + const float ww = __expf( -scalbnf(dn.x*dn.x + dn.y*dn.y, -3)); + // const float ww = __expf(-0.125f * (dnx*dnx + dny*dny)); // speedup ! + const float2 w = make_float2( 1.0f - nn.x, 1.0f - nn.y ); - const float wgt = ww * w.x * w.y * mod; - - th -= ang; - th += ( th < 0.0f ? M_PI2 : 0.0f ); // if (th < 0.0f ) th += M_PI2; - th -= ( th >= M_PI2 ? 
M_PI2 : 0.0f ); // if (th >= M_PI2) th -= M_PI2; - - const float tth = __fmul_ru( th, M_4RPI ); // th * M_4RPI; - const int fo0 = (int)floorf(tth); - const float do0 = tth - fo0; - const float wgt1 = 1.0f - do0; - const float wgt2 = do0; - - int fo = fo0 % DESC_BINS; - - // maf: multiply-add - // _ru - round to positive infinity equiv to froundf since always >=0 - dpt[fo] = __fmaf_ru( wgt1, wgt, dpt[fo] ); // dpt[fo] += (wgt1*wgt); - dpt[fo+1] = __fmaf_ru( wgt2, wgt, dpt[fo+1] ); // dpt[fo+1] += (wgt2*wgt); + const float wgt = ww * w.x * w.y * mod; + + th -= ang; + th += ( th < 0.0f ? M_PI2 : 0.0f ); // if (th < 0.0f ) th += M_PI2; + th -= ( th >= M_PI2 ? M_PI2 : 0.0f ); // if (th >= M_PI2) th -= M_PI2; + + const float tth = __fmul_ru( th, M_4RPI ); // th * M_4RPI; + const int fo0 = (int)floorf(tth); + const float do0 = tth - fo0; + const float wgt1 = 1.0f - do0; + const float wgt2 = do0; + + int fo = fo0 % DESC_BINS; + + // maf: multiply-add + // _ru - round to positive infinity equiv to froundf since always >=0 + dpt[fo] = __fmaf_ru( wgt1, wgt, dpt[fo] ); // dpt[fo] += (wgt1*wgt); + dpt[fo+1] = __fmaf_ru( wgt2, wgt, dpt[fo+1] ); // dpt[fo+1] += (wgt2*wgt); + } } __syncthreads(); } From f93abbe24f00292829302df2a9a68bbb4226ecf6 Mon Sep 17 00:00:00 2001 From: Carsten Griwodz Date: Tue, 17 Dec 2019 18:50:53 +0100 Subject: [PATCH 061/285] [bugfix] fix the bug in bitonic_sort --- src/popsift/common/warp_bitonic_sort.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/popsift/common/warp_bitonic_sort.h b/src/popsift/common/warp_bitonic_sort.h index 39b693a3..b1df04d8 100644 --- a/src/popsift/common/warp_bitonic_sort.h +++ b/src/popsift/common/warp_bitonic_sort.h @@ -66,8 +66,8 @@ class Warp32 : ( my_val < other_val ); const bool must_swap = not ( my_more ^ reverse ^ increasing ); - return ( must_swap ? popsift::shuffle_xor( my_index, 1 << shift ) - : my_index ); + int lane = must_swap ? 
( 1 << shift ) : 0; + return popsift::shuffle_xor( my_index, lane ); } __device__ inline From 83b8e854097a0a54ed9fe3a11eaff946ca415c96 Mon Sep 17 00:00:00 2001 From: Fabien Castan Date: Wed, 18 Dec 2019 09:44:27 +0100 Subject: [PATCH 062/285] [travis] add travis_retry on apt commands --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 45e77f20..6e6ecb3e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -61,8 +61,8 @@ install: - wget http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1404/x86_64/$CUDA_REPO_PKG - sudo dpkg -i $CUDA_REPO_PKG - rm ${CUDA_REPO_PKG} - - sudo apt-get -y update - - sudo apt-get install -y --no-install-recommends cuda-core-$CUDA_PKG_VERSION cuda-cudart-dev-$CUDA_PKG_VERSION cuda-cublas-dev-$CUDA_PKG_VERSION cuda-curand-dev-$CUDA_PKG_VERSION + - travis_retry sudo apt-get -y update + - travis_retry sudo apt-get install -y --no-install-recommends cuda-core-$CUDA_PKG_VERSION cuda-cudart-dev-$CUDA_PKG_VERSION cuda-cublas-dev-$CUDA_PKG_VERSION cuda-curand-dev-$CUDA_PKG_VERSION - sudo ln -s /usr/local/cuda-${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR} /usr/local/cuda before_script: From 94b68a74ecb653f3a8c089877416730cf86afec8 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Wed, 18 Dec 2019 11:23:56 +0100 Subject: [PATCH 063/285] [cmake] removed redundant find_package --- src/application/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/src/application/CMakeLists.txt b/src/application/CMakeLists.txt index 84ef6e65..4cb95490 100755 --- a/src/application/CMakeLists.txt +++ b/src/application/CMakeLists.txt @@ -12,7 +12,6 @@ else() find_package(PopSift CONFIG REQUIRED) endif() -find_package(Boost 1.53.0 REQUIRED COMPONENTS program_options system filesystem) find_package(DevIL COMPONENTS IL ILU) # yields IL_FOUND, IL_LIBRARIES, IL_INCLUDE_DIR set(Boost_INCLUDE_DIRS "") From fb25124ab1768ed07b01b2831e011d99123e5e44 Mon Sep 17 00:00:00 2001 From: Simone Gasparini 
Date: Wed, 18 Dec 2019 11:24:20 +0100 Subject: [PATCH 064/285] [apps] fix includes with standard cpp --- src/application/main.cpp | 2 +- src/application/match.cpp | 2 +- src/application/pgmread.cpp | 3 +-- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/application/main.cpp b/src/application/main.cpp index ad9d2574..62b145ab 100755 --- a/src/application/main.cpp +++ b/src/application/main.cpp @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/application/match.cpp b/src/application/match.cpp index c18e2f48..fe8865dd 100755 --- a/src/application/match.cpp +++ b/src/application/match.cpp @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/application/pgmread.cpp b/src/application/pgmread.cpp index 50ae310f..51890617 100644 --- a/src/application/pgmread.cpp +++ b/src/application/pgmread.cpp @@ -5,8 +5,7 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ -#include -#include +#include #include #include #include From aed95c55451fd22a856249eb575b051c6f82a435 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Wed, 18 Dec 2019 11:25:42 +0100 Subject: [PATCH 065/285] [apps] using nullptr --- src/application/main.cpp | 2 +- src/application/pgmread.cpp | 26 +++++++++++++------------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/application/main.cpp b/src/application/main.cpp index 62b145ab..e9c8bdf6 100755 --- a/src/application/main.cpp +++ b/src/application/main.cpp @@ -213,8 +213,8 @@ SiftJob* process_image( const string& inputFile, PopSift& PopSift ) nvtxRangePushA( "load and convert image - pgmread" ); image_data = readPGMfile( inputFile, w, h ); - if( image_data == 0 ) { exit( -1 ); + if( image_data == nullptr ) { } nvtxRangePop( ); // "load and convert image - pgmread" diff --git a/src/application/pgmread.cpp b/src/application/pgmread.cpp index 51890617..e70017b6 100644 --- a/src/application/pgmread.cpp +++ b/src/application/pgmread.cpp @@ -40,13 +40,13 @@ unsigned char* readPGMfile( const string& filename, int& w, int& h ) if( not boost::filesystem::exists( input_file ) ) { cerr << "File " << input_file << " does not exist" << endl; - return 0; + return nullptr; } ifstream pgmfile( filename.c_str(), ios::binary ); if( not pgmfile.is_open() ) { cerr << "File " << input_file << " could not be opened for reading" << endl; - return 0; + return nullptr; } string pgmtype; @@ -54,7 +54,7 @@ unsigned char* readPGMfile( const string& filename, int& w, int& h ) getline( pgmfile, pgmtype ); // this is the string version of getline() if( pgmfile.fail() ) { cerr << "File " << input_file << " is too short" << endl; - return 0; + return nullptr; } boost::algorithm::trim_left( pgmtype ); // nice because of trim } while( pgmtype.at(0) == '#' ); @@ -66,7 +66,7 @@ unsigned char* readPGMfile( const string& filename, int& w, int& h ) else if( pgmtype.substr(0,2) == "P6" ) type = 6; else { cerr << "File " << 
input_file << " can only contain P2, P3, P5 or P6 PGM images" << endl; - return 0; + return nullptr; } char line[1000]; @@ -78,7 +78,7 @@ unsigned char* readPGMfile( const string& filename, int& w, int& h ) if( pgmfile.fail() ) { cerr << "File " << input_file << " is too short" << endl; - return 0; + return nullptr; } int num = pgmfile.gcount(); parse = line; @@ -91,20 +91,20 @@ unsigned char* readPGMfile( const string& filename, int& w, int& h ) cerr << "Error in " << __FILE__ << ":" << __LINE__ << endl << "File " << input_file << " PGM type header (" << type << ") must be followed by comments and WxH info" << endl << "but line contains " << parse << endl; - return 0; + return nullptr; } } while( *parse == '#' ); if( w <= 0 || h <= 0 ) { cerr << "File " << input_file << " has meaningless image size" << endl; - return 0; + return nullptr; } do { pgmfile.getline( line, 1000 ); if( pgmfile.fail() ) { cerr << "File " << input_file << " is too short" << endl; - return 0; + return nullptr; } int num = pgmfile.gcount(); parse = line; @@ -115,7 +115,7 @@ unsigned char* readPGMfile( const string& filename, int& w, int& h ) int ct = sscanf( parse, "%d", &maxval ); if( ct != 1 ) { cerr << "File " << input_file << " PGM dimensions must be followed by comments and max value info" << endl; - return 0; + return nullptr; } } while( *parse == '#' ); @@ -135,7 +135,7 @@ unsigned char* readPGMfile( const string& filename, int& w, int& h ) if( pgmfile.fail() ) { cerr << "File " << input_file << " file too short" << endl; delete [] input_data; - return 0; + return nullptr; } } break; @@ -155,7 +155,7 @@ unsigned char* readPGMfile( const string& filename, int& w, int& h ) cerr << "File " << input_file << " file too short" << endl; delete [] i2; delete [] input_data; - return 0; + return nullptr; } } for( int i=0; i Date: Wed, 18 Dec 2019 11:27:43 +0100 Subject: [PATCH 066/285] [apps] using EXIT_{SUCCESS,FAILURE} --- src/application/main.cpp | 10 ++++++---- src/application/match.cpp | 
10 ++++++---- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/application/main.cpp b/src/application/main.cpp index e9c8bdf6..ab9f7dce 100755 --- a/src/application/main.cpp +++ b/src/application/main.cpp @@ -213,8 +213,8 @@ SiftJob* process_image( const string& inputFile, PopSift& PopSift ) nvtxRangePushA( "load and convert image - pgmread" ); image_data = readPGMfile( inputFile, w, h ); - exit( -1 ); if( image_data == nullptr ) { + exit( EXIT_FAILURE ); } nvtxRangePop( ); // "load and convert image - pgmread" @@ -278,7 +278,7 @@ int main(int argc, char **argv) } catch (std::exception& e) { std::cout << e.what() << std::endl; - exit(1); + return EXIT_FAILURE; } if( boost::filesystem::exists( inputFile ) ) { @@ -287,13 +287,13 @@ int main(int argc, char **argv) collectFilenames( inputFiles, inputFile ); if( inputFiles.empty() ) { cerr << "No files in directory, nothing to do" << endl; - exit( 0 ); + return EXIT_SUCCESS; } } else if( boost::filesystem::is_regular_file( inputFile ) ) { inputFiles.push_back( inputFile ); } else { cout << "Input file is neither regular file nor directory, nothing to do" << endl; - exit( -1 ); + return EXIT_FAILURE; } } @@ -324,5 +324,7 @@ int main(int argc, char **argv) } PopSift.uninit( ); + + return EXIT_SUCCESS; } diff --git a/src/application/match.cpp b/src/application/match.cpp index fe8865dd..19b7ae8a 100755 --- a/src/application/match.cpp +++ b/src/application/match.cpp @@ -202,7 +202,7 @@ SiftJob* process_image( const string& inputFile, PopSift& PopSift ) { image_data = readPGMfile( inputFile, w, h ); if( image_data == 0 ) { - exit( -1 ); + exit( EXIT_FAILURE ); } nvtxRangePop( ); @@ -231,20 +231,20 @@ int main(int argc, char **argv) } catch (std::exception& e) { std::cout << e.what() << std::endl; - exit(1); + return EXIT_SUCCESS; } if( boost::filesystem::exists( lFile ) ) { if( not boost::filesystem::is_regular_file( lFile ) ) { cout << "Input file " << lFile << " is not a regular file, nothing to do" << 
endl; - exit( -1 ); + return EXIT_FAILURE; } } if( boost::filesystem::exists( rFile ) ) { if( not boost::filesystem::is_regular_file( rFile ) ) { cout << "Input file " << rFile << " is not a regular file, nothing to do" << endl; - exit( -1 ); + return EXIT_FAILURE; } } @@ -271,5 +271,7 @@ int main(int argc, char **argv) delete rFeatures; PopSift.uninit( ); + + return EXIT_SUCCESS; } From 4e798528287203889278dd73c68b0b4497f0640e Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Wed, 18 Dec 2019 11:28:27 +0100 Subject: [PATCH 067/285] [apps] reducing var scope --- src/application/main.cpp | 9 ++++----- src/application/match.cpp | 10 +++++----- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/src/application/main.cpp b/src/application/main.cpp index ab9f7dce..82922171 100755 --- a/src/application/main.cpp +++ b/src/application/main.cpp @@ -170,8 +170,6 @@ static void collectFilenames( list& inputFiles, const boost::filesystem: SiftJob* process_image( const string& inputFile, PopSift& PopSift ) { - int w; - int h; SiftJob* job; unsigned char* image_data; @@ -195,8 +193,8 @@ SiftJob* process_image( const string& inputFile, PopSift& PopSift ) cerr << "Failed converting image " << inputFile << " to unsigned greyscale image" << endl; exit( -1 ); } - w = img.Width(); - h = img.Height(); + const auto w = img.Width(); + const auto h = img.Height(); cout << "Loading " << w << " x " << h << " image " << inputFile << endl; image_data = img.GetData(); @@ -211,7 +209,8 @@ SiftJob* process_image( const string& inputFile, PopSift& PopSift ) #endif { nvtxRangePushA( "load and convert image - pgmread" ); - + int w{}; + int h{}; image_data = readPGMfile( inputFile, w, h ); if( image_data == nullptr ) { exit( EXIT_FAILURE ); diff --git a/src/application/match.cpp b/src/application/match.cpp index 19b7ae8a..32464dd1 100755 --- a/src/application/match.cpp +++ b/src/application/match.cpp @@ -167,8 +167,6 @@ static void collectFilenames( list& inputFiles, const 
boost::filesystem: SiftJob* process_image( const string& inputFile, PopSift& PopSift ) { - int w; - int h; unsigned char* image_data; SiftJob* job; @@ -185,8 +183,8 @@ SiftJob* process_image( const string& inputFile, PopSift& PopSift ) cerr << "Failed converting image " << inputFile << " to unsigned greyscale image" << endl; exit( -1 ); } - w = img.Width(); - h = img.Height(); + const auto w = img.Width(); + const auto h = img.Height(); cout << "Loading " << w << " x " << h << " image " << inputFile << endl; image_data = img.GetData(); @@ -200,8 +198,10 @@ SiftJob* process_image( const string& inputFile, PopSift& PopSift ) else #endif { + int h{}; + int w{}; image_data = readPGMfile( inputFile, w, h ); - if( image_data == 0 ) { + if( image_data == nullptr ) { exit( EXIT_FAILURE ); } From f9f5b91da8137a574ba625c4de088204907c8cf2 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Wed, 18 Dec 2019 11:29:11 +0100 Subject: [PATCH 068/285] [apps] better code --- src/application/pgmread.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/application/pgmread.cpp b/src/application/pgmread.cpp index e70017b6..d316fda7 100644 --- a/src/application/pgmread.cpp +++ b/src/application/pgmread.cpp @@ -69,12 +69,13 @@ unsigned char* readPGMfile( const string& filename, int& w, int& h ) return nullptr; } - char line[1000]; char* parse; int maxval; + const int maxLineSize{1000}; + char line[maxLineSize]; do { - pgmfile.getline( line, 1000 ); + pgmfile.getline( line, maxLineSize ); if( pgmfile.fail() ) { cerr << "File " << input_file << " is too short" << endl; @@ -101,7 +102,7 @@ unsigned char* readPGMfile( const string& filename, int& w, int& h ) } do { - pgmfile.getline( line, 1000 ); + pgmfile.getline( line, maxLineSize ); if( pgmfile.fail() ) { cerr << "File " << input_file << " is too short" << endl; return nullptr; From 38b88e68a0296acb0982dfea3f884e0f0a9f6c84 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Wed, 18 Dec 2019 11:30:04 +0100 
Subject: [PATCH 069/285] [apps] better init --- src/application/main.cpp | 3 +-- src/application/match.cpp | 15 +++++++-------- src/application/pgmread.cpp | 4 ++-- 3 files changed, 10 insertions(+), 12 deletions(-) diff --git a/src/application/main.cpp b/src/application/main.cpp index 82922171..a93083cf 100755 --- a/src/application/main.cpp +++ b/src/application/main.cpp @@ -268,8 +268,7 @@ int main(int argc, char **argv) popsift::Config config; list inputFiles; - string inputFile = ""; - const char* appName = argv[0]; + string inputFile{}; try { parseargs( argc, argv, config, inputFile ); // Parse command line diff --git a/src/application/match.cpp b/src/application/match.cpp index 32464dd1..ef9e3729 100755 --- a/src/application/match.cpp +++ b/src/application/match.cpp @@ -38,11 +38,11 @@ using namespace std; -static bool print_dev_info = false; -static bool print_time_info = false; -static bool write_as_uchar = false; -static bool dont_write = false; -static bool pgmread_loading = false; +static bool print_dev_info {false}; +static bool print_time_info {false}; +static bool write_as_uchar {false}; +static bool dont_write {false}; +static bool pgmread_loading {false}; static void parseargs(int argc, char** argv, popsift::Config& config, string& lFile, string& rFile) { using namespace boost::program_options; @@ -221,9 +221,8 @@ int main(int argc, char **argv) cudaDeviceReset(); popsift::Config config; - string lFile = ""; - string rFile = ""; - const char* appName = argv[0]; + string lFile{}; + string rFile{}; try { parseargs( argc, argv, config, lFile, rFile ); // Parse command line diff --git a/src/application/pgmread.cpp b/src/application/pgmread.cpp index d316fda7..824190cf 100644 --- a/src/application/pgmread.cpp +++ b/src/application/pgmread.cpp @@ -69,10 +69,10 @@ unsigned char* readPGMfile( const string& filename, int& w, int& h ) return nullptr; } - char* parse; - int maxval; const int maxLineSize{1000}; char line[maxLineSize]; + char* parse{nullptr}; + 
int maxval{}; do { pgmfile.getline( line, maxLineSize ); From fdd187cbfc3435c0826cae6c51ca39636df2307b Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Wed, 18 Dec 2019 11:30:42 +0100 Subject: [PATCH 070/285] [apps] const --- src/application/pgmread.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/application/pgmread.cpp b/src/application/pgmread.cpp index 824190cf..bc17271e 100644 --- a/src/application/pgmread.cpp +++ b/src/application/pgmread.cpp @@ -87,7 +87,7 @@ unsigned char* readPGMfile( const string& filename, int& w, int& h ) parse++; } if( *parse == '#' ) continue; - int ct = sscanf( parse, "%d %d", &w, &h ); + const int ct = sscanf( parse, "%d %d", &w, &h ); if( ct != 2 ) { cerr << "Error in " << __FILE__ << ":" << __LINE__ << endl << "File " << input_file << " PGM type header (" << type << ") must be followed by comments and WxH info" << endl @@ -113,7 +113,7 @@ unsigned char* readPGMfile( const string& filename, int& w, int& h ) parse++; } if( *parse == '#' ) continue; - int ct = sscanf( parse, "%d", &maxval ); + const int ct = sscanf( parse, "%d", &maxval ); if( ct != 1 ) { cerr << "File " << input_file << " PGM dimensions must be followed by comments and max value info" << endl; return nullptr; @@ -161,15 +161,15 @@ unsigned char* readPGMfile( const string& filename, int& w, int& h ) } for( int i=0; i> RATE_SHIFT ); + const unsigned int r = *src; src++; + const unsigned int g = *src; src++; + const unsigned int b = *src; src++; + const unsigned int res = ( ( R_RATE*r+G_RATE*g+B_RATE*b ) >> RATE_SHIFT ); input_data[i] = (unsigned char)res; #else // RGB2GRAY_IN_INT - float r = *src; src++; - float g = *src; src++; - float b = *src; src++; + const float r = *src; src++; + const float g = *src; src++; + const float b = *src; src++; input_data[i] = (unsigned char)( R_RATE*r+G_RATE*g+B_RATE*b ); #endif // RGB2GRAY_IN_INT } From 325a5b1f381d7fe589a17d457fe32e8549710e56 Mon Sep 17 00:00:00 2001 From: Simone 
Gasparini Date: Wed, 18 Dec 2019 11:31:00 +0100 Subject: [PATCH 071/285] [apps] auto --- src/application/main.cpp | 2 +- src/application/pgmread.cpp | 13 ++++++++----- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/application/main.cpp b/src/application/main.cpp index a93083cf..45115ce1 100755 --- a/src/application/main.cpp +++ b/src/application/main.cpp @@ -227,7 +227,7 @@ SiftJob* process_image( const string& inputFile, PopSift& PopSift ) } else { - float* f_image_data = new float [w * h]; + auto f_image_data = new float [w * h]; for( int i=0; i Date: Wed, 18 Dec 2019 11:31:09 +0100 Subject: [PATCH 072/285] [apps] range loop --- src/application/main.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/application/main.cpp b/src/application/main.cpp index 45115ce1..109b2deb 100755 --- a/src/application/main.cpp +++ b/src/application/main.cpp @@ -304,10 +304,9 @@ int main(int argc, char **argv) float_mode ? PopSift::FloatImages : PopSift::ByteImages ); std::queue jobs; - for( auto it = inputFiles.begin(); it!=inputFiles.end(); it++ ) { - inputFile = it->c_str(); - - SiftJob* job = process_image( inputFile, PopSift ); + for(const auto& currFile : inputFiles) + { + SiftJob* job = process_image( currFile, PopSift ); jobs.push( job ); } From 5218f7965afc9daa57ad52a487feff870f4ed01e Mon Sep 17 00:00:00 2001 From: Carsten Griwodz Date: Thu, 19 Dec 2019 09:27:43 +0100 Subject: [PATCH 073/285] [cuda] simplify fix in loop --- src/popsift/s_desc_loop.cu | 73 +++++++++++++++++++------------------- 1 file changed, 36 insertions(+), 37 deletions(-) diff --git a/src/popsift/s_desc_loop.cu b/src/popsift/s_desc_loop.cu index 8581bc1d..bd723249 100644 --- a/src/popsift/s_desc_loop.cu +++ b/src/popsift/s_desc_loop.cu @@ -78,50 +78,49 @@ void ext_desc_loop_sub( const float ang, for( int i = threadIdx.x; popsift::any(i < loops); i+=blockDim.x ) { - if( i < loops ) - { - const int ii = i / wx + ymin; - const int jj = i % wx + xmin; - - 
const float2 d = make_float2( jj - ptx, ii - pty ); - - // const float nx = crsbp * dx + srsbp * dy; - // const float ny = crsbp * dy - srsbp * dx; - const float2 n = make_float2( ::fmaf( crsbp, d.x, srsbp * d.y ), - ::fmaf( crsbp, d.y, -srsbp * d.x ) ); - const float2 nn = abs(n); - if (nn.x < 1.0f && nn.y < 1.0f) { - float mod; - float th; - get_gradiant( mod, th, jj, ii, layer_tex, level ); - - const float2 dn = n + offsetpt; - const float ww = __expf( -scalbnf(dn.x*dn.x + dn.y*dn.y, -3)); - // const float ww = __expf(-0.125f * (dnx*dnx + dny*dny)); // speedup ! - const float2 w = make_float2( 1.0f - nn.x, + if( i >= loops ) continue; + + const int ii = i / wx + ymin; + const int jj = i % wx + xmin; + + const float2 d = make_float2( jj - ptx, ii - pty ); + + // const float nx = crsbp * dx + srsbp * dy; + // const float ny = crsbp * dy - srsbp * dx; + const float2 n = make_float2( ::fmaf( crsbp, d.x, srsbp * d.y ), + ::fmaf( crsbp, d.y, -srsbp * d.x ) ); + const float2 nn = abs(n); + if (nn.x < 1.0f && nn.y < 1.0f) { + float mod; + float th; + get_gradiant( mod, th, jj, ii, layer_tex, level ); + + const float2 dn = n + offsetpt; + const float ww = __expf( -scalbnf(dn.x*dn.x + dn.y*dn.y, -3)); + // const float ww = __expf(-0.125f * (dnx*dnx + dny*dny)); // speedup ! + const float2 w = make_float2( 1.0f - nn.x, 1.0f - nn.y ); - const float wgt = ww * w.x * w.y * mod; + const float wgt = ww * w.x * w.y * mod; - th -= ang; - th += ( th < 0.0f ? M_PI2 : 0.0f ); // if (th < 0.0f ) th += M_PI2; - th -= ( th >= M_PI2 ? M_PI2 : 0.0f ); // if (th >= M_PI2) th -= M_PI2; + th -= ang; + th += ( th < 0.0f ? M_PI2 : 0.0f ); // if (th < 0.0f ) th += M_PI2; + th -= ( th >= M_PI2 ? 
M_PI2 : 0.0f ); // if (th >= M_PI2) th -= M_PI2; - const float tth = __fmul_ru( th, M_4RPI ); // th * M_4RPI; - const int fo0 = (int)floorf(tth); - const float do0 = tth - fo0; - const float wgt1 = 1.0f - do0; - const float wgt2 = do0; + const float tth = __fmul_ru( th, M_4RPI ); // th * M_4RPI; + const int fo0 = (int)floorf(tth); + const float do0 = tth - fo0; + const float wgt1 = 1.0f - do0; + const float wgt2 = do0; - int fo = fo0 % DESC_BINS; + int fo = fo0 % DESC_BINS; - // maf: multiply-add - // _ru - round to positive infinity equiv to froundf since always >=0 - dpt[fo] = __fmaf_ru( wgt1, wgt, dpt[fo] ); // dpt[fo] += (wgt1*wgt); - dpt[fo+1] = __fmaf_ru( wgt2, wgt, dpt[fo+1] ); // dpt[fo+1] += (wgt2*wgt); - } + // maf: multiply-add + // _ru - round to positive infinity equiv to froundf since always >=0 + dpt[fo] = __fmaf_ru( wgt1, wgt, dpt[fo] ); // dpt[fo] += (wgt1*wgt); + dpt[fo+1] = __fmaf_ru( wgt2, wgt, dpt[fo+1] ); // dpt[fo+1] += (wgt2*wgt); } - __syncthreads(); } + __syncthreads(); dpt[0] += dpt[8]; From d81799e67f59aa0b3ed5e250adad8fe6fa5e5102 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Thu, 30 Jan 2020 15:49:02 +0100 Subject: [PATCH 074/285] [doc] fixing the readme --- README.md | 78 ++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 49 insertions(+), 29 deletions(-) diff --git a/README.md b/README.md index 6498e009..c680dd7d 100644 --- a/README.md +++ b/README.md @@ -1,26 +1,28 @@ -PopSift -======= +# PopSift + PopSift is an implementation of the SIFT algorithm in CUDA. -PopSift tries to stick as closely as possible to David Lowe's famous paper (Lowe, D. G. (2004). Distinctive Image Features from Scale-Invariant Keypoints. International Journal of Computer Vision, 60(2), 91–110. doi:10.1023/B:VISI.0000029664.99615.94), while extracting features from an image in real-time at least on an NVidia GTX 980 Ti GPU. 
+PopSift tries to stick as closely as possible to David Lowe's famous paper [1], while extracting features from an image in real-time at least on an NVidia GTX 980 Ti GPU. + +## HW requirements + -Dependencies ------------- +PopSift compiles and works with NVidia cards of compute capability >= 3.0 (including the GT 650M), but the code is developed with the compute capability 5.2 card GTX 980 Ti in mind. -Most of the dependencies can be installed from the common repositories (apt, yum etc): +## Dependencies -Boost >= 1.55 ([atomic, chrono, date-time, system, thread]-dev) -CUDA >= 7.0 -DevIL (libdevil-dev) (only required for the application) +PopSift depends on: -Build ------ +* Boost >= 1.55 (required components {atomic, chrono, date-time, system, thread}-dev) -PopSift has been developed and tested on Linux machines, mostly a variant of Ubuntu, but compiles on MacOSX as well. It comes as a CMake project and requires at least CUDA 7.0 and Boost >= 1.55. It is known to compile and work with NVidia cards of compute capability 3.0 (including the GT 650M), but the code is developed with the compute capability 5.2 card GTX 980 Ti in mind. +* CUDA >= 7.0 -If you want to avoid building the application you can run cmake with the option `-DPopSift_BUILD_EXAMPLES:BOOL=OFF`. -If you want to build PopSift as a shared library: `-DBUILD_SHARED_LIBS=ON`. +Optionally, for the provided applications: + +* DevIL (libdevil-dev) can be used to load a broader range of image formats, otherwise only pgm is supported. + +## Build In order to build the library you can run: @@ -31,7 +33,14 @@ make make install ``` -Continuous integration: +Some build options are available: + +* `PopSift_BUILD_EXAMPLES` (default: `OFF`) enable building the applications that showcase the use of the library. 
+ +* `BUILD_SHARED_LIBS` controls the type of library to build (`ON` for dynamic libraries, `OFF` for static) + + +### Continuous integration: - [![Build Status](https://travis-ci.org/alicevision/popsift.svg?branch=master)](https://travis-ci.org/alicevision/popsift) master branch. - [![Build Status](https://travis-ci.org/alicevision/popsift.svg?branch=develop)](https://travis-ci.org/alicevision/popsift) develop branch. - [![Build status](https://ci.appveyor.com/api/projects/status/rsm5269hs288c2ji/branch/develop?svg=true)](https://ci.appveyor.com/project/AliceVision/popsift/branch/develop) @@ -39,14 +48,15 @@ Continuous integration: -Usage ------ +## Usage -Two artifacts are made: `libpopsift` and the test application `popsift-demo`. Calling popsift-demo without parameters shows the options. +Two artifacts are made: `libpopsift` and, if enabled, the test application `popsift-demo`. Calling `popsift-demo` without parameters shows the options. ### Using PopSift as third party -To integrate PopSift into other software, link with `libpopsift`. If your are using CMake for building your project you can easily add PopSift to your project. Once you have built and installed PopSift in a directory (say, ``), in your `CMakeLists.txt` file just add the dependency +To integrate PopSift into other software, link with `libpopsift`. +If your are using CMake for building your project you can easily add PopSift to your project. +Once you have built and installed PopSift in a directory (say, ``), in your `CMakeLists.txt` file just add the dependency ```cmake # Find the package from the PopSiftConfig.cmake @@ -73,23 +83,30 @@ cmake .. -DPopSift_DIR=/lib/cmake/PopSift/ The caller must create a `popart::Config` struct (documented in `src/sift/sift_conf.h`) to control the behaviour of the PopSift, and instantiate an object of class `PopSift` (found in `src/sift/popsift.h`). -After this, images can be enqueued for SIFT extraction using (`enqueue()`). 
The only valid input format is a single plane of grayscale unsigned characters. Only host memory limits the number of images that can be enqueued. The `enqueue` function returns a pointer to a `SiftJob` immediately and performs the feature extraction asynchronously. The memory of the image passed to enqueue remains the caller's responsibility. Calling `SiftJob::get` on the returned job blocks until features are extracted, and returns them. +After this, images can be enqueued for SIFT extraction using (`enqueue()`). +The only valid input format is a single plane of grayscale unsigned characters. +Only host memory limits the number of images that can be enqueued. +The `enqueue` function returns a pointer to a `SiftJob` immediately and performs the feature extraction asynchronously. +The memory of the image passed to enqueue remains the caller's responsibility. Calling `SiftJob::get` on the returned job blocks until features are extracted, and returns them. -Features offer iterators that iterate over objects of type `Feature`. Both classes are documented in `sift_extremum.h`. Each feature represents a feature point in the coordinate system of the input image, providing X and Y coordinates and scale (sigma), as well as several alternative descriptors for the feature point (according to Lowe, 15% of the feature points should be expected to have 2 or more descriptors). +Features offer iterators that iterate over objects of type `Feature`. +Both classes are documented in `sift_extremum.h`. +Each feature represents a feature point in the coordinate system of the input image, providing X and Y coordinates and scale (sigma), as well as several alternative descriptors for the feature point (according to Lowe, 15% of the feature points should be expected to have 2 or more descriptors). 
In an alternate, deprecated, blocking API, `init()` must be called to pass image width and height to PopSift, followed by a call to `executed()` that takes image data and returns the extracted features. `execute()` is synchronous and blocking. -As far as we know, no implementation that is faster than PopSift at the time of PopSift's release comes under a license that allows commercial use and sticks close to the original paper at the same time as well. PopSift can be configured at runtime to use constants that affect it behaviours. In particular, users can choose to generate results very similar to VLFeat or results that are closer (but not as close) to the SIFT implementation of the OpenCV extras. We acknowledge that there is at least one SIFT implementation that is vastly faster, but it makes considerable sacifices in terms of accuracy and compatibility. +As far as we know, no implementation that is faster than PopSift at the time of PopSift's release comes under a license that allows commercial use and sticks close to the original paper at the same time as well. +PopSift can be configured at runtime to use constants that affect its behaviour. +In particular, users can choose to generate results very similar to VLFeat or results that are closer (but not as close) to the SIFT implementation of the OpenCV extras. +We acknowledge that there is at least one SIFT implementation that is vastly faster, but it makes considerable sacrifices in terms of accuracy and compatibility. -License -------- +## License PopSift is licensed under [MPL v2 license](LICENSE.md). However, SIFT is patented in the US and perhaps other countries, and this license does not release users of this code from any requirements that may arise from such patents. 
-Cite Us --------- +## Cite Us If you use PopSift for your publication, please cite us as: ```bibtex @@ -111,7 +128,10 @@ If you use PopSift for your publication, please cite us as: ``` -Authors -------- +## Authors + +PopSift was developed within the project [POPART](http://www.popartproject.eu), which has been funded by the European Commission in the Horizon 2020 framework. + +___ -It was developed within the project [POPART](http://www.popartproject.eu), which has been funded by the European Commission in the Horizon 2020 framework. +[1]: Lowe, D. G. (2004). Distinctive Image Features from Scale-Invariant Keypoints. International Journal of Computer Vision, 60(2), 91–110. doi:10.1023/B:VISI.0000029664.99615.94 \ No newline at end of file From 2cb056ff578435ce1ad18dc6681f48311ee7af8d Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Thu, 30 Jan 2020 17:43:01 +0100 Subject: [PATCH 075/285] [doc] LICENSE -> COPYING --- LICENSE.md => COPYING.md | 0 README.md | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename LICENSE.md => COPYING.md (100%) diff --git a/LICENSE.md b/COPYING.md similarity index 100% rename from LICENSE.md rename to COPYING.md diff --git a/README.md b/README.md index c680dd7d..26d888b8 100644 --- a/README.md +++ b/README.md @@ -103,7 +103,7 @@ We acknowledge that there is at least one SIFT implementation that is vastly fas ## License -PopSift is licensed under [MPL v2 license](LICENSE.md). +PopSift is licensed under [MPL v2 license](COPYING.md). However, SIFT is patented in the US and perhaps other countries, and this license does not release users of this code from any requirements that may arise from such patents. 
## Cite Us From c2cacc21b11df4d8dbdadf76b93ae78d01b91ea2 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Thu, 30 Jan 2020 17:43:39 +0100 Subject: [PATCH 076/285] [github] github templates for issues and pr --- .github/ISSUE_TEMPLATE/bug_report.md | 36 +++++++++++++++++++++++ .github/ISSUE_TEMPLATE/feature_request.md | 20 +++++++++++++ .github/ISSUE_TEMPLATE/question_help.md | 29 ++++++++++++++++++ .github/pull_request_template.md | 29 ++++++++++++++++++ 4 files changed, 114 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/bug_report.md create mode 100644 .github/ISSUE_TEMPLATE/feature_request.md create mode 100644 .github/ISSUE_TEMPLATE/question_help.md create mode 100644 .github/pull_request_template.md diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 00000000..a3b071e4 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,36 @@ +--- +name: Bug report +about: Create a report to help us improve +title: "[bug]" +labels: bug +assignees: '' + +--- + +**Describe the bug** +A clear and concise description of what the bug is. + +**To Reproduce** +Steps to reproduce the behavior: +1. Go to '...' +2. Click on '....' +3. Scroll down to '....' +4. See error + +**Expected behavior** +A clear and concise description of what you expected to happen. + +**Screenshots** +If applicable, add screenshots to help explain your problem. + +**Log** +If applicable, copy paste the relevant log output (please embed the text in a markdown code tag "\`\`\`" ) + +**Desktop (please complete the following and other pertinent information):** + - OS: [e.g. win 10, osx, ] + - PopSift version: please specify if you are using a release version or your own build + - Binary version (if applicable) [e.g. 2019.1] + - Commit reference (if applicable) [e.g. 08ddbe2] + +**Additional context** +Add any other context about the problem here. 
diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 00000000..52683c44 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,20 @@ +--- +name: Feature request +about: Suggest an idea for this project +title: "[request]" +labels: feature request +assignees: '' + +--- + +**Is your feature request related to a problem? Please describe.** +A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] + +**Describe the solution you'd like** +A clear and concise description of what you want to happen. + +**Describe alternatives you've considered** +A clear and concise description of any alternative solutions or features you've considered. + +**Additional context** +Add any other context or screenshots about the feature request here. diff --git a/.github/ISSUE_TEMPLATE/question_help.md b/.github/ISSUE_TEMPLATE/question_help.md new file mode 100644 index 00000000..71035cde --- /dev/null +++ b/.github/ISSUE_TEMPLATE/question_help.md @@ -0,0 +1,29 @@ +--- +name: Question or help needed +about: Ask question or for help for issues not related to program failures (e.g. "where I can find this feature", "my image is not recognized", "which parameter setting shall I use" etc...) +title: "[question]" +labels: type:question +assignees: '' + +--- + +**Describe the problem** +A clear and concise description of what the problem is. + +**Screenshots** +If applicable, add screenshots to help explain your problem. + +**Dataset** +If applicable, add a link or *few* images to help better understand where the problem may come from. + +**Log** +If applicable, copy paste the relevant log output (please embed the text in a markdown code tag "\`\`\`" ) + +**Desktop (please complete the following and other pertinent information):** + - OS: [e.g. 
win 10, osx, ] + - PopSift version: please specify if you are using a release version or your own build + - Binary version (if applicable) [e.g. 2019.1] + - Commit reference (if applicable) [e.g. 08ddbe2] + +**Additional context** +Add any other context about the problem here. diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 00000000..de3799ad --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,29 @@ + +## Description + + + +## Features list + + + + +## Implementation remarks + + + From 4a9a777bebd76da0025582f76886d6f11190e31f Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Thu, 30 Jan 2020 17:43:56 +0100 Subject: [PATCH 077/285] [github] stale settings for issues --- .github/stale.yml | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 .github/stale.yml diff --git a/.github/stale.yml b/.github/stale.yml new file mode 100644 index 00000000..006179d6 --- /dev/null +++ b/.github/stale.yml @@ -0,0 +1,28 @@ +# Number of days of inactivity before an issue becomes stale +daysUntilStale: 120 +# Number of days of inactivity before a stale issue is closed +daysUntilClose: 7 +# Issues or Pull Requests with these labels will never be considered stale. Set to `[]` to disable +exemptLabels: + - "do not close" + +# Set to true to ignore issues in a project (defaults to false) +exemptProjects: true + +# Set to true to ignore issues in a milestone (defaults to false) +exemptMilestones: true + +# Set to true to ignore issues with an assignee (defaults to false) +exemptAssignees: true + +# Label to use when marking an issue as stale +staleLabel: stale +# Comment to post when marking an issue as stale. Set to `false` to disable +markComment: > + This issue has been automatically marked as stale because it has not had + recent activity. It will be closed if no further activity occurs. Thank you + for your contributions. +# Comment to post when closing a stale issue. 
Set to `false` to disable +closeComment: > + This issue is closed due to inactivity. Feel free to re-open if new information + is available. From fed7a9ca710febb3b6bc81f31c45d31672a35225 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Thu, 30 Jan 2020 17:44:14 +0100 Subject: [PATCH 078/285] [doc] code of conduct and contributing --- CODE_OF_CONDUCT.md | 74 ++++++++++++++++++++++++++++++++++++++++++++++ CONTRIBUTING.md | 35 ++++++++++++++++++++++ 2 files changed, 109 insertions(+) create mode 100644 CODE_OF_CONDUCT.md create mode 100644 CONTRIBUTING.md diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 00000000..e2233aee --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,74 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +In the interest of fostering an open and welcoming environment, we as +contributors and maintainers pledge to making participation in our project and +our community a harassment-free experience for everyone, regardless of age, body +size, disability, ethnicity, gender identity and expression, level of experience, +nationality, personal appearance, race, religion, or sexual identity and +orientation. 
+ +## Our Standards + +Examples of behavior that contributes to creating a positive environment +include: + +* Using welcoming and inclusive language +* Being respectful of differing viewpoints and experiences +* Gracefully accepting constructive criticism +* Focusing on what is best for the community +* Showing empathy towards other community members + +Examples of unacceptable behavior by participants include: + +* The use of sexualized language or imagery and unwelcome sexual attention or +advances +* Trolling, insulting/derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or electronic + address, without explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Our Responsibilities + +Project maintainers are responsible for clarifying the standards of acceptable +behavior and are expected to take appropriate and fair corrective action in +response to any instances of unacceptable behavior. + +Project maintainers have the right and responsibility to remove, edit, or +reject comments, commits, code, wiki edits, issues, and other contributions +that are not aligned to this Code of Conduct, or to ban temporarily or +permanently any contributor for other behaviors that they deem inappropriate, +threatening, offensive, or harmful. + +## Scope + +This Code of Conduct applies both within project spaces and in public spaces +when an individual is representing the project or its community. Examples of +representing a project or community include using an official project e-mail +address, posting via an official social media account, or acting as an appointed +representative at an online or offline event. Representation of a project may be +further defined and clarified by project maintainers. 
+ +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported by contacting the project team privately at alicevision-team@googlegroups.com. All +complaints will be reviewed and investigated and will result in a response that +is deemed necessary and appropriate to the circumstances. The project team is +obligated to maintain confidentiality with regard to the reporter of an incident. +Further details of specific enforcement policies may be posted separately. + +Project maintainers who do not follow or enforce the Code of Conduct in good +faith may face temporary or permanent repercussions as determined by other +members of the project's leadership. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, +available at https://www.contributor-covenant.org/version/1/4/code-of-conduct/ + +[homepage]: https://www.contributor-covenant.org + diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 00000000..384a2781 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,35 @@ +Contributing to PopSift +=========================== + +PopSift relies on a friendly and community-driven effort to create an open source photogrammetry solution. +In order to foster a friendly atmosphere where technical collaboration can flourish, +we recommend you to read the [code of conduct](CODE_OF_CONDUCT.md). + + +Contributing Workflow +--------------------- + +The contributing workflow relies on [Github Pull Requests](https://help.github.com/articles/using-pull-requests/). + +1. If it is an important change, we recommend you to discuss it on the mailing-list before starting implementation. + This ensures that the development is aligned with other +developments already started and will be efficiently integrated. + +2. Create the corresponding issues. + +3. Create a branch and start a PR starting by [WIP], like "[WIP] My new feature" so everyone can follow the development. 
+ Explain the implementation in the PR description with links to issues. + +4. Implement the new feature(s). Add unit test if needed. +One feature per PR is ideal for review, but linked features can be part of the same PR. + +5. When it is ready for review, remove "[WIP]" from the PR name. + +6. The reviewers will look over the code and ask for changes, explain problems they found, +congratulate the author, etc. using the github comments. + +7. After approval, one of the developers with commit approval to the official main repository +will merge your fixes into the "develop" branch. + +8. If not already the case, your name will be added to the [contributors list](CONTRIBUTORS.md). + From 89cd83e6176fa30c3237036607a8716c709be217 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Thu, 30 Jan 2020 17:53:54 +0100 Subject: [PATCH 079/285] [cmake] removed FindPopsift and move Config.cmake to root --- {src/cmake => cmake}/Config.cmake.in | 0 cmake/FindPopsift.cmake | 60 ---------------------------- src/CMakeLists.txt | 2 +- 3 files changed, 1 insertion(+), 61 deletions(-) rename {src/cmake => cmake}/Config.cmake.in (100%) delete mode 100644 cmake/FindPopsift.cmake diff --git a/src/cmake/Config.cmake.in b/cmake/Config.cmake.in similarity index 100% rename from src/cmake/Config.cmake.in rename to cmake/Config.cmake.in diff --git a/cmake/FindPopsift.cmake b/cmake/FindPopsift.cmake deleted file mode 100644 index a9a48119..00000000 --- a/cmake/FindPopsift.cmake +++ /dev/null @@ -1,60 +0,0 @@ -# Locate the Popsift libraries. -# -# Defines the following variables: -# -# POPSIFT_FOUND - TRUE if the popsift headers and libs are found -# POPSIFT_INCLUDE_DIRS - The path to popsift headers -# -# POPSIFT_LIBRARIES - Libraries to link against to use popsift. -# POPSIFT_LIBRARY_DIR - The base directory to search for popsift. 
-# -# Accepts the following variables as input: -# -# POPSIFT_DIR - (as a CMake or environment variable) -# The root directory of the popsift install prefix - -MESSAGE(STATUS "Looking for popsift.") - -FIND_PATH(POPSIFT_INCLUDE_DIR popsift/popsift.h - HINTS - $ENV{POPSIFT_DIR}/include - ${POPSIFT_DIR}/include - PATH_SUFFIXES - popsift -) - -find_package(CUDA 7.0 REQUIRED) -find_package(Boost 1.53.0 REQUIRED COMPONENTS system filesystem) - -IF(POPSIFT_INCLUDE_DIR) - MESSAGE(STATUS "popsift headers found in ${POPSIFT_INCLUDE_DIR}") -ELSE() - MESSAGE(STATUS "POPSIFT_INCLUDE_DIR NOT FOUND") -ENDIF (POPSIFT_INCLUDE_DIR) - -FIND_LIBRARY(POPSIFT_LIBRARY NAMES popsift - HINTS - $ENV{POPSIFT_DIR} - ${POPSIFT_DIR} - PATH_SUFFIXES - lib - lib/popsift -) -GET_FILENAME_COMPONENT(POPSIFT_LIBRARY_DIR "${POPSIFT_LIBRARY}" PATH) - -SET(POPSIFT_LIBRARIES ${POPSIFT_LIBRARY}) -SET(POPSIFT_INCLUDE_DIRS ${POPSIFT_INCLUDE_DIR}) - -IF(POPSIFT_LIBRARY) - MESSAGE(STATUS "popsift libraries found: ${POPSIFT_LIBRARY}") - MESSAGE(STATUS "popsift libraries directories: ${POPSIFT_LIBRARY_DIR}") -ENDIF (POPSIFT_LIBRARY) - -include(FindPackageHandleStandardArgs) -# handle the QUIETLY and REQUIRED arguments and set POPSIFT_FOUND to TRUE -# if all listed variables are TRUE -find_package_handle_standard_args(popsift DEFAULT_MSG - POPSIFT_LIBRARY POPSIFT_INCLUDE_DIR) - -MARK_AS_ADVANCED(POPSIFT_INCLUDE_DIR POPSIFT_LIBRARY) - diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index d636cbb4..9d23db5b 100755 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -88,7 +88,7 @@ write_basic_package_version_file("${version_config}" COMPATIBILITY SameMajorVers # Use variables: # * targets_export_name # * PROJECT_NAME -configure_package_config_file("cmake/Config.cmake.in" +configure_package_config_file("${PROJECT_SOURCE_DIR}/cmake/Config.cmake.in" "${project_config}" INSTALL_DESTINATION "${config_install_dir}") From 5886b4f9b696f91c49875b2228a80cb500c34427 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: 
Thu, 30 Jan 2020 18:38:19 +0100 Subject: [PATCH 080/285] [cmake] config and version headers inside cmake and refactoring --- cmake/sift_config.h.in | 17 +++++++++++++++++ cmake/version.hpp.in | 16 ++++++++++++++++ src/CMakeLists.txt | 29 +++++++++++++++++++++++------ src/popsift/sift_config.h.in | 9 --------- 4 files changed, 56 insertions(+), 15 deletions(-) create mode 100644 cmake/sift_config.h.in create mode 100644 cmake/version.hpp.in mode change 100755 => 100644 src/CMakeLists.txt delete mode 100644 src/popsift/sift_config.h.in diff --git a/cmake/sift_config.h.in b/cmake/sift_config.h.in new file mode 100644 index 00000000..62d98286 --- /dev/null +++ b/cmake/sift_config.h.in @@ -0,0 +1,17 @@ +/* + * Copyright 2016, Simula Research Laboratory + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#pragma once + +#define POPSIFT_IS_DEFINED(F) F() == 1 + +#define POPSIFT_HAVE_SHFL_DOWN_SYNC() @HAVE_SHFL_DOWN_SYNC@ +#define POPSIFT_HAVE_NORMF() @HAVE_NORMF@ +#define POPSIFT_DISABLE_GRID_FILTER() @DISABLE_GRID_FILTER@ + + diff --git a/cmake/version.hpp.in b/cmake/version.hpp.in new file mode 100644 index 00000000..35481a03 --- /dev/null +++ b/cmake/version.hpp.in @@ -0,0 +1,16 @@ +/* + * Copyright 2016, Simula Research Laboratory + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ */ + +#pragma once + +#define POPSIFT_VERSION_MAJOR @PROJECT_VERSION_MAJOR@ +#define POPSIFT_VERSION_MINOR @PROJECT_VERSION_MINOR@ +#define POPSIFT_VERSION_PATCH @PROJECT_VERSION_PATCH@ + +#define POPSIFT_VERSION @PROJECT_VERSION@ +#define POPSIFT_VERSION_STRING "@PROJECT_VERSION@" \ No newline at end of file diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt old mode 100755 new mode 100644 index 9d23db5b..2c05dcfb --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -42,15 +42,15 @@ CUDA_ADD_LIBRARY(popsift popsift/common/vec_macros.h popsift/common/clamp.h ) -configure_file(popsift/sift_config.h.in - ${CMAKE_CURRENT_BINARY_DIR}/popsift/sift_config.h - @ONLY) + +# build directory containing the automatically generated files +set(generated_dir "${CMAKE_CURRENT_BINARY_DIR}/generated") # BUILD_INTERFACE allows to include the directory with source only when target is # built in the building tree (ie, not from an install location) target_include_directories(popsift PUBLIC ${Boost_INCLUDE_DIRS} ${CUDA_INCLUDE_DIRS} - "$") + "$") set_target_properties(popsift PROPERTIES VERSION ${PROJECT_VERSION}) @@ -67,9 +67,13 @@ target_link_libraries(popsift ${Boost_LIBRARIES} ${CUDA_CUDADEVRT_LIBRARY} ${CUD set(config_install_dir "lib/cmake/${PROJECT_NAME}") # include directory for install set(include_install_dir "include") +# the name for the generated header version file +set(popsift_version_header_name "${generated_dir}/version.hpp") +# the name for the generated config.hpp +set(popsift_config_header_name "${generated_dir}/sift_config.h") +# where to install the generated files +set(install_dir_generated "${include_install_dir}/popsift") -# build directory containing the generated files -set(generated_dir "${CMAKE_CURRENT_BINARY_DIR}/generated") # Configuration set(version_config "${generated_dir}/${PROJECT_NAME}ConfigVersion.cmake") @@ -92,6 +96,12 @@ configure_package_config_file("${PROJECT_SOURCE_DIR}/cmake/Config.cmake.in" "${project_config}" INSTALL_DESTINATION 
"${config_install_dir}") +# version file +configure_file("${PROJECT_SOURCE_DIR}/cmake/version.hpp.in" ${popsift_version_header_name} @ONLY) +# config file +configure_file("${PROJECT_SOURCE_DIR}/cmake/sift_config.h.in" ${popsift_config_header_name} @ONLY) + + # Targets: # * /lib/libpopsift.a # * header location after install: /include/ @@ -124,6 +134,13 @@ install(EXPORT "${targets_export_name}" NAMESPACE "${namespace}" DESTINATION "${config_install_dir}") +# install the config and version generated files +install( FILES "${popsift_config_header_name}" + DESTINATION "${install_dir_generated}") + +install( FILES "${popsift_version_header_name}" + DESTINATION "${install_dir_generated}") + if(PopSift_BUILD_EXAMPLES) add_subdirectory(application) endif() diff --git a/src/popsift/sift_config.h.in b/src/popsift/sift_config.h.in deleted file mode 100644 index 1337d771..00000000 --- a/src/popsift/sift_config.h.in +++ /dev/null @@ -1,9 +0,0 @@ -#pragma once - -#define POPSIFT_IS_DEFINED(F) F() == 1 - -#define POPSIFT_HAVE_SHFL_DOWN_SYNC() @HAVE_SHFL_DOWN_SYNC@ -#define POPSIFT_HAVE_NORMF() @HAVE_NORMF@ -#define POPSIFT_DISABLE_GRID_FILTER() @DISABLE_GRID_FILTER@ - - From 02fa995ced5473f9a4ed7368c0297f24cd4fe49a Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Thu, 30 Jan 2020 18:39:08 +0100 Subject: [PATCH 081/285] [cmake] just formatting --- src/CMakeLists.txt | 74 +++++++++++++++++++++++----------------------- 1 file changed, 37 insertions(+), 37 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 2c05dcfb..32378e0c 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -3,44 +3,44 @@ set(LIBRARY_OUTPUT_PATH ${PROJECT_BINARY_DIR}) CUDA_INCLUDE_DIRECTORIES(${Boost_INCLUDE_DIRS} ${CMAKE_CURRENT_BINARY_DIR}/popsift) CUDA_ADD_LIBRARY(popsift - popsift/popsift.cpp popsift/popsift.h - popsift/features.cu popsift/features.h - popsift/sift_constants.cu popsift/sift_constants.h - popsift/sift_conf.cu popsift/sift_conf.h - popsift/gauss_filter.cu 
popsift/gauss_filter.h - popsift/s_image.cu popsift/s_image.h - popsift/sift_pyramid.cu popsift/sift_pyramid.h - popsift/sift_octave.cu popsift/sift_octave.h - popsift/s_pyramid_build.cu - popsift/s_pyramid_build_aa.cu popsift/s_pyramid_build_aa.h - popsift/s_pyramid_build_ai.cu popsift/s_pyramid_build_ai.h - popsift/s_pyramid_build_ra.cu popsift/s_pyramid_build_ra.h - popsift/s_pyramid_fixed.cu - popsift/sift_extremum.h - popsift/sift_extremum.cu popsift/s_extrema.cu - popsift/s_orientation.cu + popsift/popsift.cpp popsift/popsift.h + popsift/features.cu popsift/features.h + popsift/sift_constants.cu popsift/sift_constants.h + popsift/sift_conf.cu popsift/sift_conf.h + popsift/gauss_filter.cu popsift/gauss_filter.h + popsift/s_image.cu popsift/s_image.h + popsift/sift_pyramid.cu popsift/sift_pyramid.h + popsift/sift_octave.cu popsift/sift_octave.h + popsift/s_pyramid_build.cu + popsift/s_pyramid_build_aa.cu popsift/s_pyramid_build_aa.h + popsift/s_pyramid_build_ai.cu popsift/s_pyramid_build_ai.h + popsift/s_pyramid_build_ra.cu popsift/s_pyramid_build_ra.h + popsift/s_pyramid_fixed.cu + popsift/sift_extremum.h + popsift/sift_extremum.cu popsift/s_extrema.cu + popsift/s_orientation.cu popsift/s_filtergrid.cu - popsift/sift_desc.cu - popsift/s_desc_loop.cu popsift/s_desc_loop.h - popsift/s_desc_iloop.cu popsift/s_desc_iloop.h - popsift/s_desc_grid.cu popsift/s_desc_grid.h - popsift/s_desc_igrid.cu popsift/s_desc_igrid.h - popsift/s_desc_notile.cu popsift/s_desc_notile.h - popsift/s_desc_norm_rs.h - popsift/s_desc_norm_l2.h - popsift/s_desc_normalize.h - popsift/s_gradiant.h - popsift/s_solve.h - popsift/common/assist.cu popsift/common/assist.h - popsift/common/clamp.h - popsift/common/plane_2d.cu popsift/common/plane_2d.h - popsift/common/write_plane_2d.cu popsift/common/write_plane_2d.h - popsift/common/debug_macros.cu popsift/common/debug_macros.h - popsift/common/device_prop.cu popsift/common/device_prop.h - popsift/common/warp_bitonic_sort.h - 
popsift/common/excl_blk_prefix_sum.h - popsift/common/vec_macros.h - popsift/common/clamp.h ) + popsift/sift_desc.cu + popsift/s_desc_loop.cu popsift/s_desc_loop.h + popsift/s_desc_iloop.cu popsift/s_desc_iloop.h + popsift/s_desc_grid.cu popsift/s_desc_grid.h + popsift/s_desc_igrid.cu popsift/s_desc_igrid.h + popsift/s_desc_notile.cu popsift/s_desc_notile.h + popsift/s_desc_norm_rs.h + popsift/s_desc_norm_l2.h + popsift/s_desc_normalize.h + popsift/s_gradiant.h + popsift/s_solve.h + popsift/common/assist.cu popsift/common/assist.h + popsift/common/clamp.h + popsift/common/plane_2d.cu popsift/common/plane_2d.h + popsift/common/write_plane_2d.cu popsift/common/write_plane_2d.h + popsift/common/debug_macros.cu popsift/common/debug_macros.h + popsift/common/device_prop.cu popsift/common/device_prop.h + popsift/common/warp_bitonic_sort.h + popsift/common/excl_blk_prefix_sum.h + popsift/common/vec_macros.h + popsift/common/clamp.h) # build directory containing the automatically generated files From b568a8e2de457c4303959772af0742b3fdd255a2 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Fri, 31 Jan 2020 11:56:46 +0100 Subject: [PATCH 082/285] [cmake] added languages to the project --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 72514f25..3d6624af 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,7 @@ # CMake below 3.4 does not work with CUDA separable compilation at all cmake_minimum_required(VERSION 3.4) -project(PopSift VERSION 1.0.0) +project(PopSift VERSION 1.0.0 LANGUAGES CXX) OPTION(PopSift_BUILD_EXAMPLES "Build PopSift applications." ON) OPTION(PopSift_USE_NVTX_PROFILING "Use CUDA NVTX for profiling." 
OFF) From 36762cc187dd0af63f1cea89926bcb9805ac1857 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Fri, 31 Jan 2020 11:57:14 +0100 Subject: [PATCH 083/285] [cmake] just lowercase --- CMakeLists.txt | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3d6624af..a991a1c9 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,15 +3,17 @@ cmake_minimum_required(VERSION 3.4) project(PopSift VERSION 1.0.0 LANGUAGES CXX) -OPTION(PopSift_BUILD_EXAMPLES "Build PopSift applications." ON) -OPTION(PopSift_USE_NVTX_PROFILING "Use CUDA NVTX for profiling." OFF) -OPTION(PopSift_ERRCHK_AFTER_KERNEL "Synchronize and check CUDA error after every kernel." OFF) -OPTION(PopSift_USE_POSITION_INDEPENDENT_CODE "Generate position independent code." ON) -OPTION(PopSift_USE_GRID_FILTER "Switch off grid filtering to massively reduce compile time while debugging other things." ON) -OPTION(PopSift_USE_NORMF "The __normf function computes Euclidian distance on large arrays. Fast but stability is uncertain." OFF) -OPTION(PopSift_USE_TEST_CMD "Add testing step for functional verification" OFF) -OPTION(PopSift_BOOST_USE_STATIC_LIBS "Link with static Boost libraries" OFF) -OPTION(PopSift_NVCC_WARNINGS "Switch on several additional warning for CUDA nvcc" OFF) + +option(PopSift_BUILD_EXAMPLES "Build PopSift applications." ON) +option(PopSift_USE_NVTX_PROFILING "Use CUDA NVTX for profiling." OFF) +option(PopSift_ERRCHK_AFTER_KERNEL "Synchronize and check CUDA error after every kernel." OFF) +option(PopSift_USE_POSITION_INDEPENDENT_CODE "Generate position independent code." ON) +option(PopSift_USE_GRID_FILTER "Switch off grid filtering to massively reduce compile time while debugging other things." ON) +option(PopSift_USE_NORMF "The __normf function computes Euclidian distance on large arrays. Fast but stability is uncertain." 
OFF) +option(PopSift_USE_TEST_CMD "Add testing step for functional verification" OFF) +option(PopSift_BOOST_USE_STATIC_LIBS "Link with static Boost libraries" OFF) +option(PopSift_NVCC_WARNINGS "Switch on several additional warning for CUDA nvcc" OFF) + if(PopSift_BOOST_USE_STATIC_LIBS) set(Boost_USE_STATIC_LIBS ON) From 318036ffe5a0110a782bf7ce8e026c303810d886 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Fri, 31 Jan 2020 11:58:22 +0100 Subject: [PATCH 084/285] [cmake] building everything in (system)-(architecture) --- CMakeLists.txt | 3 +++ src/CMakeLists.txt | 2 -- src/application/CMakeLists.txt | 4 ++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a991a1c9..79dac88f 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,6 +3,9 @@ cmake_minimum_required(VERSION 3.4) project(PopSift VERSION 1.0.0 LANGUAGES CXX) +# Set build path as a folder named as the platform (linux, windows, darwin...) plus the processor type +set(EXECUTABLE_OUTPUT_PATH "${PROJECT_BINARY_DIR}/${CMAKE_SYSTEM_NAME}-${CMAKE_SYSTEM_PROCESSOR}") +set(LIBRARY_OUTPUT_PATH "${PROJECT_BINARY_DIR}/${CMAKE_SYSTEM_NAME}-${CMAKE_SYSTEM_PROCESSOR}") option(PopSift_BUILD_EXAMPLES "Build PopSift applications." ON) option(PopSift_USE_NVTX_PROFILING "Use CUDA NVTX for profiling." 
OFF) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 32378e0c..86c30190 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,5 +1,3 @@ -set(LIBRARY_OUTPUT_PATH ${PROJECT_BINARY_DIR}) - CUDA_INCLUDE_DIRECTORIES(${Boost_INCLUDE_DIRS} ${CMAKE_CURRENT_BINARY_DIR}/popsift) CUDA_ADD_LIBRARY(popsift diff --git a/src/application/CMakeLists.txt b/src/application/CMakeLists.txt index bc4b9fdc..54f80bf4 100755 --- a/src/application/CMakeLists.txt +++ b/src/application/CMakeLists.txt @@ -45,7 +45,7 @@ target_include_directories(popsift-demo PUBLIC ${PD_INCLUDE_DIRS}) target_compile_definitions(popsift-demo PRIVATE ${Boost_DEFINITIONS} BOOST_ALL_DYN_LINK BOOST_ALL_NO_LIB) target_link_libraries(popsift-demo PUBLIC PopSift::popsift ${PD_LINK_LIBS}) -set_target_properties(popsift-demo PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}" ) +set_target_properties(popsift-demo PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/${CMAKE_SYSTEM_NAME}-${CMAKE_SYSTEM_PROCESSOR}") ############################################################# # popsift-match @@ -60,7 +60,7 @@ target_include_directories(popsift-match PUBLIC ${PD_INCLUDE_DIRS}) target_compile_definitions(popsift-match PRIVATE ${Boost_DEFINITIONS} BOOST_ALL_DYN_LINK BOOST_ALL_NO_LIB) target_link_libraries(popsift-match PUBLIC PopSift::popsift ${PD_LINK_LIBS}) -set_target_properties(popsift-match PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}" ) +set_target_properties(popsift-match PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/${CMAKE_SYSTEM_NAME}-${CMAKE_SYSTEM_PROCESSOR}") ############################################################# # installation From 9d99e22f6f26dd528caa0a7c4e5a517444df3e70 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Fri, 31 Jan 2020 12:04:49 +0100 Subject: [PATCH 085/285] [cmake] using GNUInstallDirs --- CMakeLists.txt | 8 ++++++++ src/CMakeLists.txt | 10 +++++----- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt 
b/CMakeLists.txt index 79dac88f..a7666ba7 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -42,6 +42,14 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") set(CMAKE_CXX_STANDARD 11) set(CMAKE_CXX_STANDARD_REQUIRED ON) +# ============================================================================== +# GNUInstallDirs CMake module +# - Define GNU standard installation directories +# - Provides install directory variables as defined by the GNU Coding Standards. +# ============================================================================== +include(GNUInstallDirs) + + # set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -G") # set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -G") diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 86c30190..869d5ecd 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -62,9 +62,9 @@ target_link_libraries(popsift ${Boost_LIBRARIES} ${CUDA_CUDADEVRT_LIBRARY} ${CUD # EXPORTING THE LIBRARY # # place to put the cmake-related files -set(config_install_dir "lib/cmake/${PROJECT_NAME}") +set(config_install_dir "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}") # include directory for install -set(include_install_dir "include") +set(include_install_dir "${CMAKE_INSTALL_INCLUDEDIR}") # the name for the generated header version file set(popsift_version_header_name "${generated_dir}/version.hpp") # the name for the generated config.hpp @@ -106,9 +106,9 @@ configure_file("${PROJECT_SOURCE_DIR}/cmake/sift_config.h.in" ${popsift_config_h # * headers can be included by C++ code `#include ` install(TARGETS popsift EXPORT "${targets_export_name}" - LIBRARY DESTINATION "lib" - ARCHIVE DESTINATION "lib" - RUNTIME DESTINATION "bin" + LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" + ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}" + RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" INCLUDES DESTINATION "${include_install_dir}") # Headers: From 47c87f1a5fb71b63d24085d0803bed976bc9419e Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Fri, 
31 Jan 2020 12:05:10 +0100 Subject: [PATCH 086/285] [cmake] fix target_include_directories --- src/CMakeLists.txt | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 869d5ecd..5b7da497 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -47,8 +47,12 @@ set(generated_dir "${CMAKE_CURRENT_BINARY_DIR}/generated") # BUILD_INTERFACE allows to include the directory with source only when target is # built in the building tree (ie, not from an install location) target_include_directories(popsift - PUBLIC ${Boost_INCLUDE_DIRS} ${CUDA_INCLUDE_DIRS} - "$") + PUBLIC + $ + $ + $ + ${Boost_INCLUDE_DIRS} + ${CUDA_INCLUDE_DIRS}) set_target_properties(popsift PROPERTIES VERSION ${PROJECT_VERSION}) From 4682c0589eb67dbacfc2a74d0c0508893a2cfb06 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Fri, 31 Jan 2020 14:14:01 +0100 Subject: [PATCH 087/285] added clangformat --- .clang-format | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 .clang-format diff --git a/.clang-format b/.clang-format new file mode 100644 index 00000000..e764f59c --- /dev/null +++ b/.clang-format @@ -0,0 +1,50 @@ +--- +BasedOnStyle: Mozilla +AlignTrailingComments: true +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortBlocksOnASingleLine: true +AllowShortCaseLabelsOnASingleLine: true +AllowShortFunctionsOnASingleLine: All +AllowShortIfStatementsOnASingleLine: false +AllowShortLoopsOnASingleLine: false +AlwaysBreakAfterReturnType: None +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakTemplateDeclarations: true +BinPackArguments: false +BinPackParameters: false +BreakBeforeBraces: Custom +BraceWrapping: + AfterClass: true + AfterControlStatement: true + AfterEnum: true + AfterFunction: true + AfterNamespace: false + AfterObjCDeclaration: false + AfterStruct: true + AfterUnion: true + AfterExternBlock: true + BeforeCatch: true + BeforeElse: true + IndentBraces: 
false + SplitEmptyFunction: false + SplitEmptyRecord: false + SplitEmptyNamespace: false +BreakBeforeInheritanceComma: false +ColumnLimit: 120 +CompactNamespaces: false +ConstructorInitializerAllOnOneLineOrOnePerLine: false +Cpp11BracedListStyle: true +DerivePointerAlignment: false +IndentCaseLabels: true +IndentWidth: 4 +KeepEmptyLinesAtTheStartOfBlocks: false +Language: Cpp +PointerAlignment: Left +ReflowComments: true +SpaceBeforeAssignmentOperators: true +SpaceBeforeParens: Never +SpaceInEmptyParentheses: false +SpacesInContainerLiterals: false +SpacesInParentheses: false +SpacesInSquareBrackets: false +Standard: Cpp11 From 4269560aa1f9c98c013083d704b65126d9086b72 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Fri, 31 Jan 2020 14:16:30 +0100 Subject: [PATCH 088/285] [ci] using cmake 3.13.5 --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 45e77f20..14fb33cc 100644 --- a/.travis.yml +++ b/.travis.yml @@ -34,7 +34,7 @@ env: - POPSIFT_APP_INSTALL=${POPSIFT_APP_BUILD}/install # CMAKE # - CMAKE_URL="https://cmake.org/files/v3.6/cmake-3.6.1-Linux-x86_64.tar.gz" - - CMAKE_URL="https://cmake.org/files/v3.4/cmake-3.4.1-Linux-x86_64.tar.gz" + - CMAKE_URL="https://cmake.org/files/v3.13/cmake-3.13.5-Linux-x86_64.tar.gz" - CMAKE_ROOT=${TRAVIS_BUILD_DIR}/cmake - CMAKE_SOURCE=${CMAKE_ROOT}/source - CMAKE_INSTALL=${CMAKE_ROOT}/install From d327f6c62223fcb1e21ae7db2315ab227fab905d Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Fri, 31 Jan 2020 14:33:20 +0100 Subject: [PATCH 089/285] [cmake] build shared by default --- CMakeLists.txt | 1 + README.md | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a7666ba7..95c9bb84 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,6 +16,7 @@ option(PopSift_USE_NORMF "The __normf function computes Euclidian distance on la option(PopSift_USE_TEST_CMD "Add testing step for functional verification" OFF) 
option(PopSift_BOOST_USE_STATIC_LIBS "Link with static Boost libraries" OFF) option(PopSift_NVCC_WARNINGS "Switch on several additional warning for CUDA nvcc" OFF) +option(BUILD_SHARED_LIBS "Build shared libraries" ON) if(PopSift_BOOST_USE_STATIC_LIBS) diff --git a/README.md b/README.md index 26d888b8..b60d1053 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ Some build options are available: * `PopSift_BUILD_EXAMPLES` (default: `OFF`) enable building the applications that showcase the use of the library. -* `BUILD_SHARED_LIBS` controls the type of library to build (`ON` for dynamic libraries, `OFF` for static) +* `BUILD_SHARED_LIBS` (default: `ON`) controls the type of library to build (`ON` for shared libraries, `OFF` for static) ### Continuous integration: From f1d57d0b474fa24b7fa916f3bf6e90a56f678361 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Fri, 31 Jan 2020 14:33:34 +0100 Subject: [PATCH 090/285] [doc] ack and fixes --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index b60d1053..e23b4fea 100644 --- a/README.md +++ b/README.md @@ -35,7 +35,7 @@ make install Some build options are available: -* `PopSift_BUILD_EXAMPLES` (default: `OFF`) enable building the applications that showcase the use of the library. +* `PopSift_BUILD_EXAMPLES` (default: `ON`) enable building the applications that showcase the use of the library. * `BUILD_SHARED_LIBS` (default: `ON`) controls the type of library to build (`ON` for shared libraries, `OFF` for static) @@ -128,7 +128,7 @@ If you use PopSift for your publication, please cite us as: ``` -## Authors +## Acknowledgements PopSift was developed within the project [POPART](http://www.popartproject.eu), which has been funded by the European Commission in the Horizon 2020 framework. 
From 2a45751c00ae0a368f5298fb83691393cb96d4e4 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Fri, 31 Jan 2020 14:34:29 +0100 Subject: [PATCH 091/285] [cmake] CUDA_INCLUDE_DIRECTORIES not necessary for higher versions --- src/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 5b7da497..99df1510 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,4 +1,3 @@ -CUDA_INCLUDE_DIRECTORIES(${Boost_INCLUDE_DIRS} ${CMAKE_CURRENT_BINARY_DIR}/popsift) CUDA_ADD_LIBRARY(popsift popsift/popsift.cpp popsift/popsift.h From ddb1950240ea08ee4da3bd5891e80a4d1660de81 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Fri, 31 Jan 2020 14:36:47 +0100 Subject: [PATCH 092/285] [cmake] useless define version --- cmake/version.hpp.in | 1 - 1 file changed, 1 deletion(-) diff --git a/cmake/version.hpp.in b/cmake/version.hpp.in index 35481a03..4c01ec43 100644 --- a/cmake/version.hpp.in +++ b/cmake/version.hpp.in @@ -12,5 +12,4 @@ #define POPSIFT_VERSION_MINOR @PROJECT_VERSION_MINOR@ #define POPSIFT_VERSION_PATCH @PROJECT_VERSION_PATCH@ -#define POPSIFT_VERSION @PROJECT_VERSION@ #define POPSIFT_VERSION_STRING "@PROJECT_VERSION@" \ No newline at end of file From 474c02756f63839322a3c919ddcfdc7421adda70 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Fri, 31 Jan 2020 14:37:03 +0100 Subject: [PATCH 093/285] [cmake] maquillage --- CMakeLists.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 95c9bb84..f02df3c3 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -54,6 +54,9 @@ include(GNUInstallDirs) # set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -G") # set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -G") +################# +# BOOST +################# find_package(Boost 1.53.0 REQUIRED COMPONENTS system thread) if(WIN32) add_definitions("-DBOOST_ALL_NO_LIB") @@ -76,6 +79,9 @@ else() set(CUDA_USE_STATIC_CUDA_RUNTIME ON) endif() +################### +# CUDA 
+################### find_package(CUDA 7.0 REQUIRED) if(NOT CUDA_FOUND) From 1212888fe79c78430e4351ec63e9ee38478847a2 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Fri, 31 Jan 2020 14:58:35 +0100 Subject: [PATCH 094/285] [cmake] prepend popsift_ to avoid conflicts --- cmake/Config.cmake.in | 2 +- src/CMakeLists.txt | 52 +++++++++++++++++++++---------------------- 2 files changed, 27 insertions(+), 27 deletions(-) diff --git a/cmake/Config.cmake.in b/cmake/Config.cmake.in index 5aaa2d8e..b115274c 100644 --- a/cmake/Config.cmake.in +++ b/cmake/Config.cmake.in @@ -39,5 +39,5 @@ @PACKAGE_INIT@ -include("${CMAKE_CURRENT_LIST_DIR}/@targets_export_name@.cmake") +include("${CMAKE_CURRENT_LIST_DIR}/@popsift_targets_export_name@.cmake") check_required_components("@PROJECT_NAME@") diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 99df1510..79537d05 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -41,14 +41,14 @@ CUDA_ADD_LIBRARY(popsift # build directory containing the automatically generated files -set(generated_dir "${CMAKE_CURRENT_BINARY_DIR}/generated") +set(popsift_generated_dir "${CMAKE_CURRENT_BINARY_DIR}/generated") # BUILD_INTERFACE allows to include the directory with source only when target is # built in the building tree (ie, not from an install location) target_include_directories(popsift PUBLIC $ - $ + $ $ ${Boost_INCLUDE_DIRS} ${CUDA_INCLUDE_DIRS}) @@ -65,37 +65,37 @@ target_link_libraries(popsift ${Boost_LIBRARIES} ${CUDA_CUDADEVRT_LIBRARY} ${CUD # EXPORTING THE LIBRARY # # place to put the cmake-related files -set(config_install_dir "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}") +set(popsift_config_install_dir "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}") # include directory for install -set(include_install_dir "${CMAKE_INSTALL_INCLUDEDIR}") +set(popsift_include_install_dir "${CMAKE_INSTALL_INCLUDEDIR}") # the name for the generated header version file -set(popsift_version_header_name "${generated_dir}/version.hpp") 
+set(popsift_version_header_name "${popsift_generated_dir}/version.hpp") # the name for the generated config.hpp -set(popsift_config_header_name "${generated_dir}/sift_config.h") +set(popsift_config_header_name "${popsift_generated_dir}/sift_config.h") # where to install the generated files -set(install_dir_generated "${include_install_dir}/popsift") +set(popsift_install_dir_generated "${popsift_include_install_dir}/popsift") # Configuration -set(version_config "${generated_dir}/${PROJECT_NAME}ConfigVersion.cmake") -set(project_config "${generated_dir}/${PROJECT_NAME}Config.cmake") -set(targets_export_name "${PROJECT_NAME}Targets") -set(namespace "${PROJECT_NAME}::") +set(popsift_cmake_version_config "${popsift_generated_dir}/${PROJECT_NAME}ConfigVersion.cmake") +set(popsift_cmake_project_config "${popsift_generated_dir}/${PROJECT_NAME}Config.cmake") +set(popsift_targets_export_name "${PROJECT_NAME}Targets") +set(popsift_namespace "${PROJECT_NAME}::") # Include module with fuction 'write_basic_package_version_file' include(CMakePackageConfigHelpers) # Configure 'ConfigVersion.cmake' # Note: major version number must be the same as requested -write_basic_package_version_file("${version_config}" COMPATIBILITY SameMajorVersion) +write_basic_package_version_file("${popsift_cmake_version_config}" COMPATIBILITY SameMajorVersion) # Configure 'Config.cmake' # Use variables: -# * targets_export_name +# * popsift_targets_export_name # * PROJECT_NAME configure_package_config_file("${PROJECT_SOURCE_DIR}/cmake/Config.cmake.in" - "${project_config}" - INSTALL_DESTINATION "${config_install_dir}") + "${popsift_cmake_project_config}" + INSTALL_DESTINATION "${popsift_config_install_dir}") # version file configure_file("${PROJECT_SOURCE_DIR}/cmake/version.hpp.in" ${popsift_version_header_name} @ONLY) @@ -108,39 +108,39 @@ configure_file("${PROJECT_SOURCE_DIR}/cmake/sift_config.h.in" ${popsift_config_h # * header location after install: /include/ # * headers can be included by C++ 
code `#include ` install(TARGETS popsift - EXPORT "${targets_export_name}" + EXPORT "${popsift_targets_export_name}" LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}" RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" - INCLUDES DESTINATION "${include_install_dir}") + INCLUDES DESTINATION "${popsift_include_install_dir}") # Headers: install(DIRECTORY "popsift" - DESTINATION "${include_install_dir}" + DESTINATION "${popsift_include_install_dir}" FILES_MATCHING PATTERN "*.hpp" PATTERN "*.h") install(DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/popsift" - DESTINATION "${include_install_dir}" + DESTINATION "${popsift_include_install_dir}" FILES_MATCHING PATTERN "*.hpp" PATTERN "*.h") # Config # * /lib/cmake/${PROJECT_NAME}/${PROJECT_NAME}Config.cmake # * /lib/cmake/${PROJECT_NAME}${PROJECT_NAME}ConfigVersion.cmake -install(FILES "${project_config}" "${version_config}" - DESTINATION "${config_install_dir}") +install(FILES "${popsift_cmake_project_config}" "${popsift_cmake_version_config}" + DESTINATION "${popsift_config_install_dir}") # Config # * /lib/cmake/${PROJECT_NAME}/${PROJECT_NAME}Targets.cmake -install(EXPORT "${targets_export_name}" - NAMESPACE "${namespace}" - DESTINATION "${config_install_dir}") +install(EXPORT "${popsift_targets_export_name}" + NAMESPACE "${popsift_namespace}" + DESTINATION "${popsift_config_install_dir}") # install the config and version generated files install( FILES "${popsift_config_header_name}" - DESTINATION "${install_dir_generated}") + DESTINATION "${popsift_install_dir_generated}") install( FILES "${popsift_version_header_name}" - DESTINATION "${install_dir_generated}") + DESTINATION "${popsift_install_dir_generated}") if(PopSift_BUILD_EXAMPLES) add_subdirectory(application) From f98c68e1cabdf9d373e574999e25727814e8e956 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Mon, 3 Feb 2020 09:40:15 +0100 Subject: [PATCH 095/285] [cmake] add build recap messages --- CMakeLists.txt | 26 
++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f02df3c3..b0826e0c 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -204,5 +204,27 @@ CONFIGURE_FILE( "${CMAKE_CURRENT_BINARY_DIR}/cmake/cmake_uninstall.cmake" IMMEDIATE @ONLY) ADD_CUSTOM_TARGET(uninstall - "${CMAKE_COMMAND}" -P "${CMAKE_CURRENT_BINARY_DIR}/cmake/cmake_uninstall.cmake") - + "${CMAKE_COMMAND}" -P "${CMAKE_CURRENT_BINARY_DIR}/cmake/cmake_uninstall.cmake") + + +###################################### +# SUMMARY +###################################### +message("\n") +message("******************************************") +message("Building configuration:\n") +message(STATUS "PopSift version: " ${PROJECT_VERSION}) +message(STATUS "Build type: " ${CMAKE_BUILD_TYPE}) +message(STATUS "Build Shared libs: " ${BUILD_SHARED_LIBS}) +message(STATUS "Build examples: " ${PopSift_BUILD_EXAMPLES}) +message(STATUS "Generate position independent code: " ${PopSift_USE_POSITION_INDEPENDENT_CODE}) +message(STATUS "Use CUDA NVTX for profiling: " ${PopSift_USE_NVTX_PROFILING}) +message(STATUS "Synchronize and check CUDA error after every kernel: " ${PopSift_ERRCHK_AFTER_KERNEL}) +message(STATUS "Grid filtering: " ${PopSift_USE_GRID_FILTER}) +message(STATUS "Testing step: " ${PopSift_USE_TEST_CMD}) +message(STATUS "Link with static Boost libraries: " ${PopSift_BOOST_USE_STATIC_LIBS}) +message(STATUS "Additional warning for CUDA nvcc: " ${PopSift_NVCC_WARNINGS}) +message(STATUS "Compiling for CUDA CCs: ${PopSift_CUDA_CC_LIST}") +message(STATUS "Install path: " ${CMAKE_INSTALL_PREFIX}) +message("\n******************************************") +message("\n") \ No newline at end of file From 23143e9d25ea30333caba565b68f96c8527ed33e Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Mon, 3 Feb 2020 09:55:26 +0100 Subject: [PATCH 096/285] [cmake] fix path to popsift-demo in scripts --- testScripts/CMakeLists.txt | 3 +++ testScripts/{TEST.sh => 
TEST.sh.in} | 8 +++++--- testScripts/testOxfordDataset.sh.in | 2 +- 3 files changed, 9 insertions(+), 4 deletions(-) rename testScripts/{TEST.sh => TEST.sh.in} (85%) diff --git a/testScripts/CMakeLists.txt b/testScripts/CMakeLists.txt index a048a699..77fb7ede 100755 --- a/testScripts/CMakeLists.txt +++ b/testScripts/CMakeLists.txt @@ -4,6 +4,9 @@ configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/downloadOxfordDataset.sh.in configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/testOxfordDataset.sh.in ${CMAKE_CURRENT_BINARY_DIR}/testOxfordDataset.sh ) +configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/TEST.sh.in + ${CMAKE_CURRENT_BINARY_DIR}/TEST.sh ) + add_custom_target( prepare-test COMMAND /bin/bash ${CMAKE_CURRENT_BINARY_DIR}/downloadOxfordDataset.sh diff --git a/testScripts/TEST.sh b/testScripts/TEST.sh.in similarity index 85% rename from testScripts/TEST.sh rename to testScripts/TEST.sh.in index 817d62f8..89dd094e 100755 --- a/testScripts/TEST.sh +++ b/testScripts/TEST.sh.in @@ -4,6 +4,8 @@ IMAGE=../../popsift-samples/sample/big_set/boat/img3.ppm # IMAGE=./test-17x17.pgm +POPSIFT_DEMO_BIN=@EXECUTABLE_OUTPUT_PATH@/popsift-demo + LOG=--log # LOG= # GAUSS_MODE="--gauss-mode=vlfeat" @@ -20,9 +22,9 @@ for mode in loop ; do # for mode in loop grid igrid notile ; do # for mode in igrid notile ; do echo "MODE: $mode" - echo "./popsift-demo $PARAMS --desc-mode=$mode --write-as-uchar --norm-multi=9 -i $IMAGE" - ./popsift-demo $PARAMS --desc-mode=$mode --write-as-uchar --norm-multi=9 -i $IMAGE - # ./popsift-demo $LOG --popsift-mode --desc-mode=$mode --octaves=8 --threshold=0.04 --edge-threshold=10.0 --initial-blur=0.5 --write-as-uchar --norm-multi=9 -i $IMAGE + echo "$POPSIFT_DEMO_BIN $PARAMS --desc-mode=$mode --write-as-uchar --norm-multi=9 -i $IMAGE" + $POPSIFT_DEMO_BIN $PARAMS --desc-mode=$mode --write-as-uchar --norm-multi=9 -i $IMAGE + # $POPSIFT_DEMO_BIN $LOG --popsift-mode --desc-mode=$mode --octaves=8 --threshold=0.04 --edge-threshold=10.0 --initial-blur=0.5 --write-as-uchar 
--norm-multi=9 -i $IMAGE sort -n output-features.txt > UML echo 128 > output-features-$mode.txt wc -l UML >> output-features-$mode.txt diff --git a/testScripts/testOxfordDataset.sh.in b/testScripts/testOxfordDataset.sh.in index 41424d33..1beab726 100644 --- a/testScripts/testOxfordDataset.sh.in +++ b/testScripts/testOxfordDataset.sh.in @@ -45,7 +45,7 @@ do echo "Directory output-$img exists. Skipping." continue fi - @CMAKE_BINARY_DIR@/popsift-demo --log --gauss-mode vlfeat --desc-mode loop --popsift-mode --root-sift --downsampling -1 -i $imgfile + @EXECUTABLE_OUTPUT_PATH@/popsift-demo --log --gauss-mode vlfeat --desc-mode loop --popsift-mode --root-sift --downsampling -1 -i $imgfile if [ $? != 0 ] then echo "Running popsift on $imgfile failed." From 35dacedbed17de54d95c9a4390dbfee62990d101 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Mon, 3 Feb 2020 09:59:41 +0100 Subject: [PATCH 097/285] [cmake] avoid possible conflicts --- CMakeLists.txt | 4 ++-- cmake/sift_config.h.in | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b0826e0c..bcf14b9a 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -165,9 +165,9 @@ if( ( CUDA_VERSION VERSION_EQUAL "7.5" ) OR ( CUDA_VERSION VERSION_GREATER "7.5" endif() if(PopSift_USE_NORMF AND CUDA_VERSION VERSION_GREATER "7.4") - set(HAVE_NORMF 1) + set(PopSift_HAVE_NORMF 1) else() - set(HAVE_NORMF 0) + set(PopSift_HAVE_NORMF 0) endif() if( ( CUDA_VERSION VERSION_EQUAL "9.0" ) OR ( CUDA_VERSION VERSION_GREATER "9.0") ) diff --git a/cmake/sift_config.h.in b/cmake/sift_config.h.in index 62d98286..bb797114 100644 --- a/cmake/sift_config.h.in +++ b/cmake/sift_config.h.in @@ -11,7 +11,7 @@ #define POPSIFT_IS_DEFINED(F) F() == 1 #define POPSIFT_HAVE_SHFL_DOWN_SYNC() @HAVE_SHFL_DOWN_SYNC@ -#define POPSIFT_HAVE_NORMF() @HAVE_NORMF@ +#define POPSIFT_HAVE_NORMF() @PopSift_HAVE_NORMF@ #define POPSIFT_DISABLE_GRID_FILTER() @DISABLE_GRID_FILTER@ From 
ff5ea23212800837ac690c52b33e6970a200288c Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Mon, 3 Feb 2020 11:10:49 +0100 Subject: [PATCH 098/285] using POPSIFT_USE_NVTX instead of def USE_NVTX --- CMakeLists.txt | 4 +++- cmake/sift_config.h.in | 1 + src/application/main.cpp | 2 +- src/application/match.cpp | 2 +- src/popsift/popsift.cpp | 5 +++-- src/popsift/popsift.h | 5 +++-- src/popsift/s_filtergrid.cu | 2 +- src/popsift/s_image.cu | 3 ++- src/popsift/s_orientation.cu | 3 ++- src/popsift/sift_desc.cu | 3 ++- src/popsift/sift_pyramid.cu | 3 ++- 11 files changed, 21 insertions(+), 12 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index bcf14b9a..5b32dc65 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -189,7 +189,9 @@ cuda_find_library_local_first(CUDA_CUDADEVRT_LIBRARY cudadevrt "\"cudadevrt\" li if(PopSift_USE_NVTX_PROFILING) # library required for NVTX profiling of the CPU cuda_find_library_local_first(CUDA_NVTX_LIBRARY nvToolsExt "NVTX library") - add_definitions(-DUSE_NVTX) + set(PopSift_USE_NVTX 1) +else() + set(PopSift_USE_NVTX 0) endif() add_subdirectory(src) diff --git a/cmake/sift_config.h.in b/cmake/sift_config.h.in index bb797114..427cfe42 100644 --- a/cmake/sift_config.h.in +++ b/cmake/sift_config.h.in @@ -13,5 +13,6 @@ #define POPSIFT_HAVE_SHFL_DOWN_SYNC() @HAVE_SHFL_DOWN_SYNC@ #define POPSIFT_HAVE_NORMF() @PopSift_HAVE_NORMF@ #define POPSIFT_DISABLE_GRID_FILTER() @DISABLE_GRID_FILTER@ +#define POPSIFT_USE_NVTX() @PopSift_USE_NVTX@ diff --git a/src/application/main.cpp b/src/application/main.cpp index ad9d2574..84ee5412 100755 --- a/src/application/main.cpp +++ b/src/application/main.cpp @@ -29,7 +29,7 @@ #endif #include "pgmread.h" -#ifdef USE_NVTX +#if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX) #include #else #define nvtxRangePushA(a) diff --git a/src/application/match.cpp b/src/application/match.cpp index c18e2f48..23bb93e3 100755 --- a/src/application/match.cpp +++ b/src/application/match.cpp @@ -29,7 +29,7 @@ #endif #include 
"pgmread.h" -#ifdef USE_NVTX +#if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX) #include #else #define nvtxRangePushA(a) diff --git a/src/popsift/popsift.cpp b/src/popsift/popsift.cpp index bb1cc5eb..d324db09 100755 --- a/src/popsift/popsift.cpp +++ b/src/popsift/popsift.cpp @@ -10,6 +10,7 @@ #include "popsift.h" #include "gauss_filter.h" #include "sift_pyramid.h" +#include "sift_config.h" using namespace std; @@ -283,7 +284,7 @@ void SiftJob::setImg( popsift::ImageBase* img ) popsift::ImageBase* SiftJob::getImg() { -#ifdef USE_NVTX +#if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX) _nvtx_id = nvtxRangeStartA( "inserting image" ); #endif return _img; @@ -292,7 +293,7 @@ popsift::ImageBase* SiftJob::getImg() void SiftJob::setFeatures( popsift::FeaturesBase* f ) { _p.set_value( f ); -#ifdef USE_NVTX +#if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX) nvtxRangeEnd( _nvtx_id ); #endif } diff --git a/src/popsift/popsift.h b/src/popsift/popsift.h index 9f410dfb..321d9290 100755 --- a/src/popsift/popsift.h +++ b/src/popsift/popsift.h @@ -17,9 +17,10 @@ #include "sift_conf.h" #include "sift_extremum.h" +#include "sift_config.h" -#ifdef USE_NVTX +#if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX) #include #else #define nvtxRangeStartA(a) @@ -45,7 +46,7 @@ class SiftJob int _h; unsigned char* _imageData; popsift::ImageBase* _img; -#ifdef USE_NVTX +#if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX) nvtxRangeId_t _nvtx_id; #endif diff --git a/src/popsift/s_filtergrid.cu b/src/popsift/s_filtergrid.cu index c724a36a..ad1ac0df 100644 --- a/src/popsift/s_filtergrid.cu +++ b/src/popsift/s_filtergrid.cu @@ -9,7 +9,7 @@ #include "sift_extremum.h" #include "sift_config.h" -#ifdef USE_NVTX +#if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX) #include #else #define nvtxRangePushA(a) diff --git a/src/popsift/s_image.cu b/src/popsift/s_image.cu index fe902d4d..ee37751b 100755 --- a/src/popsift/s_image.cu +++ b/src/popsift/s_image.cu @@ -12,8 +12,9 @@ #include "common/assist.h" #include #include +#include "sift_config.h" -#ifdef USE_NVTX +#if 
POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX) #include #else #define nvtxRangePushA(a) diff --git a/src/popsift/s_orientation.cu b/src/popsift/s_orientation.cu index 64ac5863..da7367d1 100644 --- a/src/popsift/s_orientation.cu +++ b/src/popsift/s_orientation.cu @@ -17,8 +17,9 @@ #include "common/warp_bitonic_sort.h" #include "common/debug_macros.h" #include "common/assist.h" +#include "sift_config.h" -#ifdef USE_NVTX +#if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX) #include #else #define nvtxRangePushA(a) diff --git a/src/popsift/sift_desc.cu b/src/popsift/sift_desc.cu index a7ba09ac..cf121573 100644 --- a/src/popsift/sift_desc.cu +++ b/src/popsift/sift_desc.cu @@ -20,8 +20,9 @@ #include "s_desc_notile.h" #include "common/assist.h" #include "common/debug_macros.h" +#include "sift_config.h" -#ifdef USE_NVTX +#if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX) #include #else #define nvtxRangePushA(a) diff --git a/src/popsift/sift_pyramid.cu b/src/popsift/sift_pyramid.cu index 0079b109..e59b7013 100755 --- a/src/popsift/sift_pyramid.cu +++ b/src/popsift/sift_pyramid.cu @@ -21,8 +21,9 @@ #include "sift_extremum.h" #include "common/debug_macros.h" #include "common/assist.h" +#include "sift_config.h" -#ifdef USE_NVTX +#if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX) #include #else #define nvtxRangePushA(a) From 3a2a7daa8b6fa2b331dc94dcd44580a0f6a6921f Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Mon, 3 Feb 2020 12:41:12 +0100 Subject: [PATCH 099/285] using c std library c++ way --- src/application/main.cpp | 2 +- src/application/match.cpp | 2 +- src/popsift/s_image.cu | 8 ++++---- src/popsift/s_orientation.cu | 6 +++--- src/popsift/sift_desc.cu | 3 +-- src/popsift/sift_pyramid.cu | 2 +- 6 files changed, 11 insertions(+), 12 deletions(-) diff --git a/src/application/main.cpp b/src/application/main.cpp index 84ee5412..232ae4e2 100755 --- a/src/application/main.cpp +++ b/src/application/main.cpp @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include diff --git 
a/src/application/match.cpp b/src/application/match.cpp index 23bb93e3..5a2f452f 100755 --- a/src/application/match.cpp +++ b/src/application/match.cpp @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/popsift/s_image.cu b/src/popsift/s_image.cu index ee37751b..fd149a5a 100755 --- a/src/popsift/s_image.cu +++ b/src/popsift/s_image.cu @@ -6,13 +6,13 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #include "s_image.h" -#include -#include #include "common/debug_macros.h" #include "common/assist.h" -#include -#include #include "sift_config.h" +#include +#include +#include +#include #if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX) #include diff --git a/src/popsift/s_orientation.cu b/src/popsift/s_orientation.cu index da7367d1..7db2d9cc 100644 --- a/src/popsift/s_orientation.cu +++ b/src/popsift/s_orientation.cu @@ -5,9 +5,9 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -#include -#include -#include +#include +#include +#include #include "common/assist.h" #include "sift_pyramid.h" diff --git a/src/popsift/sift_desc.cu b/src/popsift/sift_desc.cu index cf121573..ae19dcc0 100644 --- a/src/popsift/sift_desc.cu +++ b/src/popsift/sift_desc.cu @@ -6,8 +6,7 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ #include -#include -#include +#include #include "sift_pyramid.h" #include "sift_constants.h" diff --git a/src/popsift/sift_pyramid.cu b/src/popsift/sift_pyramid.cu index e59b7013..ccb752a2 100755 --- a/src/popsift/sift_pyramid.cu +++ b/src/popsift/sift_pyramid.cu @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #ifdef _WIN32 #include From a134a906a32b4ad11373faca30eedaf528d54e87 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Mon, 3 Feb 2020 12:55:07 +0100 Subject: [PATCH 100/285] [cmake] allows include "config" and when in-tree --- src/CMakeLists.txt | 5 +++-- src/application/main.cpp | 1 + src/application/match.cpp | 1 + 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 79537d05..48248f85 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -49,6 +49,7 @@ target_include_directories(popsift PUBLIC $ $ + $ $ ${Boost_INCLUDE_DIRS} ${CUDA_INCLUDE_DIRS}) @@ -69,9 +70,9 @@ set(popsift_config_install_dir "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}") # include directory for install set(popsift_include_install_dir "${CMAKE_INSTALL_INCLUDEDIR}") # the name for the generated header version file -set(popsift_version_header_name "${popsift_generated_dir}/version.hpp") +set(popsift_version_header_name "${popsift_generated_dir}/popsift/version.hpp") # the name for the generated config.hpp -set(popsift_config_header_name "${popsift_generated_dir}/sift_config.h") +set(popsift_config_header_name "${popsift_generated_dir}/popsift/sift_config.h") # where to install the generated files set(popsift_install_dir_generated "${popsift_include_install_dir}/popsift") diff --git a/src/application/main.cpp b/src/application/main.cpp index 232ae4e2..dc964b39 100755 --- a/src/application/main.cpp +++ b/src/application/main.cpp @@ -22,6 +22,7 @@ #include #include #include +#include #include #ifdef USE_DEVIL diff --git a/src/application/match.cpp b/src/application/match.cpp index 5a2f452f..e4bf7d7b 
100755 --- a/src/application/match.cpp +++ b/src/application/match.cpp @@ -22,6 +22,7 @@ #include #include #include +#include #include #ifdef USE_DEVIL From 1d540fb8470a0de10d419283e6ea8d391c48a7b6 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Mon, 3 Feb 2020 12:55:22 +0100 Subject: [PATCH 101/285] [cmake] using gnudir --- src/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 48248f85..9c41f38f 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -50,7 +50,7 @@ target_include_directories(popsift $ $ $ - $ + $ ${Boost_INCLUDE_DIRS} ${CUDA_INCLUDE_DIRS}) From 46e5557c7176e10d5ebbfffd7f579063e426e3d4 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Mon, 3 Feb 2020 13:01:32 +0100 Subject: [PATCH 102/285] [app] add print popsift version --- src/application/main.cpp | 3 +++ src/application/match.cpp | 7 +++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/application/main.cpp b/src/application/main.cpp index dc964b39..70fd2fa6 100755 --- a/src/application/main.cpp +++ b/src/application/main.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #ifdef USE_DEVIL @@ -273,6 +274,8 @@ int main(int argc, char **argv) string inputFile = ""; const char* appName = argv[0]; + std::cout << "PopSift version: " << POPSIFT_VERSION_STRING << std::endl; + try { parseargs( argc, argv, config, inputFile ); // Parse command line std::cout << inputFile << std::endl; diff --git a/src/application/match.cpp b/src/application/match.cpp index e4bf7d7b..9a693e43 100755 --- a/src/application/match.cpp +++ b/src/application/match.cpp @@ -19,11 +19,12 @@ #include #include -#include +#include #include +#include #include #include -#include +#include #ifdef USE_DEVIL #include @@ -226,6 +227,8 @@ int main(int argc, char **argv) string rFile = ""; const char* appName = argv[0]; + std::cout << "PopSift version: " << POPSIFT_VERSION_STRING << std::endl; + try { parseargs( 
argc, argv, config, lFile, rFile ); // Parse command line std::cout << lFile << " <-> " << rFile << std::endl; From 79e92a6044f1490a0586136283cbaaeca7ba0ceb Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Mon, 3 Feb 2020 23:15:30 +0100 Subject: [PATCH 103/285] [cmake] add target file for in-build use --- src/CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 9c41f38f..a885d150 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -143,6 +143,9 @@ install( FILES "${popsift_config_header_name}" install( FILES "${popsift_version_header_name}" DESTINATION "${popsift_install_dir_generated}") +# Generate ${PROJECT_NAME}Targets.cmake in the build directory to use the library without installing it +export(TARGETS popsift FILE "${popsift_generated_dir}/${popsift_targets_export_name}.cmake") + if(PopSift_BUILD_EXAMPLES) add_subdirectory(application) endif() From eb3d9b3462656ae063fabcc34150ab865257aeba Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Tue, 4 Feb 2020 18:33:19 +0100 Subject: [PATCH 104/285] [cmake] removed redundant flags --- CMakeLists.txt | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5b32dc65..d4975941 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -37,11 +37,12 @@ else() message(STATUS "Building in ${CMAKE_BUILD_TYPE} configuration") endif() -set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3") -set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -O3") +# for some reason this line is necessary to propagate the standard to nvcc set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") set(CMAKE_CXX_STANDARD 11) set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CUDA_STANDARD 11) +set(CMAKE_CUDA_STANDARD_REQUIRED ON) # ============================================================================== # GNUInstallDirs CMake module @@ -51,9 +52,6 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON) include(GNUInstallDirs) -# 
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -G") -# set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -G") - ################# # BOOST ################# From df44615d46ac862b2d84f5e1793be60304559161 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Tue, 4 Feb 2020 19:32:31 +0100 Subject: [PATCH 105/285] [popsift] using nullptr --- src/popsift/popsift.cpp | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/popsift/popsift.cpp b/src/popsift/popsift.cpp index bb1cc5eb..1ece8497 100755 --- a/src/popsift/popsift.cpp +++ b/src/popsift/popsift.cpp @@ -26,7 +26,7 @@ PopSift::PopSift( const popsift::Config& config, popsift::Config::ProcessingMode _pipe._unused.push( new popsift::ImageFloat ); _pipe._unused.push( new popsift::ImageFloat ); } - _pipe._pyramid = 0; + _pipe._pyramid = nullptr; configure( config, true ); @@ -50,7 +50,7 @@ PopSift::PopSift( ImageMode imode ) _pipe._unused.push( new popsift::ImageFloat ); _pipe._unused.push( new popsift::ImageFloat ); } - _pipe._pyramid = 0; + _pipe._pyramid = nullptr; _pipe._thread_stage1 = new boost::thread( &PopSift::uploadImages, this ); _pipe._thread_stage2 = new boost::thread( &PopSift::extractDownloadLoop, this ); @@ -62,7 +62,7 @@ PopSift::~PopSift() bool PopSift::configure( const popsift::Config& config, bool force ) { - if( _pipe._pyramid != 0 ) { + if( _pipe._pyramid != nullptr ) { return false; } @@ -97,7 +97,7 @@ bool PopSift::private_init( int w, int h ) float upscaleFactor = _config.getUpscaleFactor(); float scaleFactor = 1.0f / powf( 2.0f, -upscaleFactor ); - if( p._pyramid != 0 ) { + if( p._pyramid != nullptr ) { p._pyramid->resetDimensions( _config, ceilf( w * scaleFactor ), ceilf( h * scaleFactor ) ); @@ -171,12 +171,12 @@ SiftJob* PopSift::enqueue( int w, void PopSift::uploadImages( ) { SiftJob* job; - while( ( job = _pipe._queue_stage1.pull() ) != 0 ) { + while( ( job = _pipe._queue_stage1.pull() ) != nullptr ) { popsift::ImageBase* img = 
_pipe._unused.pull(); job->setImg( img ); _pipe._queue_stage2.push( job ); } - _pipe._queue_stage2.push( 0 ); + _pipe._queue_stage2.push( nullptr ); } void PopSift::extractDownloadLoop( ) @@ -184,7 +184,7 @@ void PopSift::extractDownloadLoop( ) Pipe& p = _pipe; SiftJob* job; - while( ( job = p._queue_stage2.pull() ) != 0 ) { + while( ( job = p._queue_stage2.pull() ) != nullptr ) { popsift::ImageBase* img = job->getImg(); private_init( img->getWidth(), img->getHeight() ); @@ -217,7 +217,7 @@ void PopSift::matchPrepareLoop( ) Pipe& p = _pipe; SiftJob* job; - while( ( job = p._queue_stage2.pull() ) != 0 ) { + while( ( job = p._queue_stage2.pull() ) != nullptr ) { popsift::ImageBase* img = job->getImg(); private_init( img->getWidth(), img->getHeight() ); @@ -238,12 +238,12 @@ void PopSift::matchPrepareLoop( ) SiftJob::SiftJob( int w, int h, const unsigned char* imageData ) : _w(w) , _h(h) - , _img(0) + , _img(nullptr) { _f = _p.get_future(); _imageData = (unsigned char*)malloc( w*h ); - if( _imageData != 0 ) { + if( _imageData != nullptr ) { memcpy( _imageData, imageData, w*h ); } else { cerr << __FILE__ << ":" << __LINE__ << " Memory limitation" << endl @@ -255,12 +255,12 @@ SiftJob::SiftJob( int w, int h, const unsigned char* imageData ) SiftJob::SiftJob( int w, int h, const float* imageData ) : _w(w) , _h(h) - , _img(0) + , _img(nullptr) { _f = _p.get_future(); _imageData = (unsigned char*)malloc( w*h*sizeof(float) ); - if( _imageData != 0 ) { + if( _imageData != nullptr ) { memcpy( _imageData, imageData, w*h*sizeof(float) ); } else { cerr << __FILE__ << ":" << __LINE__ << " Memory limitation" << endl From 1f138d90246164770d413cc673a5045e4ada9344 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Tue, 4 Feb 2020 19:34:23 +0100 Subject: [PATCH 106/285] [popsift] adding state attribute _isInit and moving uninit in Pipe fix #70 --- src/popsift/popsift.cpp | 52 ++++++++++++++++++++++++++++++----------- src/popsift/popsift.h | 20 ++++++++++++---- 2 files changed, 55 
insertions(+), 17 deletions(-) diff --git a/src/popsift/popsift.cpp b/src/popsift/popsift.cpp index 1ece8497..ee6dbf52 100755 --- a/src/popsift/popsift.cpp +++ b/src/popsift/popsift.cpp @@ -14,7 +14,7 @@ using namespace std; PopSift::PopSift( const popsift::Config& config, popsift::Config::ProcessingMode mode, ImageMode imode ) - : _image_mode( imode ) + : _image_mode( imode ), _isInit(true) { if( imode == ByteImages ) { @@ -38,7 +38,7 @@ PopSift::PopSift( const popsift::Config& config, popsift::Config::ProcessingMode } PopSift::PopSift( ImageMode imode ) - : _image_mode( imode ) + : _image_mode( imode ), _isInit(true) { if( imode == ByteImages ) { @@ -58,6 +58,10 @@ PopSift::PopSift( ImageMode imode ) PopSift::~PopSift() { + if(_isInit) + { + uninit(); + } } bool PopSift::configure( const popsift::Config& config, bool force ) @@ -121,19 +125,14 @@ bool PopSift::private_init( int w, int h ) void PopSift::uninit( ) { - _pipe._queue_stage1.push( 0 ); - _pipe._thread_stage2->join(); - _pipe._thread_stage1->join(); - delete _pipe._thread_stage2; - delete _pipe._thread_stage1; - - while( !_pipe._unused.empty() ) { - popsift::ImageBase* img = _pipe._unused.pull(); - delete img; + if(!_isInit) + { + std::cout << "[warning] Attempt to release resources from an uninitialized instance" << std::endl; + return; } + _pipe.uninit(); - delete _pipe._pyramid; - _pipe._pyramid = 0; + _isInit = false; } SiftJob* PopSift::enqueue( int w, @@ -317,3 +316,30 @@ popsift::FeaturesDev* SiftJob::getDev() return dynamic_cast( _f.get() ); } +void PopSift::Pipe::uninit() +{ + _queue_stage1.push( nullptr ); + if(_thread_stage2 != nullptr) + { + _thread_stage2->join(); + delete _thread_stage2; + } + if(_thread_stage1 != nullptr) + { + _thread_stage1->join(); + delete _thread_stage1; + } + + while( !_unused.empty() ) + { + popsift::ImageBase* img = _unused.pull(); + delete img; + } + + if(_pyramid != nullptr) + { + delete _pyramid; + _pyramid = nullptr; + } + +} diff --git a/src/popsift/popsift.h 
b/src/popsift/popsift.h index 9f410dfb..126e83e3 100755 --- a/src/popsift/popsift.h +++ b/src/popsift/popsift.h @@ -82,6 +82,11 @@ class PopSift popsift::ImageBase* _current; popsift::Pyramid* _pyramid; + + /** + * @brief Release the allocated resources, if any. + */ + void uninit(); }; public: @@ -95,8 +100,8 @@ class PopSift /* We support more than 1 streams, but we support only one sigma and one * level parameters. */ - PopSift( ImageMode imode = ByteImages ); - PopSift( const popsift::Config& config, + explicit PopSift( ImageMode imode = ByteImages ); + explicit PopSift( const popsift::Config& config, popsift::Config::ProcessingMode mode = popsift::Config::ExtractingMode, ImageMode imode = ByteImages ); ~PopSift(); @@ -118,10 +123,14 @@ class PopSift int h, const float* imageData ); - /** deprecated */ + /** + * @deprecated + * */ inline void uninit( int /*pipe*/ ) { uninit(); } - /** deprecated */ + /** + * @deprecated + **/ inline bool init( int /*pipe*/, int w, int h ) { _last_init_w = w; _last_init_h = h; @@ -164,5 +173,8 @@ class PopSift int _last_init_w; /* to support depreacted interface */ int _last_init_h; /* to support depreacted interface */ ImageMode _image_mode; + + /// whether the object is initialized + bool _isInit{false}; }; From e2f56ba9d4d30459cb67fb51a0674ddf1e2f3ba4 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Wed, 5 Feb 2020 11:55:08 +0100 Subject: [PATCH 107/285] [popsift] removed unused member _current in Pipe --- src/popsift/popsift.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/popsift/popsift.h b/src/popsift/popsift.h index 126e83e3..be3d18e4 100755 --- a/src/popsift/popsift.h +++ b/src/popsift/popsift.h @@ -79,7 +79,6 @@ class PopSift boost::sync_queue _queue_stage1; boost::sync_queue _queue_stage2; boost::sync_queue _unused; - popsift::ImageBase* _current; popsift::Pyramid* _pyramid; From 82296d793c945495b5f68f17986a9eabdd39c31e Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Wed, 5 Feb 2020 11:55:30 +0100 
Subject: [PATCH 108/285] [popsift] init members of Pipe --- src/popsift/popsift.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/popsift/popsift.h b/src/popsift/popsift.h index be3d18e4..5b5b771d 100755 --- a/src/popsift/popsift.h +++ b/src/popsift/popsift.h @@ -74,13 +74,13 @@ class PopSift { struct Pipe { - boost::thread* _thread_stage1; - boost::thread* _thread_stage2; + boost::thread* _thread_stage1{nullptr}; + boost::thread* _thread_stage2{nullptr}; boost::sync_queue _queue_stage1; boost::sync_queue _queue_stage2; boost::sync_queue _unused; - popsift::Pyramid* _pyramid; + popsift::Pyramid* _pyramid{nullptr}; /** * @brief Release the allocated resources, if any. From 0c4e98a13836d45937b532d4fc9f7495f9cba72a Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Wed, 5 Feb 2020 11:56:04 +0100 Subject: [PATCH 109/285] [popsift] set nullptr after delete --- src/popsift/popsift.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/popsift/popsift.cpp b/src/popsift/popsift.cpp index ee6dbf52..3de997ec 100755 --- a/src/popsift/popsift.cpp +++ b/src/popsift/popsift.cpp @@ -323,11 +323,13 @@ void PopSift::Pipe::uninit() { _thread_stage2->join(); delete _thread_stage2; + _thread_stage2 = nullptr; } if(_thread_stage1 != nullptr) { _thread_stage1->join(); delete _thread_stage1; + _thread_stage1 = nullptr; } while( !_unused.empty() ) From 2ec970e43bc7e448b605ee391b84e671c8d65043 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Wed, 5 Feb 2020 11:58:37 +0100 Subject: [PATCH 110/285] [popsift] no need to check nullptr if only delete https://isocpp.org/wiki/faq/freestore-mgmt#delete-handles-null --- src/popsift/popsift.cpp | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/popsift/popsift.cpp b/src/popsift/popsift.cpp index 3de997ec..f7e477d2 100755 --- a/src/popsift/popsift.cpp +++ b/src/popsift/popsift.cpp @@ -338,10 +338,7 @@ void PopSift::Pipe::uninit() delete img; } - if(_pyramid != nullptr) - { - delete 
_pyramid; - _pyramid = nullptr; - } + delete _pyramid; + _pyramid = nullptr; } From d6b581c9008c720252a5b2a5880c7cbf8bf6538d Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Sun, 9 Feb 2020 23:30:14 +0100 Subject: [PATCH 111/285] [popsift] using unique_ptr instead of raw pointer for threads --- src/popsift/popsift.cpp | 16 +++++++--------- src/popsift/popsift.h | 4 ++-- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/src/popsift/popsift.cpp b/src/popsift/popsift.cpp index f7e477d2..65f664a2 100755 --- a/src/popsift/popsift.cpp +++ b/src/popsift/popsift.cpp @@ -30,11 +30,11 @@ PopSift::PopSift( const popsift::Config& config, popsift::Config::ProcessingMode configure( config, true ); - _pipe._thread_stage1 = new boost::thread( &PopSift::uploadImages, this ); + _pipe._thread_stage1.reset( new boost::thread( &PopSift::uploadImages, this )); if( mode == popsift::Config::ExtractingMode ) - _pipe._thread_stage2 = new boost::thread( &PopSift::extractDownloadLoop, this ); + _pipe._thread_stage2.reset( new boost::thread( &PopSift::extractDownloadLoop, this )); else - _pipe._thread_stage2 = new boost::thread( &PopSift::matchPrepareLoop, this ); + _pipe._thread_stage2.reset( new boost::thread( &PopSift::matchPrepareLoop, this )); } PopSift::PopSift( ImageMode imode ) @@ -52,8 +52,8 @@ PopSift::PopSift( ImageMode imode ) } _pipe._pyramid = nullptr; - _pipe._thread_stage1 = new boost::thread( &PopSift::uploadImages, this ); - _pipe._thread_stage2 = new boost::thread( &PopSift::extractDownloadLoop, this ); + _pipe._thread_stage1.reset( new boost::thread( &PopSift::uploadImages, this )); + _pipe._thread_stage2.reset( new boost::thread( &PopSift::extractDownloadLoop, this )); } PopSift::~PopSift() @@ -322,14 +322,12 @@ void PopSift::Pipe::uninit() if(_thread_stage2 != nullptr) { _thread_stage2->join(); - delete _thread_stage2; - _thread_stage2 = nullptr; + _thread_stage2.reset(nullptr); } if(_thread_stage1 != nullptr) { _thread_stage1->join(); - delete 
_thread_stage1; - _thread_stage1 = nullptr; + _thread_stage1.reset(nullptr); } while( !_unused.empty() ) diff --git a/src/popsift/popsift.h b/src/popsift/popsift.h index 5b5b771d..7fd70e03 100755 --- a/src/popsift/popsift.h +++ b/src/popsift/popsift.h @@ -74,8 +74,8 @@ class PopSift { struct Pipe { - boost::thread* _thread_stage1{nullptr}; - boost::thread* _thread_stage2{nullptr}; + std::unique_ptr _thread_stage1{nullptr}; + std::unique_ptr _thread_stage2{nullptr}; boost::sync_queue _queue_stage1; boost::sync_queue _queue_stage2; boost::sync_queue _unused; From 10ea9a0534d1bfc535fcdd922c9516cc2c825a2d Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Thu, 13 Feb 2020 15:27:46 +0100 Subject: [PATCH 112/285] [cmake] minimum version 3.12 --- CMakeLists.txt | 2 +- src/application/CMakeLists.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d4975941..3134d68c 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ # CMake below 3.4 does not work with CUDA separable compilation at all -cmake_minimum_required(VERSION 3.4) +cmake_minimum_required(VERSION 3.12) project(PopSift VERSION 1.0.0 LANGUAGES CXX) diff --git a/src/application/CMakeLists.txt b/src/application/CMakeLists.txt index 54f80bf4..2661fe36 100755 --- a/src/application/CMakeLists.txt +++ b/src/application/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.0) +cmake_minimum_required(VERSION 3.12) project(PopsiftDemo) if(TARGET popsift) From 845cba432c72f50ac9edc2fab6dce4f02de9e031 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Thu, 13 Feb 2020 15:29:48 +0100 Subject: [PATCH 113/285] [doc] add changelog --- CHANGES.md | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 CHANGES.md diff --git a/CHANGES.md b/CHANGES.md new file mode 100644 index 00000000..5aff60e1 --- /dev/null +++ b/CHANGES.md @@ -0,0 +1,22 @@ +# PopSift Changelog + +All notable changes to this project are documented in this 
file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [Unreleased] + +### Added + +### Changed + +### Fixed + +### Removed + +## [1.0.0] - YYYY-MM-DD + +- Support for cuda 9 and 10 +- Support for RTX architecture +- Optional grid filtering From 1f218a93164d396fff5612cca8760be3254ceb22 Mon Sep 17 00:00:00 2001 From: Carsten Griwodz Date: Fri, 21 Feb 2020 09:30:24 +0100 Subject: [PATCH 114/285] [doc] describe float* input option to readme --- README.md | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index e23b4fea..8de8c0f7 100644 --- a/README.md +++ b/README.md @@ -50,7 +50,9 @@ Some build options are available: ## Usage -Two artifacts are made: `libpopsift` and, if enabled, the test application `popsift-demo`. Calling `popsift-demo` without parameters shows the options. +The main artifact created is `libpopsift`. +If enabled, the test application `popsift-demo` is created as well. +Calling `popsift-demo` without parameters shows the options. ### Using PopSift as third party @@ -84,7 +86,9 @@ cmake .. -DPopSift_DIR=/lib/cmake/PopSift/ The caller must create a `popart::Config` struct (documented in `src/sift/sift_conf.h`) to control the behaviour of the PopSift, and instantiate an object of class `PopSift` (found in `src/sift/popsift.h`). After this, images can be enqueued for SIFT extraction using (`enqueue()`). -The only valid input format is a single plane of grayscale unsigned characters. +A valid input is a single plane of grayscale values located in host memory. +They can passed as a pointer to unsigned char, with a value range from 0 to 255, or as a pointer to float, with a value range from 0.0f to 1.0f. + Only host memory limits the number of images that can be enqueued. 
The `enqueue` function returns a pointer to a `SiftJob` immediately and performs the feature extraction asynchronously. The memory of the image passed to enqueue remains the caller's responsibility. Calling `SiftJob::get` on the returned job blocks until features are extracted, and returns them. @@ -134,4 +138,4 @@ PopSift was developed within the project [POPART](http://www.popartproject.eu), ___ -[1]: Lowe, D. G. (2004). Distinctive Image Features from Scale-Invariant Keypoints. International Journal of Computer Vision, 60(2), 91–110. doi:10.1023/B:VISI.0000029664.99615.94 \ No newline at end of file +[1]: Lowe, D. G. (2004). Distinctive Image Features from Scale-Invariant Keypoints. International Journal of Computer Vision, 60(2), 91–110. doi:10.1023/B:VISI.0000029664.99615.94 From 63cc139906b1587274cd7388f2df101ea3231bb8 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Fri, 21 Feb 2020 12:23:30 +0100 Subject: [PATCH 115/285] [doc] added CII badge --- README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 8de8c0f7..acfdab6e 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,9 @@ -# PopSift +# PopSift +[![CII Best Practices](https://bestpractices.coreinfrastructure.org/projects/3728/badge)](https://bestpractices.coreinfrastructure.org/projects/3728) -PopSift is an implementation of the SIFT algorithm in CUDA. +PopSift is an open-source implementation of the SIFT algorithm in CUDA. PopSift tries to stick as closely as possible to David Lowe's famous paper [1], while extracting features from an image in real-time at least on an NVidia GTX 980 Ti GPU. 
## HW requirements From d2d7618bcc62e6bfff9285652af780f9995fa77f Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Fri, 21 Feb 2020 12:40:44 +0100 Subject: [PATCH 116/285] [popsift] no need to explicit init --- src/popsift/popsift.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/popsift/popsift.h b/src/popsift/popsift.h index 7fd70e03..79702ceb 100755 --- a/src/popsift/popsift.h +++ b/src/popsift/popsift.h @@ -74,8 +74,8 @@ class PopSift { struct Pipe { - std::unique_ptr _thread_stage1{nullptr}; - std::unique_ptr _thread_stage2{nullptr}; + std::unique_ptr _thread_stage1; + std::unique_ptr _thread_stage2; boost::sync_queue _queue_stage1; boost::sync_queue _queue_stage2; boost::sync_queue _unused; From 72796c865b14a380ff1d66d4b62db5dd07791e9c Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Fri, 21 Feb 2020 13:41:07 +0100 Subject: [PATCH 117/285] [popsift] removed default and copy constructors --- src/popsift/popsift.cpp | 4 ++-- src/popsift/popsift.h | 6 +++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/popsift/popsift.cpp b/src/popsift/popsift.cpp index 65f664a2..1644110e 100755 --- a/src/popsift/popsift.cpp +++ b/src/popsift/popsift.cpp @@ -14,7 +14,7 @@ using namespace std; PopSift::PopSift( const popsift::Config& config, popsift::Config::ProcessingMode mode, ImageMode imode ) - : _image_mode( imode ), _isInit(true) + : _image_mode( imode ) { if( imode == ByteImages ) { @@ -38,7 +38,7 @@ PopSift::PopSift( const popsift::Config& config, popsift::Config::ProcessingMode } PopSift::PopSift( ImageMode imode ) - : _image_mode( imode ), _isInit(true) + : _image_mode( imode ) { if( imode == ByteImages ) { diff --git a/src/popsift/popsift.h b/src/popsift/popsift.h index 79702ceb..860c85fb 100755 --- a/src/popsift/popsift.h +++ b/src/popsift/popsift.h @@ -96,6 +96,10 @@ class PopSift }; public: + + PopSift() = delete; + PopSift(const PopSift&) = delete; + /* We support more than 1 streams, but we support only one 
sigma and one * level parameters. */ @@ -174,6 +178,6 @@ class PopSift ImageMode _image_mode; /// whether the object is initialized - bool _isInit{false}; + bool _isInit{true}; }; From d6204cfb3ffda78a1088ca5bf71f56c1479b3bec Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Fri, 21 Feb 2020 13:41:43 +0100 Subject: [PATCH 118/285] [popsift] fix uninit members --- src/popsift/popsift.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/popsift/popsift.h b/src/popsift/popsift.h index 860c85fb..93bfa6e2 100755 --- a/src/popsift/popsift.h +++ b/src/popsift/popsift.h @@ -173,8 +173,8 @@ class PopSift */ popsift::Config _shadow_config; - int _last_init_w; /* to support depreacted interface */ - int _last_init_h; /* to support depreacted interface */ + int _last_init_w{}; /* to support deprecated interface */ + int _last_init_h{}; /* to support deprecated interface */ ImageMode _image_mode; /// whether the object is initialized From fb033e43be945f51ec871a0bfaaa416cd3d909cf Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Fri, 21 Feb 2020 15:39:28 +0100 Subject: [PATCH 119/285] [cmake] add language CXX to apps --- src/application/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/application/CMakeLists.txt b/src/application/CMakeLists.txt index bc4b9fdc..cdae3079 100755 --- a/src/application/CMakeLists.txt +++ b/src/application/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.0) -project(PopsiftDemo) +project(PopsiftDemo LANGUAGES CXX) if(TARGET popsift) # when compiled in the repository the target is already defined From da4aeda921f2cb80e116ee5c43eda2112235cc53 Mon Sep 17 00:00:00 2001 From: Carsten Griwodz Date: Mon, 24 Feb 2020 09:06:22 +0100 Subject: [PATCH 120/285] [popsift] remove redundant 0-initialization --- src/popsift/popsift.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/popsift/popsift.cpp b/src/popsift/popsift.cpp index 1644110e..29b7dc6b 100755 --- 
a/src/popsift/popsift.cpp +++ b/src/popsift/popsift.cpp @@ -26,7 +26,6 @@ PopSift::PopSift( const popsift::Config& config, popsift::Config::ProcessingMode _pipe._unused.push( new popsift::ImageFloat ); _pipe._unused.push( new popsift::ImageFloat ); } - _pipe._pyramid = nullptr; configure( config, true ); @@ -50,7 +49,6 @@ PopSift::PopSift( ImageMode imode ) _pipe._unused.push( new popsift::ImageFloat ); _pipe._unused.push( new popsift::ImageFloat ); } - _pipe._pyramid = nullptr; _pipe._thread_stage1.reset( new boost::thread( &PopSift::uploadImages, this )); _pipe._thread_stage2.reset( new boost::thread( &PopSift::extractDownloadLoop, this )); From 956bf2213a8eceb4e7560545d1b61883858698b5 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Tue, 25 Feb 2020 09:34:05 +0100 Subject: [PATCH 121/285] [cmake] add message when popsift found in apps --- src/application/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/src/application/CMakeLists.txt b/src/application/CMakeLists.txt index 2661fe36..0e9d86cc 100755 --- a/src/application/CMakeLists.txt +++ b/src/application/CMakeLists.txt @@ -10,6 +10,7 @@ else() # See 5: # * http://www.cmake.org/cmake/help/v3.0/command/find_package.html find_package(PopSift CONFIG REQUIRED) + message(STATUS "Found PopSift, version: ${PopSift_VERSION}") endif() find_package(Boost 1.53.0 REQUIRED COMPONENTS program_options system filesystem) From 9a7c9ecebb060febf7cca0e842349f9001a87324 Mon Sep 17 00:00:00 2001 From: Carsten Griwodz Date: Tue, 25 Feb 2020 11:59:28 +0100 Subject: [PATCH 122/285] [cmake] first attempt at CMake script for choosing CCs --- cmake/ChooseCudaCC.cmake | 81 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) create mode 100755 cmake/ChooseCudaCC.cmake diff --git a/cmake/ChooseCudaCC.cmake b/cmake/ChooseCudaCC.cmake new file mode 100755 index 00000000..f5e3e733 --- /dev/null +++ b/cmake/ChooseCudaCC.cmake @@ -0,0 +1,81 @@ +# +# after returning from this function, do not forget to 
call the following: +# set(RESULT_NAME ${RESULT_NAME} CACHE STRING "CUDA CC versions to compile") +# replacing your own variable for RESULT_NAME +# +# We assume that MINCUDAVERSION defaults to 7.0 +# +function(ChooseCudaCC RESULT_NAME MINCC MINCUDAVERSION) + if(NOT DEFINED ${MINCC}) + message(FATAL_ERROR "CMake function ChooseCudaCC must be called with a minimal CC") + endif() + if(NOT DEFINED ${MINCUDAVERSION}) + set(MINCUDAVERSION 70) + endif() + + find_package(CUDA ${MINCUDAVERSION} REQUIRED) + + if(NOT CUDA_FOUND) + message(FATAL_ERROR "Could not find CUDA >= 7.0") + endif() + + # + # Create a list of possible CCs for each host processor. + # This may require tuning: CUDA cards exist in AIX machines with POWER CPUs, + # it is possible that non-Tegra ARM systems exist as well. + # For now, this is my best guess. + # + if((CMAKE_SYSTEM_PROCESSOR STREQUAL "i686") OR (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")) + set(CC_LIST_BY_SYSTEM_PROCESSOR 20 21 30 35 50 52 60 61 70 75) + elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^arm") + set(CC_LIST_BY_SYSTEM_PROCESSOR 32 53 62 72) + else() + message(FATAL_ERROR "Unknown how to build for ${CMAKE_SYSTEM_PROCESSOR}") + endif() + # + # Default setting of the CUDA CC versions to compile. + # Shortening the lists saves a lot of compile time. 
+ # + set(CUDA_MIN_CC 20) + set(CUDA_MAX_CC 75) + if(CUDA_VERSION_MAJOR GREATER_EQUAL 10) + set(CUDA_MIN_CC 30) + elseif(CUDA_VERSION_MAJOR GREATER_EQUAL 9) + set(CUDA_MIN_CC 30) + set(CUDA_MAX_CC 72) + elseif(CUDA_VERSION_MAJOR GREATER_EQUAL 8) + set(CUDA_MAX_CC 62) + elseif(CUDA_VERSION_MAJOR GREATER_EQUAL 7) + set(CUDA_MAX_CC 53) + else() + message(FATAL_ERROR "We do not support a CUDA SDK below version 7.0") + endif() + + set(CC_LIST "") + foreach(CC ${CC_LIST_BY_SYSTEM_PROCESSOR}) + if( (${CC} GREATER ${MINCC}) AND + (${CC} GREATER_EQUAL ${CUDA_MIN_CC}) AND + (${CC} LESS_EQUAL ${CUDA_MAX_CC}) ) + list(APPEND CC_LIST ${CC}) + endif() + endforeach() + + # + # Add all requested CUDA CCs to the command line for offline compilation + # + list(SORT CC_LIST) + foreach(CC_VERSION ${CC_LIST}) + set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-gencode;arch=compute_${CC_VERSION},code=sm_${CC_VERSION}") + endforeach() + + # + # Use the highest request CUDA CC for CUDA JIT compilation + # + list(LENGTH CC_LIST CC_LIST_LEN) + MATH(EXPR CC_LIST_LEN "${CC_LIST_LEN}-1") + list(GET CC_LIST ${CC_LIST_LEN} CC_LIST_LAST) + set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-gencode;arch=compute_${CC_LIST_LAST},code=compute_${CC_LIST_LAST}") + + set(${RESULT_NAME} ${CC_LIST} PARENT_SCOPE) +endfunction() + From a12d236cec2b96e5aa86ee293df4eb446bcdd1a5 Mon Sep 17 00:00:00 2001 From: Carsten Griwodz Date: Tue, 25 Feb 2020 13:54:17 +0100 Subject: [PATCH 123/285] [cmake] fix parameter list for ChooseCudaCC --- cmake/ChooseCudaCC.cmake | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/cmake/ChooseCudaCC.cmake b/cmake/ChooseCudaCC.cmake index f5e3e733..f055a0fe 100755 --- a/cmake/ChooseCudaCC.cmake +++ b/cmake/ChooseCudaCC.cmake @@ -3,14 +3,15 @@ # set(RESULT_NAME ${RESULT_NAME} CACHE STRING "CUDA CC versions to compile") # replacing your own variable for RESULT_NAME # +# We assume that MINCC default to 20 # We assume that MINCUDAVERSION defaults to 7.0 # 
function(ChooseCudaCC RESULT_NAME MINCC MINCUDAVERSION) - if(NOT DEFINED ${MINCC}) - message(FATAL_ERROR "CMake function ChooseCudaCC must be called with a minimal CC") + if(NOT DEFINED MINCC) + set(MINCC 20) endif() - if(NOT DEFINED ${MINCUDAVERSION}) - set(MINCUDAVERSION 70) + if(NOT DEFINED MINCUDAVERSION) + set(MINCUDAVERSION 7.0) endif() find_package(CUDA ${MINCUDAVERSION} REQUIRED) @@ -32,6 +33,7 @@ function(ChooseCudaCC RESULT_NAME MINCC MINCUDAVERSION) else() message(FATAL_ERROR "Unknown how to build for ${CMAKE_SYSTEM_PROCESSOR}") endif() + # # Default setting of the CUDA CC versions to compile. # Shortening the lists saves a lot of compile time. @@ -63,9 +65,10 @@ function(ChooseCudaCC RESULT_NAME MINCC MINCUDAVERSION) # # Add all requested CUDA CCs to the command line for offline compilation # + set(GENCODE_FLAGS "${CUDA_NVCC_FLAGS}") list(SORT CC_LIST) foreach(CC_VERSION ${CC_LIST}) - set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-gencode;arch=compute_${CC_VERSION},code=sm_${CC_VERSION}") + set(GENCODE_FLAGS "${GENCODE_FLAGS};-gencode;arch=compute_${CC_VERSION},code=sm_${CC_VERSION}") endforeach() # @@ -74,8 +77,13 @@ function(ChooseCudaCC RESULT_NAME MINCC MINCUDAVERSION) list(LENGTH CC_LIST CC_LIST_LEN) MATH(EXPR CC_LIST_LEN "${CC_LIST_LEN}-1") list(GET CC_LIST ${CC_LIST_LEN} CC_LIST_LAST) - set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-gencode;arch=compute_${CC_LIST_LAST},code=compute_${CC_LIST_LAST}") + set(GENCODE_FLAGS "${GENCODE_FLAGS};-gencode;arch=compute_${CC_LIST_LAST},code=compute_${CC_LIST_LAST}") + # + # Two variables are exported to the parent scope. 
One is passed through the + # environment (CUDA_NVCC_FLAGS), the other is passed by name (RESULT_NAME) + # + set(CUDA_NVCC_FLAGS ${GENCODE_FLAGS} PARENT_SCOPE) set(${RESULT_NAME} ${CC_LIST} PARENT_SCOPE) endfunction() From 6856846c63cefc0f55d106f3447440cdb5ad6bb5 Mon Sep 17 00:00:00 2001 From: Carsten Griwodz Date: Tue, 25 Feb 2020 13:54:58 +0100 Subject: [PATCH 124/285] [cmake] use cmake helper function ChooseCudaCC --- CMakeLists.txt | 39 ++++++++++----------------------------- 1 file changed, 10 insertions(+), 29 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3134d68c..cc0a25cf 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -86,16 +86,16 @@ if(NOT CUDA_FOUND) message(FATAL_ERROR "Could not find CUDA >= 7.0") endif() -# -# Default setting of the CUDA CC versions to compile. -# Shortening the lists saves a lot of compile time. -# -if(CUDA_VERSION_MAJOR GREATER 7) - set(PopSift_CUDA_CC_LIST_BASIC 30 35 50 52 60 61 62) -else() - set(PopSift_CUDA_CC_LIST_BASIC 30 35 50 52 ) +message(STATUS "CUDA Version is ${CUDA_VERSION}") + +if(NOT DEFINED PopSift_CUDA_CC_LIST) + include(ChooseCudaCC) + set(PopSift_MIN_CC 30) + set(PopSift_MIN_CUDA_VERSION 7.0) + ChooseCudaCC(PopSift_CUDA_CC_LIST_BASIC ${PopSift_MIN_CC} ${PopSift_MIN_CUDA_VERSION}) + set(PopSift_CUDA_CC_LIST ${PopSift_CUDA_CC_LIST_BASIC} CACHE STRING "CUDA CC versions to compile") endif() -set(PopSift_CUDA_CC_LIST ${PopSift_CUDA_CC_LIST_BASIC} CACHE STRING "CUDA CC versions to compile") +message(STATUS "Compiling for CUDA CCs ${PopSift_CUDA_CC_LIST}") if(PopSift_USE_NVTX_PROFILING) message(STATUS "PROFILING CPU CODE: NVTX is in use") @@ -109,7 +109,7 @@ endif() set(CUDA_SEPARABLE_COMPILATION ON) if(UNIX AND NOT APPLE) - set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-Xcompiler;-rdynamic;-lineinfo") + set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-Xcompiler;-rdynamic") # set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-Xptxas;-v") # set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-Xptxas;-warn-double-usage") 
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};--keep") @@ -129,30 +129,11 @@ if(PopSift_USE_POSITION_INDEPENDENT_CODE) set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-Xcompiler;-fPIC") endif() -# -# Add all requested CUDA CCs to the command line for offline compilation -# -list(SORT PopSift_CUDA_CC_LIST) -foreach(PopSift_CC_VERSION ${PopSift_CUDA_CC_LIST}) - set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-gencode;arch=compute_${PopSift_CC_VERSION},code=sm_${PopSift_CC_VERSION}") -endforeach() - -# -# Use the highest request CUDA CC for CUDA JIT compilation -# -list(LENGTH PopSift_CUDA_CC_LIST PopSift_CC_LIST_LEN) -MATH(EXPR PopSift_CC_LIST_LEN "${PopSift_CC_LIST_LEN}-1") -list(GET PopSift_CUDA_CC_LIST ${PopSift_CC_LIST_LEN} PopSift_CUDA_CC_LIST_LAST) -set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-gencode;arch=compute_${PopSift_CUDA_CC_LIST_LAST},code=compute_${PopSift_CUDA_CC_LIST_LAST}") - -# default stream legacy implies that the 0 stream synchronizes all streams # default stream per-thread implies that each host thread has one non-synchronizing 0-stream # currently, the code requires legacy mode set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};--default-stream;legacy") # set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};--default-stream;per-thread") -message(STATUS "CUDA Version is ${CUDA_VERSION}") -message(STATUS "Compiling for CUDA CCs: ${PopSift_CUDA_CC_LIST}") if( ( CUDA_VERSION VERSION_EQUAL "7.5" ) OR ( CUDA_VERSION VERSION_GREATER "7.5") ) if(PopSift_NVCC_WARNINGS) set(CUDA_NVCC_FLAGS_RELEASE "${CUDA_NVCC_FLAGS_RELEASE};-Xptxas;-warn-lmem-usage") From 21baa3e692975565e7030bfbfe44087a5f8c5d83 Mon Sep 17 00:00:00 2001 From: Carsten Griwodz Date: Tue, 25 Feb 2020 13:57:18 +0100 Subject: [PATCH 125/285] [cmake] correct CUDA version error output --- cmake/ChooseCudaCC.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/ChooseCudaCC.cmake b/cmake/ChooseCudaCC.cmake index f055a0fe..e908efaf 100755 --- a/cmake/ChooseCudaCC.cmake +++ b/cmake/ChooseCudaCC.cmake @@ -17,7 +17,7 @@ 
function(ChooseCudaCC RESULT_NAME MINCC MINCUDAVERSION) find_package(CUDA ${MINCUDAVERSION} REQUIRED) if(NOT CUDA_FOUND) - message(FATAL_ERROR "Could not find CUDA >= 7.0") + message(FATAL_ERROR "Could not find CUDA >= ${MINCUDAVERSION}") endif() # From 717f108aa82409c0afc79cbded4fab689b3f8741 Mon Sep 17 00:00:00 2001 From: Carsten Griwodz Date: Tue, 25 Feb 2020 14:22:46 +0100 Subject: [PATCH 126/285] [cuda] remove partially incompatible launch bound --- src/popsift/s_extrema.cu | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/popsift/s_extrema.cu b/src/popsift/s_extrema.cu index d2c82de7..8bcdd28d 100644 --- a/src/popsift/s_extrema.cu +++ b/src/popsift/s_extrema.cu @@ -506,9 +506,6 @@ bool find_extrema_in_dog_sub( cudaTextureObject_t dog, template __global__ -#ifdef NDEBUG -__launch_bounds__(128,16) -#endif void find_extrema_in_dog( cudaTextureObject_t dog, int octave, int width, From ec84e9c36aeda6d782fe48d519cc56729864eca3 Mon Sep 17 00:00:00 2001 From: Carsten Griwodz Date: Tue, 25 Feb 2020 14:24:26 +0100 Subject: [PATCH 127/285] [cmake] fix camelCase --- CMakeLists.txt | 4 ++-- cmake/ChooseCudaCC.cmake | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index cc0a25cf..20ed625b 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -92,7 +92,7 @@ if(NOT DEFINED PopSift_CUDA_CC_LIST) include(ChooseCudaCC) set(PopSift_MIN_CC 30) set(PopSift_MIN_CUDA_VERSION 7.0) - ChooseCudaCC(PopSift_CUDA_CC_LIST_BASIC ${PopSift_MIN_CC} ${PopSift_MIN_CUDA_VERSION}) + chooseCudaCC(PopSift_CUDA_CC_LIST_BASIC ${PopSift_MIN_CC} ${PopSift_MIN_CUDA_VERSION}) set(PopSift_CUDA_CC_LIST ${PopSift_CUDA_CC_LIST_BASIC} CACHE STRING "CUDA CC versions to compile") endif() message(STATUS "Compiling for CUDA CCs ${PopSift_CUDA_CC_LIST}") @@ -208,4 +208,4 @@ message(STATUS "Additional warning for CUDA nvcc: " ${PopSift_NVCC_WARNINGS}) message(STATUS "Compiling for CUDA CCs: ${PopSift_CUDA_CC_LIST}") message(STATUS "Install path: " 
${CMAKE_INSTALL_PREFIX}) message("\n******************************************") -message("\n") \ No newline at end of file +message("\n") diff --git a/cmake/ChooseCudaCC.cmake b/cmake/ChooseCudaCC.cmake index e908efaf..a5da2ed3 100755 --- a/cmake/ChooseCudaCC.cmake +++ b/cmake/ChooseCudaCC.cmake @@ -6,7 +6,7 @@ # We assume that MINCC default to 20 # We assume that MINCUDAVERSION defaults to 7.0 # -function(ChooseCudaCC RESULT_NAME MINCC MINCUDAVERSION) +function(chooseCudaCC RESULT_NAME MINCC MINCUDAVERSION) if(NOT DEFINED MINCC) set(MINCC 20) endif() From dd3949bda88af27fb6e8ed9158e49373bc6e959c Mon Sep 17 00:00:00 2001 From: Carsten Griwodz Date: Wed, 26 Feb 2020 08:48:18 +0100 Subject: [PATCH 128/285] [cmake] add vocal parameters, handle cached CCs --- CMakeLists.txt | 48 ++++++++-------- cmake/ChooseCudaCC.cmake | 118 ++++++++++++++++++++++++++++++--------- 2 files changed, 119 insertions(+), 47 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 20ed625b..84981a9d 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -88,14 +88,18 @@ endif() message(STATUS "CUDA Version is ${CUDA_VERSION}") +include(ChooseCudaCC) if(NOT DEFINED PopSift_CUDA_CC_LIST) - include(ChooseCudaCC) - set(PopSift_MIN_CC 30) - set(PopSift_MIN_CUDA_VERSION 7.0) - chooseCudaCC(PopSift_CUDA_CC_LIST_BASIC ${PopSift_MIN_CC} ${PopSift_MIN_CUDA_VERSION}) + chooseCudaCC(PopSift_CUDA_CC_LIST_BASIC + PopSift_CUDA_GENCODE_FLAGS + MIN_CC 30 + MIN_CUDA_VERSION 7.0) set(PopSift_CUDA_CC_LIST ${PopSift_CUDA_CC_LIST_BASIC} CACHE STRING "CUDA CC versions to compile") +else() + getFlagsForCudaCCList(PopSift_CUDA_CC_LIST + PopSift_CUDA_GENCODE_FLAGS) endif() -message(STATUS "Compiling for CUDA CCs ${PopSift_CUDA_CC_LIST}") +list(APPEND CUDA_NVCC_FLAGS "${PopSift_CUDA_GENCODE_FLAGS}") if(PopSift_USE_NVTX_PROFILING) message(STATUS "PROFILING CPU CODE: NVTX is in use") @@ -103,17 +107,17 @@ endif() if(PopSift_ERRCHK_AFTER_KERNEL) message(STATUS "Synchronizing and checking errors after every kernel 
call") - set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-DERRCHK_AFTER_KERNEL") + list(APPEND CUDA_NVCC_FLAGS "-DERRCHK_AFTER_KERNEL") endif() set(CUDA_SEPARABLE_COMPILATION ON) if(UNIX AND NOT APPLE) - set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-Xcompiler;-rdynamic") - # set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-Xptxas;-v") - # set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-Xptxas;-warn-double-usage") - set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};--keep") - set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};--source-in-ptx") + list(APPEND CUDA_NVCC_FLAGS "-Xcompiler;-rdynamic") + # set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-Xptxas;-v") + # set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-Xptxas;-warn-double-usage") + list(APPEND CUDA_NVCC_FLAGS_DEBUG "--keep") + list(APPEND CUDA_NVCC_FLAGS_DEBUG "--source-in-ptx") endif() # The following if should not be necessary, but apparently there is a bug in FindCUDA.cmake that @@ -121,35 +125,35 @@ endif() # see https://gitlab.kitware.com/cmake/cmake/issues/16411 if(CMAKE_BUILD_TYPE STREQUAL "Debug") message(STATUS "Building in debug mode") - set(CUDA_NVCC_FLAGS_DEBUG "${CUDA_NVCC_FLAGS_DEBUG};-G") + list(APPEND CUDA_NVCC_FLAGS_DEBUG "-G") endif() -set(CUDA_NVCC_FLAGS_RELEASE "${CUDA_NVCC_FLAGS_RELEASE};-O3") +list(APPEND CUDA_NVCC_FLAGS_RELEASE "-O3") if(PopSift_USE_POSITION_INDEPENDENT_CODE) - set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-Xcompiler;-fPIC") + list(APPEND CUDA_NVCC_FLAGS "-Xcompiler;-fPIC") endif() # default stream per-thread implies that each host thread has one non-synchronizing 0-stream # currently, the code requires legacy mode -set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};--default-stream;legacy") +list(APPEND CUDA_NVCC_FLAGS "--default-stream;legacy") # set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};--default-stream;per-thread") -if( ( CUDA_VERSION VERSION_EQUAL "7.5" ) OR ( CUDA_VERSION VERSION_GREATER "7.5") ) +if(CUDA_VERSION VERSION_GREATER_EQUAL "7.5") if(PopSift_NVCC_WARNINGS) - set(CUDA_NVCC_FLAGS_RELEASE 
"${CUDA_NVCC_FLAGS_RELEASE};-Xptxas;-warn-lmem-usage") - set(CUDA_NVCC_FLAGS_RELEASE "${CUDA_NVCC_FLAGS_RELEASE};-Xptxas;-warn-spills") - set(CUDA_NVCC_FLAGS_RELEASE "${CUDA_NVCC_FLAGS_RELEASE};-Xptxas;--warn-on-local-memory-usage") - set(CUDA_NVCC_FLAGS_RELEASE "${CUDA_NVCC_FLAGS_RELEASE};-Xptxas;--warn-on-spills") + list(APPEND CUDA_NVCC_FLAGS_RELEASE "-Xptxas;-warn-lmem-usage") + list(APPEND CUDA_NVCC_FLAGS_RELEASE "-Xptxas;-warn-spills") + list(APPEND CUDA_NVCC_FLAGS_RELEASE "-Xptxas;--warn-on-local-memory-usage") + list(APPEND CUDA_NVCC_FLAGS_RELEASE "-Xptxas;--warn-on-spills") endif() endif() -if(PopSift_USE_NORMF AND CUDA_VERSION VERSION_GREATER "7.4") +if(PopSift_USE_NORMF AND CUDA_VERSION VERSION_GREATER_EQUAL "7.5") set(PopSift_HAVE_NORMF 1) else() set(PopSift_HAVE_NORMF 0) endif() -if( ( CUDA_VERSION VERSION_EQUAL "9.0" ) OR ( CUDA_VERSION VERSION_GREATER "9.0") ) +if(CUDA_VERSION VERSION_GREATER_EQUAL "9.0") set(HAVE_SHFL_DOWN_SYNC 1) else() set(HAVE_SHFL_DOWN_SYNC 0) diff --git a/cmake/ChooseCudaCC.cmake b/cmake/ChooseCudaCC.cmake index a5da2ed3..684b9172 100755 --- a/cmake/ChooseCudaCC.cmake +++ b/cmake/ChooseCudaCC.cmake @@ -1,23 +1,46 @@ # -# after returning from this function, do not forget to call the following: -# set(RESULT_NAME ${RESULT_NAME} CACHE STRING "CUDA CC versions to compile") -# replacing your own variable for RESULT_NAME +# CUDA hardware and SDKs are developing over time, different SDK support different +# hardware, and supported hardware differs depending on platform even for the same +# SDK version. +# This file attempts to provide a function that returns a valid selection of hardware +# for the current SDK and platform. # -# We assume that MINCC default to 20 -# We assume that MINCUDAVERSION defaults to 7.0 +# It will require updates as CUDA develops, and it is currently not complete in terms +# of existing platforms that support CUDA. 
# -function(chooseCudaCC RESULT_NAME MINCC MINCUDAVERSION) - if(NOT DEFINED MINCC) - set(MINCC 20) +# This function does not edit cache entries or variables in the parent scope +# except for the variables whose names are supplied for SUPPORTED_CC and +# SUPPORTED_GENCODE_FLAGS +# +# You may want to cache SUPPORTED_CC and append SUPPORTED_GENCODE_FLAGS to +# CUDA_NVCC_FLAGS. +# Like this: +# set(MYCC ${MYCC} CACHE STRING "CUDA CC versions to compile") +# end +# set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};${MY_GENCODE_FLAGS}") +# +# We assume that ${SUPPORTED_CC} can be overwritten. +# We assume that ${SUPPORTED_GENCODE_FLAGS} can be overwritten. +# We assume that MIN_CC default to 20 +# We assume that MIN_CUDA_VERSION defaults to 7.0 +# +function(chooseCudaCC SUPPORTED_CC SUPPORTED_GENCODE_FLAGS) + set(options "") + set(oneValueArgs MIN_CUDA_VERSION MIN_CC) + set(multipleValueArgs "") + cmake_parse_arguments(CHOOSE_CUDA "${options}" "${oneValueArgs}" "${multipleValueArgs}" ${ARGN}) + + if(NOT DEFINED CHOOSE_CUDA_MIN_CC) + set(CHOOSE_CUDA_MIN_CC 20) endif() - if(NOT DEFINED MINCUDAVERSION) - set(MINCUDAVERSION 7.0) + if(NOT DEFINED CHOOSE_CUDA_MIN_CUDA_VERSION) + set(CHOOSE_CUDA_MIN_CUDA_VERSION 7.0) endif() - find_package(CUDA ${MINCUDAVERSION} REQUIRED) + find_package(CUDA ${CHOOSE_CUDA_MIN_CUDA_VERSION} REQUIRED) if(NOT CUDA_FOUND) - message(FATAL_ERROR "Could not find CUDA >= ${MINCUDAVERSION}") + message(FATAL_ERROR "Could not find CUDA >= ${CHOOSE_CUDA_MIN_CUDA_VERSION}") endif() # @@ -26,11 +49,17 @@ function(chooseCudaCC RESULT_NAME MINCC MINCUDAVERSION) # it is possible that non-Tegra ARM systems exist as well. # For now, this is my best guess. 
# - if((CMAKE_SYSTEM_PROCESSOR STREQUAL "i686") OR (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")) - set(CC_LIST_BY_SYSTEM_PROCESSOR 20 21 30 35 50 52 60 61 70 75) - elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^arm") - set(CC_LIST_BY_SYSTEM_PROCESSOR 32 53 62 72) - else() + set(TEGRA_SUPPORTED_PROCESSORS "armv71;arm;aarch64") + set(OTHER_SUPPORTED_PROCESSORS "i686;x86_64;AMD64") + + set(CC_LIST_BY_SYSTEM_PROCESSOR "") + if(CMAKE_SYSTEM_PROCESSOR IN_LIST OTHER_SUPPORTED_PROCESSORS) + list(APPEND CC_LIST_BY_SYSTEM_PROCESSOR "20;21;30;35;50;52;60;61;70;75") + endif() + if(CMAKE_SYSTEM_PROCESSOR IN_LIST TEGRA_SUPPORTED_PROCESSORS) + list(APPEND CC_LIST_BY_SYSTEM_PROCESSOR "32;53;62;72") + endif() + if(NOT CC_LIST_BY_SYSTEM_PROCESSOR) message(FATAL_ERROR "Unknown how to build for ${CMAKE_SYSTEM_PROCESSOR}") endif() @@ -52,11 +81,13 @@ function(chooseCudaCC RESULT_NAME MINCC MINCUDAVERSION) else() message(FATAL_ERROR "We do not support a CUDA SDK below version 7.0") endif() + if(${CHOOSE_CUDA_MIN_CC} GREATER ${CUDA_MIN_CC}) + set(CUDA_MIN_CC ${CHOOSE_CUDA_MIN_CC}) + endif() set(CC_LIST "") foreach(CC ${CC_LIST_BY_SYSTEM_PROCESSOR}) - if( (${CC} GREATER ${MINCC}) AND - (${CC} GREATER_EQUAL ${CUDA_MIN_CC}) AND + if( (${CC} GREATER_EQUAL ${CUDA_MIN_CC}) AND (${CC} LESS_EQUAL ${CUDA_MAX_CC}) ) list(APPEND CC_LIST ${CC}) endif() @@ -65,10 +96,46 @@ function(chooseCudaCC RESULT_NAME MINCC MINCUDAVERSION) # # Add all requested CUDA CCs to the command line for offline compilation # - set(GENCODE_FLAGS "${CUDA_NVCC_FLAGS}") + set(GENCODE_FLAGS "") + list(SORT CC_LIST) + foreach(CC_VERSION ${CC_LIST}) + list(APPEND GENCODE_FLAGS "-gencode;arch=compute_${CC_VERSION},code=sm_${CC_VERSION}") + endforeach() + + # + # Use the highest request CUDA CC for CUDA JIT compilation + # + list(LENGTH CC_LIST CC_LIST_LEN) + MATH(EXPR CC_LIST_LEN "${CC_LIST_LEN}-1") + list(GET CC_LIST ${CC_LIST_LEN} CC_LIST_LAST) + list(APPEND GENCODE_FLAGS 
"-gencode;arch=compute_${CC_LIST_LAST},code=compute_${CC_LIST_LAST}") + + # + # Two variables are exported to the parent scope. One is passed through the + # environment (CUDA_NVCC_FLAGS), the other is passed by name (SUPPORTED_CC) + # + set(${SUPPORTED_GENCODE_FLAGS} "${GENCODE_FLAGS}" PARENT_SCOPE) + set(${SUPPORTED_CC} "${CC_LIST}" PARENT_SCOPE) +endfunction() + +# +# This function is used to create a list of gencode instructions for a given list +# of CCs. +# It takes as arguments is list of CCs and a list variable that can be filled with +# gencode strings. +# +# We assume that ${SUPPORTED_GENCODE_FLAGS} can be overwritten. +# +function(getFlagsForCudaCCList INPUT_CC_LIST SUPPORTED_GENCODE_FLAGS) + set(CC_LIST "${${INPUT_CC_LIST}}") + + # + # Add all requested CUDA CCs to the command line for offline compilation + # + set(GENCODE_FLAGS "") list(SORT CC_LIST) foreach(CC_VERSION ${CC_LIST}) - set(GENCODE_FLAGS "${GENCODE_FLAGS};-gencode;arch=compute_${CC_VERSION},code=sm_${CC_VERSION}") + list(APPEND GENCODE_FLAGS "-gencode;arch=compute_${CC_VERSION},code=sm_${CC_VERSION}") endforeach() # @@ -77,13 +144,14 @@ function(chooseCudaCC RESULT_NAME MINCC MINCUDAVERSION) list(LENGTH CC_LIST CC_LIST_LEN) MATH(EXPR CC_LIST_LEN "${CC_LIST_LEN}-1") list(GET CC_LIST ${CC_LIST_LEN} CC_LIST_LAST) - set(GENCODE_FLAGS "${GENCODE_FLAGS};-gencode;arch=compute_${CC_LIST_LAST},code=compute_${CC_LIST_LAST}") + list(APPEND GENCODE_FLAGS "-gencode;arch=compute_${CC_LIST_LAST},code=compute_${CC_LIST_LAST}") + + message(STATUS "Setting gencode flags: ${GENCODE_FLAGS}") # # Two variables are exported to the parent scope. 
One is passed through the - # environment (CUDA_NVCC_FLAGS), the other is passed by name (RESULT_NAME) + # environment (CUDA_NVCC_FLAGS), the other is passed by name (SUPPORTED_CC) # - set(CUDA_NVCC_FLAGS ${GENCODE_FLAGS} PARENT_SCOPE) - set(${RESULT_NAME} ${CC_LIST} PARENT_SCOPE) + set(${SUPPORTED_GENCODE_FLAGS} "${GENCODE_FLAGS}" PARENT_SCOPE) endfunction() From 85946b30a67a745600df9cc9e0460d9474ef4684 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Wed, 26 Feb 2020 14:14:35 +0100 Subject: [PATCH 129/285] [doc] added codacy badge --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index acfdab6e..b2cf664f 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # PopSift -[![CII Best Practices](https://bestpractices.coreinfrastructure.org/projects/3728/badge)](https://bestpractices.coreinfrastructure.org/projects/3728) +[![CII Best Practices](https://bestpractices.coreinfrastructure.org/projects/3728/badge)](https://bestpractices.coreinfrastructure.org/projects/3728) [![Codacy Badge](https://api.codacy.com/project/badge/Grade/8b0f7a68bc0d4df2ac89c6e732917caa)](https://app.codacy.com/manual/alicevision/popsift?utm_source=github.com&utm_medium=referral&utm_content=alicevision/popsift&utm_campaign=Badge_Grade_Settings) PopSift is an open-source implementation of the SIFT algorithm in CUDA. PopSift tries to stick as closely as possible to David Lowe's famous paper [1], while extracting features from an image in real-time at least on an NVidia GTX 980 Ti GPU. 
From 9ccbec310c06460367c2227ce78d1a2be17c09f3 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Mon, 24 Feb 2020 11:19:42 +0100 Subject: [PATCH 130/285] [ci] add debug build --- .travis.yml | 48 ++++++++++++++++++++++++++++++++++++------------ 1 file changed, 36 insertions(+), 12 deletions(-) diff --git a/.travis.yml b/.travis.yml index 98fc8264..d3edeb97 100644 --- a/.travis.yml +++ b/.travis.yml @@ -23,15 +23,18 @@ env: global: - NUM_CPU="`grep processor /proc/cpuinfo | wc -l`"; echo $NUM_CPU - - BUILD_TYPE="RELEASE" - BUILD_SYSTEM="`uname -s`" - BUILD_PROCESSOR="`uname -p`" - POPSIFT_SOURCE=${TRAVIS_BUILD_DIR} - - POPSIFT_BUILD=${TRAVIS_BUILD_DIR}/build - - POPSIFT_INSTALL=${POPSIFT_BUILD}/install + - POPSIFT_BUILD_RELEASE=${TRAVIS_BUILD_DIR}/build_release + - POPSIFT_BUILD_DEBUG=${TRAVIS_BUILD_DIR}/build_debug + - POPSIFT_INSTALL_RELEASE=${POPSIFT_BUILD_RELEASE}/install + - POPSIFT_INSTALL_DEBUG=${POPSIFT_BUILD_DEBUG}/install - POPSIFT_APP_SRC=${POPSIFT_SOURCE}/src/application - - POPSIFT_APP_BUILD=${POPSIFT_APP_SRC}/build - - POPSIFT_APP_INSTALL=${POPSIFT_APP_BUILD}/install + - POPSIFT_APP_BUILD_RELEASE=${POPSIFT_APP_SRC}/build_release + - POPSIFT_APP_BUILD_DEBUG=${POPSIFT_APP_SRC}/build_debug + - POPSIFT_APP_INSTALL_RELEASE=${POPSIFT_APP_BUILD_RELEASE}/install + - POPSIFT_APP_INSTALL_DEBUG=${POPSIFT_APP_BUILD_DEBUG}/install # CMAKE # - CMAKE_URL="https://cmake.org/files/v3.6/cmake-3.6.1-Linux-x86_64.tar.gz" - CMAKE_URL="https://cmake.org/files/v3.13/cmake-3.13.5-Linux-x86_64.tar.gz" @@ -66,23 +69,44 @@ install: - sudo ln -s /usr/local/cuda-${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR} /usr/local/cuda before_script: - # Create build folder - - mkdir -p ${POPSIFT_BUILD} - - cd ${POPSIFT_BUILD} # Classic release build + # Create build folder + - mkdir -p ${POPSIFT_BUILD_RELEASE} + - cd ${POPSIFT_BUILD_RELEASE} + - > + cmake . 
${POPSIFT_SOURCE} -DCMAKE_INSTALL_PREFIX=${POPSIFT_INSTALL_RELEASE} -DCMAKE_BUILD_TYPE=Release + + # Classic debug build + # Create build folder + - mkdir -p ${POPSIFT_BUILD_DEBUG} + - cd ${POPSIFT_BUILD_DEBUG} - > - cmake . ${POPSIFT_SOURCE} -DCMAKE_INSTALL_PREFIX=${POPSIFT_INSTALL} + cmake . ${POPSIFT_SOURCE} -DCMAKE_INSTALL_PREFIX=${POPSIFT_INSTALL_DEBUG} -DCMAKE_BUILD_TYPE=Debug script: + - cd ${POPSIFT_BUILD_RELEASE} # limit GCC builds to a reduced number of thread for the virtual machine - make install -j 2 VERBOSE=1 # Perform unit tests # - make test # Perform tests building application with PopSift as 3rd party - cd ${POPSIFT_APP_SRC} - - mkdir -p ${POPSIFT_APP_BUILD} - - cd ${POPSIFT_APP_BUILD} - - cmake .. -DPopSift_DIR=${POPSIFT_INSTALL}/lib/cmake/PopSift/ -DCMAKE_INSTALL_PREFIX=${POPSIFT_APP_INSTALL} + - mkdir -p ${POPSIFT_APP_BUILD_RELEASE} + - cd ${POPSIFT_APP_BUILD_RELEASE} + - cmake .. -DPopSift_DIR=${POPSIFT_INSTALL_RELEASE}/lib/cmake/PopSift/ -DCMAKE_INSTALL_PREFIX=${POPSIFT_APP_INSTALL_RELEASE} -DCMAKE_BUILD_TYPE=Release + - make install -j 2 VERBOSE=1 + +# same for debug + - cd ${POPSIFT_BUILD_DEBUG} + # limit GCC builds to a reduced number of thread for the virtual machine + - make install -j 2 VERBOSE=1 + # Perform unit tests + # - make test + # Perform tests building application with PopSift as 3rd party + - cd ${POPSIFT_APP_SRC} + - mkdir -p ${POPSIFT_APP_BUILD_DEBUG} + - cd ${POPSIFT_APP_BUILD_DEBUG} + - cmake .. 
-DPopSift_DIR=${POPSIFT_INSTALL_DEBUG}/lib/cmake/PopSift/ -DCMAKE_INSTALL_PREFIX=${POPSIFT_APP_INSTALL_DEBUG} -DCMAKE_BUILD_TYPE=Debug - make install -j 2 VERBOSE=1 cache: From 170593045bcdf18f2ef20581a637c80c2d6a854f Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Tue, 25 Feb 2020 09:46:42 +0100 Subject: [PATCH 131/285] [cmake] remove cuda CC that fail in debug --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3134d68c..76b105bc 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -91,9 +91,9 @@ endif() # Shortening the lists saves a lot of compile time. # if(CUDA_VERSION_MAJOR GREATER 7) - set(PopSift_CUDA_CC_LIST_BASIC 30 35 50 52 60 61 62) + set(PopSift_CUDA_CC_LIST_BASIC 30 50 52 60 61) else() - set(PopSift_CUDA_CC_LIST_BASIC 30 35 50 52 ) + set(PopSift_CUDA_CC_LIST_BASIC 30 50 52 ) endif() set(PopSift_CUDA_CC_LIST ${PopSift_CUDA_CC_LIST_BASIC} CACHE STRING "CUDA CC versions to compile") From aa2921681a3d4d3b891a081c65eb64cb8b3e1144 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Tue, 25 Feb 2020 13:50:45 +0100 Subject: [PATCH 132/285] [cmake] reinstate cc 35 --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 76b105bc..d6f23edf 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -91,9 +91,9 @@ endif() # Shortening the lists saves a lot of compile time. 
# if(CUDA_VERSION_MAJOR GREATER 7) - set(PopSift_CUDA_CC_LIST_BASIC 30 50 52 60 61) + set(PopSift_CUDA_CC_LIST_BASIC 30 35 50 52 60 61) else() - set(PopSift_CUDA_CC_LIST_BASIC 30 50 52 ) + set(PopSift_CUDA_CC_LIST_BASIC 30 35 50 52 ) endif() set(PopSift_CUDA_CC_LIST ${PopSift_CUDA_CC_LIST_BASIC} CACHE STRING "CUDA CC versions to compile") From 8c2f60e92e0cc91944983fecd054afdf49561b03 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Wed, 26 Feb 2020 18:01:53 +0100 Subject: [PATCH 133/285] [ci] use cache only once --- .travis.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index d3edeb97..c339158d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,9 +4,6 @@ sudo: required language: cpp compiler: gcc -cache: - apt: true - addons: apt: packages: @@ -110,5 +107,6 @@ script: - make install -j 2 VERBOSE=1 cache: + apt: true directories: - ${CMAKE_INSTALL} From 99b74192948fbe986cfd51c86fe999e31cd684f6 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Wed, 26 Feb 2020 18:02:21 +0100 Subject: [PATCH 134/285] [ci] sudo not required --- .travis.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index c339158d..98ffb413 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,5 +1,4 @@ dist: trusty -sudo: required language: cpp compiler: gcc From 40965e39acd4818f2aa15d51701a571459310149 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Wed, 26 Feb 2020 18:02:55 +0100 Subject: [PATCH 135/285] [ci] using xenial with 8 9.2 and 10.2 --- .travis.yml | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/.travis.yml b/.travis.yml index 98ffb413..409682cf 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,4 +1,4 @@ -dist: trusty +dist: xenial language: cpp compiler: gcc @@ -6,17 +6,18 @@ compiler: gcc addons: apt: packages: - - libboost-filesystem1.55-dev - - libboost-system1.55-dev - - libboost-program-options1.55-dev - - libboost-thread1.55-dev + - 
libboost-filesystem-dev + - libboost-system-dev + - libboost-program-options-dev + - libboost-thread-dev env: matrix: - - CUDA_VERSION_MAJOR="7" CUDA_VERSION_MINOR="0" CUDA_PKG_LONGVERSION="${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}-28" CUDA_PKG_VERSION="${CUDA_VERSION_MAJOR}-${CUDA_VERSION_MINOR}" - - CUDA_VERSION_MAJOR="7" CUDA_VERSION_MINOR="5" CUDA_PKG_LONGVERSION="${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}-18" CUDA_PKG_VERSION="${CUDA_VERSION_MAJOR}-${CUDA_VERSION_MINOR}" - - CUDA_VERSION_MAJOR="8" CUDA_VERSION_MINOR="0" CUDA_PKG_LONGVERSION="${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}.61-1" CUDA_PKG_VERSION="${CUDA_VERSION_MAJOR}-${CUDA_VERSION_MINOR}" - + - CUDA_VERSION_MAJOR="8" CUDA_VERSION_MINOR="0" CUDA_PKG_LONGVERSION="${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}.61-1" CUDA_PKG_VERSION="${CUDA_VERSION_MAJOR}-${CUDA_VERSION_MINOR}" + - CUDA_VERSION_MAJOR="9" CUDA_VERSION_MINOR="2" CUDA_PKG_LONGVERSION="${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}.148-1" CUDA_PKG_VERSION="${CUDA_VERSION_MAJOR}-${CUDA_VERSION_MINOR}" + - CUDA_VERSION_MAJOR="10" CUDA_VERSION_MINOR="2" CUDA_PKG_LONGVERSION="${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}.89-1" CUDA_PKG_VERSION="${CUDA_VERSION_MAJOR}-${CUDA_VERSION_MINOR}" + + global: - NUM_CPU="`grep processor /proc/cpuinfo | wc -l`"; echo $NUM_CPU - BUILD_SYSTEM="`uname -s`" @@ -32,7 +33,6 @@ env: - POPSIFT_APP_INSTALL_RELEASE=${POPSIFT_APP_BUILD_RELEASE}/install - POPSIFT_APP_INSTALL_DEBUG=${POPSIFT_APP_BUILD_DEBUG}/install # CMAKE - # - CMAKE_URL="https://cmake.org/files/v3.6/cmake-3.6.1-Linux-x86_64.tar.gz" - CMAKE_URL="https://cmake.org/files/v3.13/cmake-3.13.5-Linux-x86_64.tar.gz" - CMAKE_ROOT=${TRAVIS_BUILD_DIR}/cmake - CMAKE_SOURCE=${CMAKE_ROOT}/source @@ -56,8 +56,9 @@ before_install: fi install: - - CUDA_REPO_PKG=cuda-repo-ubuntu1404_${CUDA_PKG_LONGVERSION}_amd64.deb - - wget http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1404/x86_64/$CUDA_REPO_PKG + - UBUNTU_VERSION=ubuntu1604 + - 
CUDA_REPO_PKG=cuda-repo-${UBUNTU_VERSION}_${CUDA_PKG_LONGVERSION}_amd64.deb + - wget http://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/x86_64/$CUDA_REPO_PKG - sudo dpkg -i $CUDA_REPO_PKG - rm ${CUDA_REPO_PKG} - travis_retry sudo apt-get -y update From 2bb8325b38c7a7f486aaf6efe08ae2f2589d6333 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Wed, 26 Feb 2020 18:30:22 +0100 Subject: [PATCH 136/285] [ci] --allow-unauthenticated --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 409682cf..99af5031 100644 --- a/.travis.yml +++ b/.travis.yml @@ -62,7 +62,7 @@ install: - sudo dpkg -i $CUDA_REPO_PKG - rm ${CUDA_REPO_PKG} - travis_retry sudo apt-get -y update - - travis_retry sudo apt-get install -y --no-install-recommends cuda-core-$CUDA_PKG_VERSION cuda-cudart-dev-$CUDA_PKG_VERSION cuda-cublas-dev-$CUDA_PKG_VERSION cuda-curand-dev-$CUDA_PKG_VERSION + - travis_retry sudo apt-get install -y --no-install-recommends --allow-unauthenticated cuda-core-$CUDA_PKG_VERSION cuda-cudart-dev-$CUDA_PKG_VERSION cuda-cublas-dev-$CUDA_PKG_VERSION cuda-curand-dev-$CUDA_PKG_VERSION - sudo ln -s /usr/local/cuda-${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR} /usr/local/cuda before_script: From 262915136cc336d9a684879d7780d95a170cec94 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Wed, 26 Feb 2020 18:39:58 +0100 Subject: [PATCH 137/285] [ci] cuda 10 changed to standard package names --- .travis.yml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 99af5031..9c5769bb 100644 --- a/.travis.yml +++ b/.travis.yml @@ -62,7 +62,12 @@ install: - sudo dpkg -i $CUDA_REPO_PKG - rm ${CUDA_REPO_PKG} - travis_retry sudo apt-get -y update - - travis_retry sudo apt-get install -y --no-install-recommends --allow-unauthenticated cuda-core-$CUDA_PKG_VERSION cuda-cudart-dev-$CUDA_PKG_VERSION cuda-cublas-dev-$CUDA_PKG_VERSION cuda-curand-dev-$CUDA_PKG_VERSION + - > + 
if [ ${CUDA_VERSION_MAJOR} -lt 10 ]; then + travis_retry sudo apt-get install -y --no-install-recommends --allow-unauthenticated cuda-core-$CUDA_PKG_VERSION cuda-cudart-dev-$CUDA_PKG_VERSION cuda-cublas-dev-$CUDA_PKG_VERSION cuda-curand-dev-$CUDA_PKG_VERSION + else + travis_retry sudo apt-get install -y --no-install-recommends --allow-unauthenticated libcuda-dev libcudart-dev libcublas-dev libcurand-dev + fi - sudo ln -s /usr/local/cuda-${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR} /usr/local/cuda before_script: From a70d6b7c1097abf6daf6d58a759d8047ea6e4968 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Wed, 26 Feb 2020 19:15:49 +0100 Subject: [PATCH 138/285] [ci] missing the key? --- .travis.yml | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/.travis.yml b/.travis.yml index 9c5769bb..3a01d710 100644 --- a/.travis.yml +++ b/.travis.yml @@ -59,15 +59,17 @@ install: - UBUNTU_VERSION=ubuntu1604 - CUDA_REPO_PKG=cuda-repo-${UBUNTU_VERSION}_${CUDA_PKG_LONGVERSION}_amd64.deb - wget http://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/x86_64/$CUDA_REPO_PKG + - travis_retry sudo apt-key adv --fetch-keys http://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/x86_64/7fa2af80.pub - sudo dpkg -i $CUDA_REPO_PKG - rm ${CUDA_REPO_PKG} - travis_retry sudo apt-get -y update - - > - if [ ${CUDA_VERSION_MAJOR} -lt 10 ]; then - travis_retry sudo apt-get install -y --no-install-recommends --allow-unauthenticated cuda-core-$CUDA_PKG_VERSION cuda-cudart-dev-$CUDA_PKG_VERSION cuda-cublas-dev-$CUDA_PKG_VERSION cuda-curand-dev-$CUDA_PKG_VERSION - else - travis_retry sudo apt-get install -y --no-install-recommends --allow-unauthenticated libcuda-dev libcudart-dev libcublas-dev libcurand-dev - fi + - travis_retry sudo apt-get install -y --no-install-recommends --allow-unauthenticated cuda-core-$CUDA_PKG_VERSION cuda-cudart-dev-$CUDA_PKG_VERSION cuda-cublas-dev-$CUDA_PKG_VERSION cuda-curand-dev-$CUDA_PKG_VERSION +# - 
> +# if [ ${CUDA_VERSION_MAJOR} -lt 10 ]; then +# travis_retry sudo apt-get install -y --no-install-recommends --allow-unauthenticated cuda-core-$CUDA_PKG_VERSION cuda-cudart-dev-$CUDA_PKG_VERSION cuda-cublas-dev-$CUDA_PKG_VERSION cuda-curand-dev-$CUDA_PKG_VERSION +# else +# travis_retry sudo apt-get install -y --no-install-recommends --allow-unauthenticated libcuda-dev libcudart-dev libcublas-dev libcurand-dev +# fi - sudo ln -s /usr/local/cuda-${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR} /usr/local/cuda before_script: From ce9fa954a6f84d38cc97b068cd71073c3e30f338 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Wed, 26 Feb 2020 19:36:51 +0100 Subject: [PATCH 139/285] [ci] cublas changed the name! --- .travis.yml | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/.travis.yml b/.travis.yml index 3a01d710..3e9d2db1 100644 --- a/.travis.yml +++ b/.travis.yml @@ -63,13 +63,14 @@ install: - sudo dpkg -i $CUDA_REPO_PKG - rm ${CUDA_REPO_PKG} - travis_retry sudo apt-get -y update - - travis_retry sudo apt-get install -y --no-install-recommends --allow-unauthenticated cuda-core-$CUDA_PKG_VERSION cuda-cudart-dev-$CUDA_PKG_VERSION cuda-cublas-dev-$CUDA_PKG_VERSION cuda-curand-dev-$CUDA_PKG_VERSION -# - > -# if [ ${CUDA_VERSION_MAJOR} -lt 10 ]; then -# travis_retry sudo apt-get install -y --no-install-recommends --allow-unauthenticated cuda-core-$CUDA_PKG_VERSION cuda-cudart-dev-$CUDA_PKG_VERSION cuda-cublas-dev-$CUDA_PKG_VERSION cuda-curand-dev-$CUDA_PKG_VERSION -# else -# travis_retry sudo apt-get install -y --no-install-recommends --allow-unauthenticated libcuda-dev libcudart-dev libcublas-dev libcurand-dev -# fi + # cuda > 10.0 changed cublas naming + - > + if [ ${CUDA_VERSION_MAJOR} -lt 10 ]; then + CUBLAS_PKG=cuda-cublas-dev-$CUDA_PKG_VERSION + else + CUBLAS_PKG=libcublas-dev + fi + - travis_retry sudo apt-get install -y --no-install-recommends --allow-unauthenticated cuda-core-$CUDA_PKG_VERSION cuda-cudart-dev-$CUDA_PKG_VERSION 
${CUBLAS_PKG} cuda-curand-dev-$CUDA_PKG_VERSION - sudo ln -s /usr/local/cuda-${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR} /usr/local/cuda before_script: From af77e93ee55ddd4661bbf52914acfd0ec015df06 Mon Sep 17 00:00:00 2001 From: Carsten Griwodz Date: Thu, 27 Feb 2020 13:55:34 +0100 Subject: [PATCH 140/285] [cmake] fix usage for ChooseCudaCC --- CMakeLists.txt | 4 ++-- cmake/ChooseCudaCC.cmake | 44 +++++++++++++++++++++++++--------------- 2 files changed, 30 insertions(+), 18 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 84981a9d..a74534a7 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -92,8 +92,8 @@ include(ChooseCudaCC) if(NOT DEFINED PopSift_CUDA_CC_LIST) chooseCudaCC(PopSift_CUDA_CC_LIST_BASIC PopSift_CUDA_GENCODE_FLAGS - MIN_CC 30 - MIN_CUDA_VERSION 7.0) + MIN_CC 30 + MIN_CUDA_VERSION 7.0) set(PopSift_CUDA_CC_LIST ${PopSift_CUDA_CC_LIST_BASIC} CACHE STRING "CUDA CC versions to compile") else() getFlagsForCudaCCList(PopSift_CUDA_CC_LIST diff --git a/cmake/ChooseCudaCC.cmake b/cmake/ChooseCudaCC.cmake index 684b9172..fd75478e 100755 --- a/cmake/ChooseCudaCC.cmake +++ b/cmake/ChooseCudaCC.cmake @@ -1,12 +1,28 @@ # +# This file contains two functions: +# chooseCudaCC +# getFlagsForCudaCCList +# +# Motivation: # CUDA hardware and SDKs are developing over time, different SDK support different # hardware, and supported hardware differs depending on platform even for the same -# SDK version. -# This file attempts to provide a function that returns a valid selection of hardware -# for the current SDK and platform. +# SDK version. This file attempts to provide a function that returns a valid selection +# of hardware for the current SDK and platform. It will require updates as CUDA develops, +# and it is currently not complete in terms of existing platforms that support CUDA. 
+# + +# +# Return the minimal set of supported Cuda CC # -# It will require updates as CUDA develops, and it is currently not complete in terms -# of existing platforms that support CUDA. +# Usage: +# chooseCudaCC(SUPPORTED_CC SUPPORTED_GENCODE_FLAGS +# [MIN_CUDA_VERSION X.Y] +# [MIN_CC XX ]) +# +# SUPPORTED_CC out variable. Stores the list of supported CC. +# SUPPORTED_GENCODE_FLAGS out variable. List of gencode flags to append to, e.g., CUDA_NVCC_FLAGS +# MIN_CUDA_VERSION the minimal supported version of cuda (e.g. 7.5, default 7.0). +# MIN_CC minimal supported Cuda CC by the project (e.g. 35, default 20) # # This function does not edit cache entries or variables in the parent scope # except for the variables whose names are supplied for SUPPORTED_CC and @@ -19,11 +35,6 @@ # end # set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};${MY_GENCODE_FLAGS}") # -# We assume that ${SUPPORTED_CC} can be overwritten. -# We assume that ${SUPPORTED_GENCODE_FLAGS} can be overwritten. -# We assume that MIN_CC default to 20 -# We assume that MIN_CUDA_VERSION defaults to 7.0 -# function(chooseCudaCC SUPPORTED_CC SUPPORTED_GENCODE_FLAGS) set(options "") set(oneValueArgs MIN_CUDA_VERSION MIN_CC) @@ -88,7 +99,7 @@ function(chooseCudaCC SUPPORTED_CC SUPPORTED_GENCODE_FLAGS) set(CC_LIST "") foreach(CC ${CC_LIST_BY_SYSTEM_PROCESSOR}) if( (${CC} GREATER_EQUAL ${CUDA_MIN_CC}) AND - (${CC} LESS_EQUAL ${CUDA_MAX_CC}) ) + (${CC} LESS_EQUAL ${CUDA_MAX_CC}) ) list(APPEND CC_LIST ${CC}) endif() endforeach() @@ -119,12 +130,13 @@ function(chooseCudaCC SUPPORTED_CC SUPPORTED_GENCODE_FLAGS) endfunction() # -# This function is used to create a list of gencode instructions for a given list -# of CCs. -# It takes as arguments is list of CCs and a list variable that can be filled with -# gencode strings. +# Return the gencode parameters for a given list of CCs. +# +# Usage: +# getFlagsForCudaCCList(INPUT_CC_LIST SUPPORTED_GENCODE_FLAGS) # -# We assume that ${SUPPORTED_GENCODE_FLAGS} can be overwritten. 
+# INPUT_CC_LIST in variable. Contains a list of supported CCs. +# SUPPORTED_GENCODE_FLAGS out variable. List of gencode flags to append to, e.g., CUDA_NVCC_FLAGS # function(getFlagsForCudaCCList INPUT_CC_LIST SUPPORTED_GENCODE_FLAGS) set(CC_LIST "${${INPUT_CC_LIST}}") From 0aa91c5c45b99851a636ca45469e146561a250f5 Mon Sep 17 00:00:00 2001 From: Andrew Hardin Date: Fri, 10 Apr 2020 10:47:57 -0600 Subject: [PATCH 141/285] Add a basic sync_queue class. --- src/popsift/common/sync_queue.h | 49 +++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 src/popsift/common/sync_queue.h diff --git a/src/popsift/common/sync_queue.h b/src/popsift/common/sync_queue.h new file mode 100644 index 00000000..676261d3 --- /dev/null +++ b/src/popsift/common/sync_queue.h @@ -0,0 +1,49 @@ +#pragma once + +#include +#include +#include + +namespace popsift { + +/************************************************************* + * SyncQueue + * This is a basic alternative to the Boost sync_queue class. + * It lets threads push and pull items off a queue in a thread + * safe manner. + *************************************************************/ +template +class SyncQueue { + public: + SyncQueue() = default; + + /* Push an item onto the queue and signal it's available. */ + void push(const T& value) { + std::unique_lock lock(mtx_); + items_.push(value); + lock.unlock(); + signal_.notify_one(); + } + + /* Check if the queue is empty - thread safety via mutex. */ + bool empty() { + std::unique_lock lock(mtx_); + return items_.empty(); + } + + /* BLOCKING. Pull an item off the queue, or, wait until one arrives. 
*/ + T pull() { + std::unique_lock lock(mtx_); + signal_.wait(lock, [this] { return !items_.empty(); }); + auto ans = items_.front(); + items_.pop(); + return ans; + } + + private: + std::mutex mtx_; + std::queue items_; + std::condition_variable signal_; +}; + +} // namespace popsift \ No newline at end of file From e979663928bf24ac77129e27ad3c611f01d23d06 Mon Sep 17 00:00:00 2001 From: Andrew Hardin Date: Fri, 10 Apr 2020 10:48:34 -0600 Subject: [PATCH 142/285] Replace boost::sync_queue and boost::thread with SyncQueue and std::thread. --- src/popsift/popsift.cpp | 12 +++++++----- src/popsift/popsift.h | 14 +++++++------- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/src/popsift/popsift.cpp b/src/popsift/popsift.cpp index 29b7dc6b..95303793 100755 --- a/src/popsift/popsift.cpp +++ b/src/popsift/popsift.cpp @@ -5,6 +5,8 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +#include +#include #include #include "popsift.h" @@ -29,11 +31,11 @@ PopSift::PopSift( const popsift::Config& config, popsift::Config::ProcessingMode configure( config, true ); - _pipe._thread_stage1.reset( new boost::thread( &PopSift::uploadImages, this )); + _pipe._thread_stage1.reset( new std::thread( &PopSift::uploadImages, this )); if( mode == popsift::Config::ExtractingMode ) - _pipe._thread_stage2.reset( new boost::thread( &PopSift::extractDownloadLoop, this )); + _pipe._thread_stage2.reset( new std::thread( &PopSift::extractDownloadLoop, this )); else - _pipe._thread_stage2.reset( new boost::thread( &PopSift::matchPrepareLoop, this )); + _pipe._thread_stage2.reset( new std::thread( &PopSift::matchPrepareLoop, this )); } PopSift::PopSift( ImageMode imode ) @@ -50,8 +52,8 @@ PopSift::PopSift( ImageMode imode ) _pipe._unused.push( new popsift::ImageFloat ); } - _pipe._thread_stage1.reset( new boost::thread( &PopSift::uploadImages, this )); - _pipe._thread_stage2.reset( new boost::thread( 
&PopSift::extractDownloadLoop, this )); + _pipe._thread_stage1.reset( new std::thread( &PopSift::uploadImages, this )); + _pipe._thread_stage2.reset( new std::thread( &PopSift::extractDownloadLoop, this )); } PopSift::~PopSift() diff --git a/src/popsift/popsift.h b/src/popsift/popsift.h index 93bfa6e2..08fd51b8 100755 --- a/src/popsift/popsift.h +++ b/src/popsift/popsift.h @@ -12,9 +12,9 @@ #include #include #include -#include -#include +#include +#include "common/sync_queue.h" #include "sift_conf.h" #include "sift_extremum.h" @@ -74,11 +74,11 @@ class PopSift { struct Pipe { - std::unique_ptr _thread_stage1; - std::unique_ptr _thread_stage2; - boost::sync_queue _queue_stage1; - boost::sync_queue _queue_stage2; - boost::sync_queue _unused; + std::unique_ptr _thread_stage1; + std::unique_ptr _thread_stage2; + popsift::SyncQueue _queue_stage1; + popsift::SyncQueue _queue_stage2; + popsift::SyncQueue _unused; popsift::Pyramid* _pyramid{nullptr}; From 6fc334ec46082733b60fd595a7019f7484dd886f Mon Sep 17 00:00:00 2001 From: Andrew Hardin Date: Fri, 10 Apr 2020 10:59:04 -0600 Subject: [PATCH 143/285] Isolate the boost requirements in src/application. --- CMakeLists.txt | 13 +------------ src/application/CMakeLists.txt | 5 +++-- 2 files changed, 4 insertions(+), 14 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 72514f25..a5a86022 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,13 +10,9 @@ OPTION(PopSift_USE_POSITION_INDEPENDENT_CODE "Generate position independent code OPTION(PopSift_USE_GRID_FILTER "Switch off grid filtering to massively reduce compile time while debugging other things." ON) OPTION(PopSift_USE_NORMF "The __normf function computes Euclidian distance on large arrays. Fast but stability is uncertain." 
OFF) OPTION(PopSift_USE_TEST_CMD "Add testing step for functional verification" OFF) -OPTION(PopSift_BOOST_USE_STATIC_LIBS "Link with static Boost libraries" OFF) +OPTION(PopSift_BOOST_USE_STATIC_LIBS "Link examples with static Boost libraries" OFF) OPTION(PopSift_NVCC_WARNINGS "Switch on several additional warning for CUDA nvcc" OFF) -if(PopSift_BOOST_USE_STATIC_LIBS) - set(Boost_USE_STATIC_LIBS ON) -endif() - if(PopSift_USE_POSITION_INDEPENDENT_CODE) set(CMAKE_POSITION_INDEPENDENT_CODE ON) endif() @@ -40,13 +36,6 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON) # set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -G") # set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -G") -find_package(Boost 1.53.0 REQUIRED COMPONENTS system thread) -if(WIN32) - add_definitions("-DBOOST_ALL_NO_LIB") - link_directories(Boost_LIBRARRY_DIR_DEBUG) - link_directories(Boost_LIBRARRY_DIR_RELEASE) -endif(WIN32) - if(BUILD_SHARED_LIBS) message(STATUS "BUILD_SHARED_LIBS ON") # Need to declare CUDA_USE_STATIC_CUDA_RUNTIME as an option to ensure that it is not overwritten in FindCUDA. diff --git a/src/application/CMakeLists.txt b/src/application/CMakeLists.txt index 621a3d4e..34433c57 100755 --- a/src/application/CMakeLists.txt +++ b/src/application/CMakeLists.txt @@ -14,8 +14,9 @@ endif() find_package(DevIL COMPONENTS IL ILU) # yields IL_FOUND, IL_LIBRARIES, IL_INCLUDE_DIR -set(Boost_INCLUDE_DIRS "") -set(Boost_LIBRARIES "") +if(PopSift_BOOST_USE_STATIC_LIBS) + set(Boost_USE_STATIC_LIBS ON) +endif() find_package(Boost 1.53.0 REQUIRED COMPONENTS filesystem program_options) set(PD_INCLUDE_DIRS ${Boost_INCLUDE_DIRS}) From 20525291a4d3c3edaf0e5fb2e2657d97d9482afc Mon Sep 17 00:00:00 2001 From: Andrew Hardin Date: Fri, 10 Apr 2020 11:03:19 -0600 Subject: [PATCH 144/285] Munge with the README to reflect changes to the boost requirement. 
--- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 6498e009..b4147bc9 100644 --- a/README.md +++ b/README.md @@ -10,16 +10,16 @@ Dependencies Most of the dependencies can be installed from the common repositories (apt, yum etc): -Boost >= 1.55 ([atomic, chrono, date-time, system, thread]-dev) +Boost >= 1.55 ([atomic, chrono, date-time, system, thread]-dev) (only required for the application) CUDA >= 7.0 DevIL (libdevil-dev) (only required for the application) Build ----- -PopSift has been developed and tested on Linux machines, mostly a variant of Ubuntu, but compiles on MacOSX as well. It comes as a CMake project and requires at least CUDA 7.0 and Boost >= 1.55. It is known to compile and work with NVidia cards of compute capability 3.0 (including the GT 650M), but the code is developed with the compute capability 5.2 card GTX 980 Ti in mind. +PopSift has been developed and tested on Linux machines, mostly a variant of Ubuntu, but compiles on MacOSX as well. It comes as a CMake project and requires at least CUDA 7.0. The example application carries an additional dependency on Boost >= 1.55. It is known to compile and work with NVidia cards of compute capability 3.0 (including the GT 650M), but the code is developed with the compute capability 5.2 card GTX 980 Ti in mind. -If you want to avoid building the application you can run cmake with the option `-DPopSift_BUILD_EXAMPLES:BOOL=OFF`. +If you want to avoid building the example application you can run cmake with the option `-DPopSift_BUILD_EXAMPLES:BOOL=OFF`. If you want to build PopSift as a shared library: `-DBUILD_SHARED_LIBS=ON`. In order to build the library you can run: From 6586b54b769bf46a1d629390dafd92ba09dd962f Mon Sep 17 00:00:00 2001 From: Andrew Hardin Date: Fri, 10 Apr 2020 12:05:18 -0600 Subject: [PATCH 145/285] Remove boost link/include from popsift. 
--- src/CMakeLists.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index d636cbb4..5c1bc3e1 100755 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,6 +1,6 @@ set(LIBRARY_OUTPUT_PATH ${PROJECT_BINARY_DIR}) -CUDA_INCLUDE_DIRECTORIES(${Boost_INCLUDE_DIRS} ${CMAKE_CURRENT_BINARY_DIR}/popsift) +CUDA_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}/popsift) CUDA_ADD_LIBRARY(popsift popsift/popsift.cpp popsift/popsift.h @@ -49,7 +49,7 @@ configure_file(popsift/sift_config.h.in # BUILD_INTERFACE allows to include the directory with source only when target is # built in the building tree (ie, not from an install location) target_include_directories(popsift - PUBLIC ${Boost_INCLUDE_DIRS} ${CUDA_INCLUDE_DIRS} + PUBLIC ${CUDA_INCLUDE_DIRS} "$") @@ -58,7 +58,7 @@ set_target_properties(popsift PROPERTIES DEBUG_POSTFIX "d") # cannot use PRIVATE here as there is a bug in FindCUDA and CUDA_ADD_LIBRARY # https://gitlab.kitware.com/cmake/cmake/issues/16097 -target_link_libraries(popsift ${Boost_LIBRARIES} ${CUDA_CUDADEVRT_LIBRARY} ${CUDA_CUBLAS_LIBRARIES}) +target_link_libraries(popsift ${CUDA_CUDADEVRT_LIBRARY} ${CUDA_CUBLAS_LIBRARIES}) # EXPORTING THE LIBRARY From 3a58df9e2cd47b69104239016d4428ecbe055f6c Mon Sep 17 00:00:00 2001 From: Andrew Hardin Date: Fri, 10 Apr 2020 12:05:46 -0600 Subject: [PATCH 146/285] Ignore the reference.tgz that's created by a test. --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 722c38cc..c44a8393 100644 --- a/.gitignore +++ b/.gitignore @@ -45,3 +45,5 @@ oxford # Temporary files .DS_Store +# Downloaded archives for tests. +*.tgz From 3b469ca560a35823cb59f40736ba21cc2ee97070 Mon Sep 17 00:00:00 2001 From: Andrew Hardin Date: Fri, 10 Apr 2020 13:13:57 -0600 Subject: [PATCH 147/285] Also link to the threads library. 
--- src/CMakeLists.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 5c1bc3e1..abc49a81 100755 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -60,6 +60,10 @@ set_target_properties(popsift PROPERTIES DEBUG_POSTFIX "d") # https://gitlab.kitware.com/cmake/cmake/issues/16097 target_link_libraries(popsift ${CUDA_CUDADEVRT_LIBRARY} ${CUDA_CUBLAS_LIBRARIES}) +# Link to threads because we're using C++11 std::thread. +find_package(Threads REQUIRED) +target_link_libraries(popsift ${CMAKE_THREAD_LIBS_INIT}) + # EXPORTING THE LIBRARY # From 1793247f55e76d6d3cb0d8b26f0a8be28fc9fc58 Mon Sep 17 00:00:00 2001 From: Andrew Hardin Date: Fri, 10 Apr 2020 13:44:56 -0600 Subject: [PATCH 148/285] Add the boost system component; reintroduce win32 condition. --- src/application/CMakeLists.txt | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/application/CMakeLists.txt b/src/application/CMakeLists.txt index 34433c57..b2203379 100755 --- a/src/application/CMakeLists.txt +++ b/src/application/CMakeLists.txt @@ -17,7 +17,12 @@ find_package(DevIL COMPONENTS IL ILU) # yields IL_FOUND, IL_LIBRARIES, IL_INCLUD if(PopSift_BOOST_USE_STATIC_LIBS) set(Boost_USE_STATIC_LIBS ON) endif() -find_package(Boost 1.53.0 REQUIRED COMPONENTS filesystem program_options) +find_package(Boost 1.53.0 REQUIRED COMPONENTS filesystem program_options system) +if(WIN32) + add_definitions("-DBOOST_ALL_NO_LIB") + link_directories(Boost_LIBRARRY_DIR_DEBUG) + link_directories(Boost_LIBRARRY_DIR_RELEASE) +endif(WIN32) set(PD_INCLUDE_DIRS ${Boost_INCLUDE_DIRS}) set(PD_LINK_LIBS ${Boost_LIBRARIES} ${CUDA_CUDADEVRT_LIBRARY}) From f88f56d7356cd46fcc861a06aa4312e57486dfc5 Mon Sep 17 00:00:00 2001 From: Andrew Hardin Date: Fri, 10 Apr 2020 16:46:21 -0600 Subject: [PATCH 149/285] Add doxygen comments to SyncQueue. 
--- src/popsift/common/sync_queue.h | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/src/popsift/common/sync_queue.h b/src/popsift/common/sync_queue.h index 676261d3..d8dfcbdf 100644 --- a/src/popsift/common/sync_queue.h +++ b/src/popsift/common/sync_queue.h @@ -6,18 +6,19 @@ namespace popsift { -/************************************************************* - * SyncQueue - * This is a basic alternative to the Boost sync_queue class. - * It lets threads push and pull items off a queue in a thread - * safe manner. - *************************************************************/ +/** + * @brief A thread safe wrapper around std::queue (replaces boost::sync_queue). + * @tparam T the value type that's stored in the queue. + */ template class SyncQueue { - public: +public: SyncQueue() = default; - /* Push an item onto the queue and signal it's available. */ + /** + * @brief Push an item onto the queue and signal it's available. + * @param[in] value the item to add to the queue. + */ void push(const T& value) { std::unique_lock lock(mtx_); items_.push(value); @@ -25,13 +26,19 @@ class SyncQueue { signal_.notify_one(); } - /* Check if the queue is empty - thread safety via mutex. */ + /** + * @brief Check if the queue is empty - thread safety via mutex. + * @return True if the queue is empty. + */ bool empty() { std::unique_lock lock(mtx_); return items_.empty(); } - /* BLOCKING. Pull an item off the queue, or, wait until one arrives. */ + /** + * @brief Pull an item off the queue, or, wait until one arrives. Blocking. + * @return The front item that was popped off the queue. 
+ */ T pull() { std::unique_lock lock(mtx_); signal_.wait(lock, [this] { return !items_.empty(); }); @@ -40,7 +47,7 @@ class SyncQueue { return ans; } - private: +private: std::mutex mtx_; std::queue items_; std::condition_variable signal_; From f73fd9fb952042999b055c6a2d7b1752b4a8d4aa Mon Sep 17 00:00:00 2001 From: Andrew Hardin Date: Fri, 10 Apr 2020 16:49:29 -0600 Subject: [PATCH 150/285] Move find(threads) to root list; use modern threads target. --- CMakeLists.txt | 3 +++ src/CMakeLists.txt | 5 +---- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a5a86022..de0b96d9 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -51,6 +51,9 @@ else() set(CUDA_USE_STATIC_CUDA_RUNTIME ON) endif() +# Require threads because of std::thread. +find_package(Threads REQUIRED) + find_package(CUDA 7.0 REQUIRED) if(NOT CUDA_FOUND) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index abc49a81..62199dc5 100755 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -58,11 +58,8 @@ set_target_properties(popsift PROPERTIES DEBUG_POSTFIX "d") # cannot use PRIVATE here as there is a bug in FindCUDA and CUDA_ADD_LIBRARY # https://gitlab.kitware.com/cmake/cmake/issues/16097 -target_link_libraries(popsift ${CUDA_CUDADEVRT_LIBRARY} ${CUDA_CUBLAS_LIBRARIES}) +target_link_libraries(popsift ${CUDA_CUDADEVRT_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} Threads::Threads) -# Link to threads because we're using C++11 std::thread. -find_package(Threads REQUIRED) -target_link_libraries(popsift ${CMAKE_THREAD_LIBS_INIT}) # EXPORTING THE LIBRARY From 7a40eb30949893a87b65c0aef9974dd3e81c0678 Mon Sep 17 00:00:00 2001 From: Andrew Hardin Date: Fri, 10 Apr 2020 16:50:30 -0600 Subject: [PATCH 151/285] Move boost static libs option down to application. 
--- CMakeLists.txt | 1 - src/application/CMakeLists.txt | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index de0b96d9..bddc2682 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,7 +10,6 @@ OPTION(PopSift_USE_POSITION_INDEPENDENT_CODE "Generate position independent code OPTION(PopSift_USE_GRID_FILTER "Switch off grid filtering to massively reduce compile time while debugging other things." ON) OPTION(PopSift_USE_NORMF "The __normf function computes Euclidian distance on large arrays. Fast but stability is uncertain." OFF) OPTION(PopSift_USE_TEST_CMD "Add testing step for functional verification" OFF) -OPTION(PopSift_BOOST_USE_STATIC_LIBS "Link examples with static Boost libraries" OFF) OPTION(PopSift_NVCC_WARNINGS "Switch on several additional warning for CUDA nvcc" OFF) if(PopSift_USE_POSITION_INDEPENDENT_CODE) diff --git a/src/application/CMakeLists.txt b/src/application/CMakeLists.txt index b2203379..d371ef96 100755 --- a/src/application/CMakeLists.txt +++ b/src/application/CMakeLists.txt @@ -1,6 +1,8 @@ cmake_minimum_required(VERSION 3.0) project(PopsiftDemo LANGUAGES CXX) +OPTION(PopSift_BOOST_USE_STATIC_LIBS "Link examples with static Boost libraries" OFF) + if(TARGET popsift) # when compiled in the repository the target is already defined add_library(PopSift::popsift ALIAS popsift) From 54a68ec6315691e4225df9a99f4b02ff7b47cfeb Mon Sep 17 00:00:00 2001 From: Andrew Hardin Date: Fri, 10 Apr 2020 17:03:28 -0600 Subject: [PATCH 152/285] Remove boost from the popsift package; add threads. 
--- cmake/FindPopsift.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/FindPopsift.cmake b/cmake/FindPopsift.cmake index a9a48119..feb69acf 100644 --- a/cmake/FindPopsift.cmake +++ b/cmake/FindPopsift.cmake @@ -24,7 +24,7 @@ FIND_PATH(POPSIFT_INCLUDE_DIR popsift/popsift.h ) find_package(CUDA 7.0 REQUIRED) -find_package(Boost 1.53.0 REQUIRED COMPONENTS system filesystem) +find_package(Threads REQUIRED) IF(POPSIFT_INCLUDE_DIR) MESSAGE(STATUS "popsift headers found in ${POPSIFT_INCLUDE_DIR}") From 3c420e5304f9a1d1cf53f0fdad4009f7fe0ebb1b Mon Sep 17 00:00:00 2001 From: Andrew Hardin Date: Fri, 10 Apr 2020 17:15:15 -0600 Subject: [PATCH 153/285] Remove obsolete(?) win32 boost instructions. --- src/application/CMakeLists.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/application/CMakeLists.txt b/src/application/CMakeLists.txt index d371ef96..2c5113ff 100755 --- a/src/application/CMakeLists.txt +++ b/src/application/CMakeLists.txt @@ -22,8 +22,6 @@ endif() find_package(Boost 1.53.0 REQUIRED COMPONENTS filesystem program_options system) if(WIN32) add_definitions("-DBOOST_ALL_NO_LIB") - link_directories(Boost_LIBRARRY_DIR_DEBUG) - link_directories(Boost_LIBRARRY_DIR_RELEASE) endif(WIN32) set(PD_INCLUDE_DIRS ${Boost_INCLUDE_DIRS}) From f763f12e601d1cd5864395897540217a3d5f097d Mon Sep 17 00:00:00 2001 From: Andrew Hardin Date: Fri, 10 Apr 2020 17:23:36 -0600 Subject: [PATCH 154/285] Find threads when loading the config if it hasn't been loaded yet. 
--- src/cmake/Config.cmake.in | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/cmake/Config.cmake.in b/src/cmake/Config.cmake.in index 5aaa2d8e..4e97b309 100644 --- a/src/cmake/Config.cmake.in +++ b/src/cmake/Config.cmake.in @@ -36,8 +36,11 @@ # ################################################################################ - @PACKAGE_INIT@ +if(NOT TARGET Threads::Threads) + find_package(Threads REQUIRED) +endif() + include("${CMAKE_CURRENT_LIST_DIR}/@targets_export_name@.cmake") check_required_components("@PROJECT_NAME@") From 2f3b142d8e7902bc337e9f2ceb67001c9412c323 Mon Sep 17 00:00:00 2001 From: Andrew Hardin Date: Fri, 10 Apr 2020 17:37:11 -0600 Subject: [PATCH 155/285] Resolve two race conditions identified by racecheck. --- src/popsift/s_orientation.cu | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/popsift/s_orientation.cu b/src/popsift/s_orientation.cu index 64ac5863..f975e55a 100644 --- a/src/popsift/s_orientation.cu +++ b/src/popsift/s_orientation.cu @@ -75,6 +75,7 @@ void ori_par( const int octave, __shared__ float sm_hist[ORI_NBINS]; for( int i = threadIdx.x; i < ORI_NBINS; i += blockDim.x ) hist[i] = 0.0f; + __syncthreads(); /* keypoint fractional geometry */ const float x = iext->xpos; @@ -206,6 +207,7 @@ void ori_par( const int octave, int2 best_index = make_int2( threadIdx.x, threadIdx.x + 32 ); + __syncthreads(); BitonicSort::Warp32 sorter( yval ); sorter.sort64( best_index ); __syncthreads(); From d7ff256266216c3c03d8f7427ae36f01f60d71bf Mon Sep 17 00:00:00 2001 From: Andrew Hardin Date: Sat, 11 Apr 2020 21:45:59 -0600 Subject: [PATCH 156/285] Remove unnecessary sync. 
--- src/popsift/common/excl_blk_prefix_sum.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/popsift/common/excl_blk_prefix_sum.h b/src/popsift/common/excl_blk_prefix_sum.h index 9b5f3d7e..8ad76b48 100644 --- a/src/popsift/common/excl_blk_prefix_sum.h +++ b/src/popsift/common/excl_blk_prefix_sum.h @@ -73,7 +73,6 @@ class Block if( threadIdx.x == 0 && threadIdx.y == 0 ) { loop_total = 0; } - __syncthreads(); const int start = threadIdx.y * blockDim.x + threadIdx.x; const int wrap = blockDim.x * blockDim.y; From c238a92d4d7d6bfef3e273365915c11360de0c0a Mon Sep 17 00:00:00 2001 From: Andrew Hardin Date: Sat, 11 Apr 2020 21:46:35 -0600 Subject: [PATCH 157/285] Resolve the race condition. --- src/popsift/common/excl_blk_prefix_sum.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/popsift/common/excl_blk_prefix_sum.h b/src/popsift/common/excl_blk_prefix_sum.h index 8ad76b48..1cb85279 100644 --- a/src/popsift/common/excl_blk_prefix_sum.h +++ b/src/popsift/common/excl_blk_prefix_sum.h @@ -132,16 +132,15 @@ class Block _mapping_writer.set( ebs, self, cell ); } + // Wait to update loop_total until everyone is done. + __syncthreads(); if( threadIdx.y == 0 && threadIdx.x == 31 ) { loop_total += ibs; } - __syncthreads(); } - // if( threadIdx.y == 0 && threadIdx.x == 31 ) - if( threadIdx.y == 0 ) + if( threadIdx.y == 0 && threadIdx.x == 31 ) { - loop_total = popsift::shuffle( loop_total, 31 ); _total_writer.set( loop_total ); } } From 6e4428b14a9da59886606baea5db9662891a5d82 Mon Sep 17 00:00:00 2001 From: Andrew Hardin Date: Tue, 14 Apr 2020 08:02:59 -0600 Subject: [PATCH 158/285] Reintroduce sync, and set the total on every thread. 
--- src/popsift/common/excl_blk_prefix_sum.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/popsift/common/excl_blk_prefix_sum.h b/src/popsift/common/excl_blk_prefix_sum.h index 1cb85279..e551b101 100644 --- a/src/popsift/common/excl_blk_prefix_sum.h +++ b/src/popsift/common/excl_blk_prefix_sum.h @@ -137,12 +137,10 @@ class Block if( threadIdx.y == 0 && threadIdx.x == 31 ) { loop_total += ibs; } + __syncthreads(); } - if( threadIdx.y == 0 && threadIdx.x == 31 ) - { - _total_writer.set( loop_total ); - } + _total_writer.set( loop_total ); } }; From 249bbca4d298c456f3116053c3ef9fe1f97cf791 Mon Sep 17 00:00:00 2001 From: Andrew Hardin Date: Tue, 14 Apr 2020 08:56:20 -0600 Subject: [PATCH 159/285] Replace find_package with find_dependency. --- cmake/Config.cmake.in | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/cmake/Config.cmake.in b/cmake/Config.cmake.in index 60061676..30eea599 100644 --- a/cmake/Config.cmake.in +++ b/cmake/Config.cmake.in @@ -38,9 +38,8 @@ @PACKAGE_INIT@ -if(NOT TARGET Threads::Threads) - find_package(Threads REQUIRED) -endif() +include(CMakeFindDependencyMacro) +find_dependency(Threads REQUIRED) include("${CMAKE_CURRENT_LIST_DIR}/@popsift_targets_export_name@.cmake") check_required_components("@PROJECT_NAME@") From d0edb05df51bf8818978fb828592a53c918c9c51 Mon Sep 17 00:00:00 2001 From: Andrew Hardin Date: Tue, 14 Apr 2020 09:03:00 -0600 Subject: [PATCH 160/285] Bump sync up by one line. --- src/popsift/s_orientation.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/popsift/s_orientation.cu b/src/popsift/s_orientation.cu index f975e55a..bf70767d 100644 --- a/src/popsift/s_orientation.cu +++ b/src/popsift/s_orientation.cu @@ -204,10 +204,10 @@ void ori_par( const int octave, refined_angle[bin] = predicate ? prev + newbin : -1; yval[bin] = predicate ? 
-(num*num) / (4.0f * denB) + sm_hist[prev] : -INFINITY; } + __syncthreads(); int2 best_index = make_int2( threadIdx.x, threadIdx.x + 32 ); - __syncthreads(); BitonicSort::Warp32 sorter( yval ); sorter.sort64( best_index ); __syncthreads(); From 3ebbaeeba6bee90e49f6656a6b38e638d1cdb803 Mon Sep 17 00:00:00 2001 From: Carsten Griwodz Date: Tue, 14 Apr 2020 20:36:33 +0200 Subject: [PATCH 161/285] use the Cordis URL for Popart --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index b2cf664f..c684aa3a 100644 --- a/README.md +++ b/README.md @@ -135,7 +135,7 @@ If you use PopSift for your publication, please cite us as: ## Acknowledgements -PopSift was developed within the project [POPART](http://www.popartproject.eu), which has been funded by the European Commission in the Horizon 2020 framework. +PopSift was developed within the project [POPART](https://cordis.europa.eu/project/id/644874), which has been funded by the European Commission in the Horizon 2020 framework. ___ From f603edfe18fd4dfcf9ea39b0f4bd4dd3bd42c5a3 Mon Sep 17 00:00:00 2001 From: Andrew Hardin Date: Tue, 14 Apr 2020 12:39:34 -0600 Subject: [PATCH 162/285] Add sync(), move sync() up a line. --- src/popsift/common/excl_blk_prefix_sum.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/popsift/common/excl_blk_prefix_sum.h b/src/popsift/common/excl_blk_prefix_sum.h index e551b101..bd809a44 100644 --- a/src/popsift/common/excl_blk_prefix_sum.h +++ b/src/popsift/common/excl_blk_prefix_sum.h @@ -73,6 +73,7 @@ class Block if( threadIdx.x == 0 && threadIdx.y == 0 ) { loop_total = 0; } + __syncthreads(); const int start = threadIdx.y * blockDim.x + threadIdx.x; const int wrap = blockDim.x * blockDim.y; @@ -131,9 +132,8 @@ class Block */ _mapping_writer.set( ebs, self, cell ); } - - // Wait to update loop_total until everyone is done. 
__syncthreads(); + if( threadIdx.y == 0 && threadIdx.x == 31 ) { loop_total += ibs; } From d730360b87cbd5d5e1d3cc8df14e63c177d2ea8e Mon Sep 17 00:00:00 2001 From: Fabien Castan Date: Wed, 15 Apr 2020 11:24:34 +0200 Subject: [PATCH 163/285] [doc] readme: update popart link --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c684aa3a..eb1c64b2 100644 --- a/README.md +++ b/README.md @@ -135,7 +135,7 @@ If you use PopSift for your publication, please cite us as: ## Acknowledgements -PopSift was developed within the project [POPART](https://cordis.europa.eu/project/id/644874), which has been funded by the European Commission in the Horizon 2020 framework. +PopSift was developed within the project [POPART](https://alicevision.org/popart), which has been funded by the European Commission in the Horizon 2020 framework. ___ From 9566c379665a5c31452897b2d75f73d6f61ee654 Mon Sep 17 00:00:00 2001 From: Fabien Castan Date: Wed, 15 Apr 2020 12:17:24 +0200 Subject: [PATCH 164/285] [doc] readme: add link to cordis Co-Authored-By: Simone Gasparini --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index eb1c64b2..f1e85d30 100644 --- a/README.md +++ b/README.md @@ -135,7 +135,7 @@ If you use PopSift for your publication, please cite us as: ## Acknowledgements -PopSift was developed within the project [POPART](https://alicevision.org/popart), which has been funded by the European Commission in the Horizon 2020 framework. +PopSift was developed within the project [POPART](https://alicevision.org/popart), which has been funded by the [European Commission in the Horizon 2020](https://cordis.europa.eu/project/id/644874) framework. 
___ From a993efcf231b6eb80d6ddf1bec1a3001b4e92bd0 Mon Sep 17 00:00:00 2001 From: Fabien Castan Date: Wed, 15 Apr 2020 12:41:09 +0200 Subject: [PATCH 165/285] [doc] readme: update SIFT patent description --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index f1e85d30..ae79b3ae 100644 --- a/README.md +++ b/README.md @@ -109,7 +109,9 @@ We acknowledge that there is at least one SIFT implementation that is vastly fas ## License PopSift is licensed under [MPL v2 license](COPYING.md). -However, SIFT is patented in the US and perhaps other countries, and this license does not release users of this code from any requirements that may arise from such patents. +SIFT was patented in the United States from 1999-03-08 to 2020-03-28. See the [patent link](https://patents.google.com/patent/US6711293B1/en) for more information. +This license does not release users of this code from any requirements that may arise from such patents. + ## Cite Us From ae3ee38d7b5639d63777156b0c17431bfb20aa3a Mon Sep 17 00:00:00 2001 From: Fabien Castan Date: Wed, 15 Apr 2020 15:25:03 +0200 Subject: [PATCH 166/285] [doc] readme: update SIFT patent description --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index ae79b3ae..dd2fc18d 100644 --- a/README.md +++ b/README.md @@ -110,7 +110,7 @@ We acknowledge that there is at least one SIFT implementation that is vastly fas PopSift is licensed under [MPL v2 license](COPYING.md). SIFT was patented in the United States from 1999-03-08 to 2020-03-28. See the [patent link](https://patents.google.com/patent/US6711293B1/en) for more information. -This license does not release users of this code from any requirements that may arise from such patents. +PopSift license only concerns the PopSift source code and does not release users of this code from any requirements that may arise from patents. 
## Cite Us From 7d63bf9dc9d606d0558d7c7db6fc2f4230bc4122 Mon Sep 17 00:00:00 2001 From: Fabien Castan Date: Mon, 20 Apr 2020 21:07:36 +0200 Subject: [PATCH 167/285] [apps] minor build fix for msvc --- src/application/pgmread.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/application/pgmread.cpp b/src/application/pgmread.cpp index 47e14718..e013b1bd 100644 --- a/src/application/pgmread.cpp +++ b/src/application/pgmread.cpp @@ -38,13 +38,13 @@ unsigned char* readPGMfile( const string& filename, int& w, int& h ) { boost::filesystem::path input_file( filename ); - if( not boost::filesystem::exists( input_file ) ) { + if( ! boost::filesystem::exists( input_file ) ) { cerr << "File " << input_file << " does not exist" << endl; return nullptr; } ifstream pgmfile( filename.c_str(), ios::binary ); - if( not pgmfile.is_open() ) { + if( ! pgmfile.is_open() ) { cerr << "File " << input_file << " could not be opened for reading" << endl; return nullptr; } From fcc92647765cc1dfc2e1718614c45c0e39d7045b Mon Sep 17 00:00:00 2001 From: Fabien Castan Date: Mon, 20 Apr 2020 21:10:17 +0200 Subject: [PATCH 168/285] [ci] build examples on appveyor --- appveyor.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/appveyor.yml b/appveyor.yml index cc2de880..5fa9524d 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -20,7 +20,7 @@ install: before_build: - md build - cd build - - cmake -G "Visual Studio 14 2015" -A x64 -T v140,host=x64 -DPopSift_BUILD_EXAMPLES:BOOL=OFF -DCMAKE_BUILD_TYPE=%configuration% -DCMAKE_TOOLCHAIN_FILE=c:/tools/vcpkg/scripts/buildsystems/vcpkg.cmake .. + - cmake -G "Visual Studio 14 2015" -A x64 -T v140,host=x64 -DPopSift_BUILD_EXAMPLES:BOOL=ON -DCMAKE_BUILD_TYPE=%configuration% -DCMAKE_TOOLCHAIN_FILE=c:/tools/vcpkg/scripts/buildsystems/vcpkg.cmake .. 
- ls -l build: From b584a125be8154c81a2b6cd46cc73e2b3a014e38 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Tue, 21 Apr 2020 00:50:10 +0200 Subject: [PATCH 169/285] Update Dockerfile --- Dockerfile | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/Dockerfile b/Dockerfile index 70fa3090..a07fbbaf 100644 --- a/Dockerfile +++ b/Dockerfile @@ -18,15 +18,11 @@ LABEL maintainer="AliceVision Team alicevision@googlegroups.com" # System update RUN apt-get clean && apt-get update && apt-get install -y --no-install-recommends\ build-essential \ - cmake \ - git \ wget \ unzip \ - yasm \ - pkg-config \ libtool \ - nasm \ automake \ + libssl-dev \ libpng12-dev \ libjpeg-turbo8-dev \ libdevil-dev \ @@ -35,7 +31,17 @@ RUN apt-get clean && apt-get update && apt-get install -y --no-install-recommend libboost-program-options-dev \ libboost-thread-dev \ && rm -rf /var/lib/apt/lists/* + + # Manually install cmake +WORKDIR /tmp/cmake +RUN wget https://cmake.org/files/v3.17/cmake-3.17.1.tar.gz && \ + tar zxvf cmake-3.17.1.tar.gz && \ + cd cmake-3.17.1 && \ + ./bootstrap --prefix=/usr/local -- -DCMAKE_BUILD_TYPE:STRING=Release -DCMAKE_USE_OPENSSL:BOOL=ON && \ + make -j2 install && \ + cd /tmp && \ + rm -rf cmake COPY . /opt/popsift WORKDIR /opt/popsift/build -RUN cmake .. -DCMAKE_BUILD_TYPE=Release && make install -j +RUN cmake .. 
-DCMAKE_BUILD_TYPE=Release && make install -j 2 From d37302ced717a3351e1a6dfe9d4ac8dcd7318ba4 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Tue, 21 Apr 2020 14:11:47 +0200 Subject: [PATCH 170/285] [doc] changelog --- CHANGES.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGES.md b/CHANGES.md index 5aff60e1..e27e3819 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -20,3 +20,4 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Support for cuda 9 and 10 - Support for RTX architecture - Optional grid filtering +- Dockerfile From b8df6732a04583ea83bc389cab6bb53676a09baa Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Tue, 21 Apr 2020 15:11:37 +0200 Subject: [PATCH 171/285] [docker] do not use cd --- Dockerfile | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Dockerfile b/Dockerfile index a07fbbaf..e8f4c452 100644 --- a/Dockerfile +++ b/Dockerfile @@ -35,12 +35,12 @@ RUN apt-get clean && apt-get update && apt-get install -y --no-install-recommend # Manually install cmake WORKDIR /tmp/cmake RUN wget https://cmake.org/files/v3.17/cmake-3.17.1.tar.gz && \ - tar zxvf cmake-3.17.1.tar.gz && \ - cd cmake-3.17.1 && \ - ./bootstrap --prefix=/usr/local -- -DCMAKE_BUILD_TYPE:STRING=Release -DCMAKE_USE_OPENSSL:BOOL=ON && \ - make -j2 install && \ - cd /tmp && \ - rm -rf cmake + tar zxvf cmake-3.17.1.tar.gz +WORKDIR /tmp/cmake/cmake-3.17.1 +RUN ./bootstrap --prefix=/usr/local -- -DCMAKE_BUILD_TYPE:STRING=Release -DCMAKE_USE_OPENSSL:BOOL=ON && \ + make -j2 install +WORKDIR /tmp +RUN rm -rf cmake COPY . 
/opt/popsift WORKDIR /opt/popsift/build From cea0cbdb21d8fa0fd24f34ce2058eab8ee0f493b Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Tue, 28 Apr 2020 15:02:44 +0200 Subject: [PATCH 172/285] [ci] debug on appveyor --- appveyor.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/appveyor.yml b/appveyor.yml index 5fa9524d..d36e1717 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -7,6 +7,7 @@ platform: configuration: - Release + - Debug install: - cmd: >- @@ -29,4 +30,4 @@ build: parallel: true cache: - c:\tools\vcpkg\installed\ + - c:\tools\vcpkg\installed\ From 5d09d4aca5ce32df7bd2cf9e6ad556281784616e Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Wed, 29 Apr 2020 11:06:54 +0200 Subject: [PATCH 173/285] [ci] testing shared and static --- appveyor.yml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/appveyor.yml b/appveyor.yml index d36e1717..46a1ec04 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -9,6 +9,11 @@ configuration: - Release - Debug +environment: + matrix: + - DBUILD_SHARED_LIBS: 0 + - DBUILD_SHARED_LIBS: 1 + install: - cmd: >- call cudaInstallAppveyor.cmd @@ -21,7 +26,7 @@ install: before_build: - md build - cd build - - cmake -G "Visual Studio 14 2015" -A x64 -T v140,host=x64 -DPopSift_BUILD_EXAMPLES:BOOL=ON -DCMAKE_BUILD_TYPE=%configuration% -DCMAKE_TOOLCHAIN_FILE=c:/tools/vcpkg/scripts/buildsystems/vcpkg.cmake .. + - cmake -G "Visual Studio 14 2015" -A x64 -T v140,host=x64 -DBUILD_SHARED_LIBS=%DBUILD_SHARED_LIBS% -DPopSift_BUILD_EXAMPLES:BOOL=ON -DCMAKE_BUILD_TYPE=%configuration% -DCMAKE_TOOLCHAIN_FILE=c:/tools/vcpkg/scripts/buildsystems/vcpkg.cmake .. 
- ls -l build: From 18a9032fd5d23e5b88fb13deb723a26f110816bb Mon Sep 17 00:00:00 2001 From: Carsten Griwodz Date: Wed, 29 Apr 2020 12:32:10 +0200 Subject: [PATCH 174/285] [popsift] test whether image fits into CUDA textures --- src/popsift/common/debug_macros.h | 8 ++ src/popsift/common/device_prop.cu | 230 +++++++++++++++++++++++++++++- src/popsift/common/device_prop.h | 66 +++++++++ src/popsift/popsift.cpp | 133 +++++++++++++++-- src/popsift/popsift.h | 49 +++++++ 5 files changed, 469 insertions(+), 17 deletions(-) diff --git a/src/popsift/common/debug_macros.h b/src/popsift/common/debug_macros.h index 71b271fb..c48212b3 100755 --- a/src/popsift/common/debug_macros.h +++ b/src/popsift/common/debug_macros.h @@ -138,6 +138,14 @@ class BriefDuration std::cerr << __FILE__ << ":" << __LINE__ << std::endl << " " << s << std::endl; \ } +#define POP_WARN(s) { \ + std::cerr << __FILE__ << ":" << __LINE__ << std::endl; \ + std::cerr << " WARNING: " << s << std::endl; \ + } +#define POP_CUDA_WARN(err,s) { \ + std::cerr << __FILE__ << ":" << __LINE__ << std::endl; \ + std::cerr << " WARNING: " << s << cudaGetErrorString(err) << std::endl; \ + } #define POP_CUDA_FATAL(err,s) { \ std::cerr << __FILE__ << ":" << __LINE__ << std::endl; \ std::cerr << " " << s << cudaGetErrorString(err) << std::endl; \ diff --git a/src/popsift/common/device_prop.cu b/src/popsift/common/device_prop.cu index 14bf75ef..91a3a61d 100644 --- a/src/popsift/common/device_prop.cu +++ b/src/popsift/common/device_prop.cu @@ -17,18 +17,24 @@ using namespace std; device_prop_t::device_prop_t( ) { + int currentDevice; cudaError_t err; + err = cudaGetDevice( ¤tDevice ); + POP_CUDA_FATAL_TEST( err, "Cannot get the current CUDA device" ); + err = cudaGetDeviceCount( &_num_devices ); POP_CUDA_FATAL_TEST( err, "Cannot count devices" ); + _properties.resize(_num_devices); + for( int n=0; n<_num_devices; n++ ) { - cudaDeviceProp* p; - _properties.push_back( p = new cudaDeviceProp ); - err = cudaGetDeviceProperties( 
p, n ); + _properties[n] = new cudaDeviceProp; + err = cudaGetDeviceProperties( _properties[n], n ); POP_CUDA_FATAL_TEST( err, "Cannot get properties for a device" ); } - err = cudaSetDevice( 0 ); + + err = cudaSetDevice( currentDevice ); POP_CUDA_FATAL_TEST( err, "Cannot set device 0" ); } @@ -86,5 +92,221 @@ device_prop_t::~device_prop_t( ) } } +bool device_prop_t::checkLimit_2DtexLinear( int& width, int& height, bool printWarn ) const +{ + bool returnSuccess = true; + int currentDevice; + cudaError_t err; + + err = cudaGetDevice( ¤tDevice ); + if( err != cudaSuccess ) + { + POP_CUDA_WARN( err, "Cannot get current CUDA device" ); + return true; + } + + if( currentDevice >= _properties.size() ) + { + POP_WARN( "CUDA device was not registered at program start" ); + return true; + } + + const cudaDeviceProp* ptr = _properties[currentDevice]; + if( width > ptr->maxTexture2DLayered[0] ) + { + if( printWarn ) + { + std::cerr << __FILE__ << ":" << __LINE__ + << ": CUDA device " << currentDevice << std::endl + << " does not support 2D linear textures " << width + << " pixels wide." << endl; + } + width = ptr->maxTexture2DLayered[0]; + returnSuccess = false; + } + if( height > ptr->maxTexture2DLayered[1] ) + { + if( returnSuccess && printWarn ) + { + std::cerr << __FILE__ << ":" << __LINE__ + << ": CUDA device " << currentDevice << std::endl + << " does not support 2D linear textures " << height + << " pixels high." 
<< endl; + } + height = ptr->maxTexture2DLayered[1]; + returnSuccess = false; + } + + return returnSuccess; +} + +bool device_prop_t::checkLimit_2DtexArray( int& width, int& height, bool printWarn ) const +{ + bool returnSuccess = true; + int currentDevice; + cudaError_t err; + + err = cudaGetDevice( ¤tDevice ); + if( err != cudaSuccess ) + { + POP_CUDA_WARN( err, "Cannot get current CUDA device" ); + return true; + } + + if( currentDevice >= _properties.size() ) + { + POP_WARN( "CUDA device was not registered at program start" ); + return true; + } + + const cudaDeviceProp* ptr = _properties[currentDevice]; + if( width > ptr->maxTexture2D[0] ) + { + if( printWarn ) + { + std::cerr << __FILE__ << ":" << __LINE__ + << ": CUDA device " << currentDevice << std::endl + << " does not support 2D array textures " << width + << " pixels wide." << endl; + } + width = ptr->maxTexture2D[0]; + returnSuccess = false; + } + if( height > ptr->maxTexture2D[1] ) + { + if( returnSuccess && printWarn ) + { + std::cerr << __FILE__ << ":" << __LINE__ + << ": CUDA device " << currentDevice << std::endl + << " does not support 2D array textures " << height + << " pixels high." 
<< endl; + } + height = ptr->maxTexture2D[1]; + returnSuccess = false; + } + + return returnSuccess; +} + +bool device_prop_t::checkLimit_2DtexLayered( int& width, int& height, int& layers, bool printWarn ) const +{ + bool returnSuccess = true; + int currentDevice; + cudaError_t err; + + err = cudaGetDevice( ¤tDevice ); + if( err != cudaSuccess ) + { + POP_CUDA_WARN( err, "Cannot get current CUDA device" ); + return true; + } + + if( currentDevice >= _properties.size() ) + { + POP_WARN( "CUDA device was not registered at program start" ); + return true; + } + + const cudaDeviceProp* ptr = _properties[currentDevice]; + if( width > ptr->maxTexture2DLayered[0] ) + { + if( printWarn ) + { + std::cerr << __FILE__ << ":" << __LINE__ + << ": CUDA device " << currentDevice << std::endl + << " does not support 2D array textures " << width + << " pixels wide." << endl; + } + width = ptr->maxTexture2DLayered[0]; + returnSuccess = false; + } + if( height > ptr->maxTexture2DLayered[1] ) + { + if( returnSuccess && printWarn ) + { + std::cerr << __FILE__ << ":" << __LINE__ + << ": CUDA device " << currentDevice << std::endl + << " does not support 2D array textures " << height + << " pixels high." << endl; + } + height = ptr->maxTexture2DLayered[1]; + returnSuccess = false; + } + if( layers > ptr->maxTexture2DLayered[2] ) + { + if( returnSuccess && printWarn ) + { + std::cerr << __FILE__ << ":" << __LINE__ + << ": CUDA device " << currentDevice << std::endl + << " does not support 2D array textures " << layers + << " pixels deep." 
<< endl; + } + layers = ptr->maxTexture2DLayered[2]; + returnSuccess = false; + } + + return returnSuccess; +} + +bool device_prop_t::checkLimit_2DsurfLayered( int& width, int& height, int& layers, bool printWarn ) const +{ + bool returnSuccess = true; + int currentDevice; + cudaError_t err; + + err = cudaGetDevice( ¤tDevice ); + if( err != cudaSuccess ) + { + POP_CUDA_WARN( err, "Cannot get current CUDA device" ); + return true; + } + + if( currentDevice >= _properties.size() ) + { + POP_WARN( "CUDA device was not registered at program start" ); + return true; + } + + const cudaDeviceProp* ptr = _properties[currentDevice]; + if( width > ptr->maxSurface2DLayered[0] ) + { + if( printWarn ) + { + std::cerr << __FILE__ << ":" << __LINE__ + << ": CUDA device " << currentDevice << std::endl + << " does not support layered 2D surfaces " << width + << " bytes wide." << endl; + } + width = ptr->maxSurface2DLayered[0]; + returnSuccess = false; + } + if( height > ptr->maxSurface2DLayered[1] ) + { + if( returnSuccess && printWarn ) + { + std::cerr << __FILE__ << ":" << __LINE__ + << ": CUDA device " << currentDevice << std::endl + << " does not support layered 2D surfaces " << height + << " pixels high." << endl; + } + height = ptr->maxSurface2DLayered[1]; + returnSuccess = false; + } + if( layers > ptr->maxSurface2DLayered[2] ) + { + if( returnSuccess && printWarn ) + { + std::cerr << __FILE__ << ":" << __LINE__ + << ": CUDA device " << currentDevice << std::endl + << " does not support layered 2D surfaces " << layers + << " pixels deep." 
<< endl; + } + layers = ptr->maxSurface2DLayered[2]; + returnSuccess = false; + } + + return returnSuccess; +} + }} diff --git a/src/popsift/common/device_prop.h b/src/popsift/common/device_prop.h index 04b3870f..4cc50da1 100644 --- a/src/popsift/common/device_prop.h +++ b/src/popsift/common/device_prop.h @@ -16,12 +16,78 @@ class device_prop_t { int _num_devices; std::vector _properties; + +public: + enum { + do_warn = true, + dont_warn = false + }; + public: device_prop_t( ); ~device_prop_t( ); void print( ); void set( int n, bool print_choice = false ); + + /** Check if a request exceeds the current CUDA device's limit in + * texture2Dlinear dimensions. texture2Dlinear is based on CUDA memory that + * can be accessed directly (i.e. no CudaArray). + * @param[in,out] width Desired width of the texture. + * @param[in,out] height Desired height of the texture. + * @param[in] printWarn if true, print warnings to cerr if desired width + * or height exceeds limits. + * @return { true if the desired width and height are possible. + * false if one or both of the desired width and height are impossible. + * The desired width or height (or both) are replaced by the limit.} + */ + bool checkLimit_2DtexLinear( int& width, int& height, bool printWarn ) const; + + /** Check if a request exceeds the current CUDA device's limit in + * texture2D dimensions. texture2D is based on CUDA Arrays, which have + * invisible layout and can only be filled with cudaMemcpy. + * @param[in,out] width Desired width of the texture. + * @param[in,out] height Desired height of the texture. + * @param[in] printWarn if true, print warnings to cerr if desired width + * or height exceeds limits. + * @return { true if the desired width and height are possible. + * false if one or both of the desired width and height are impossible. 
+ * The desired width or height (or both) are replaced by the limit.} + */ + bool checkLimit_2DtexArray( int& width, int& height, bool printWarn ) const; + + /** Check if a request exceeds the current CUDA device's limit in + * texture2DLayered dimensions. texture2DLayered refers to a 3D structure, where + * interpolation happens only in 3D, effectively creating layers. + * @param[in,out] width Desired width of the texture. + * @param[in,out] height Desired height of the texture. + * @param[in,out] layers Desired depth of the texture. + * @param[in] printWarn if true, print warnings to cerr if desired width + * or height exceeds limits. + * @return { true if the desired width, height and depth are possible. + * false if one or both of the desired width and height are impossible. + * The desired width, height and layers are replaced by the limit + * if they exceed it.} + */ + bool checkLimit_2DtexLayered( int& width, int& height, int& layers, + bool printWarn ) const; + + /** Check if a request exceeds the current CUDA device's limit in + * surface2DLayered dimensions. surface2DLayered is the writable equivalent + * to texture2DLayered, but the width must be given in bytes, not elements. + * Since we use float, images cannot be as wide as expected. + * @param[in,out] width Desired width of the texture. + * @param[in,out] height Desired height of the texture. + * @param[in,out] layers Desired depth of the texture. + * @param[in] printWarn if true, print warnings to cerr if desired width + * or height exceeds limits. + * @return { true if the desired width, height and depth are possible. + * false if one or both of the desired width and height are impossible. 
+ * The desired width, height and layers are replaced by the limit + * if they exceed it.} + */ + bool checkLimit_2DsurfLayered( int& width, int& height, int& layers, + bool printWarn ) const; }; }} diff --git a/src/popsift/popsift.cpp b/src/popsift/popsift.cpp index d33bfab7..d980ccb1 100755 --- a/src/popsift/popsift.cpp +++ b/src/popsift/popsift.cpp @@ -91,10 +91,8 @@ bool PopSift::configure( const popsift::Config& config, bool force ) return true; } -bool PopSift::private_init( int w, int h ) +void PopSift::private_apply_scale_factor( int& w, int& h ) { - Pipe& p = _pipe; - /* up=-1 -> scale factor=2 * up= 0 -> scale factor=1 * up= 1 -> scale factor=0.5 @@ -102,22 +100,28 @@ bool PopSift::private_init( int w, int h ) float upscaleFactor = _config.getUpscaleFactor(); float scaleFactor = 1.0f / powf( 2.0f, -upscaleFactor ); - if( p._pyramid != nullptr ) { - p._pyramid->resetDimensions( _config, - ceilf( w * scaleFactor ), - ceilf( h * scaleFactor ) ); - return true; - } - if( _config.octaves < 0 ) { int oct = max(int (floor( logf( (float)min( w, h ) ) / logf( 2.0f ) ) - 3.0f + scaleFactor ), 1); _config.octaves = oct; } - p._pyramid = new popsift::Pyramid( _config, - ceilf( w * scaleFactor ), - ceilf( h * scaleFactor ) ); + w = ceilf( w * scaleFactor ); + h = ceilf( h * scaleFactor ); +} + +bool PopSift::private_init( int w, int h ) +{ + Pipe& p = _pipe; + + private_apply_scale_factor( w, h ); + + if( p._pyramid != nullptr ) { + p._pyramid->resetDimensions( _config, w, h ); + return true; + } + + p._pyramid = new popsift::Pyramid( _config, w, h ); cudaDeviceSynchronize(); @@ -136,6 +140,93 @@ void PopSift::uninit( ) _isInit = false; } +PopSift::AllocTest PopSift::testTextureFit( int width, int height ) +{ + const bool warn = popsift::cuda::device_prop_t::dont_warn; + bool retval; + retval = _device_properties.checkLimit_2DtexLinear( width, + height, + warn ); + if( !retval ) + { + return AllocTest::ImageExceedsLinearTextureLimit; + } + + + /* Scale the width and 
height - we need that size for the largest + * octave. */ + private_apply_scale_factor( width, height ); + + /* _config.level does not contain the 3 blur levels beyond the first + * that is required for downscaling to the following octave. + * We need all layers to check if we can support enough layers. + */ + int depth = _config.levels + 3; + + /* Surfaces have a limited width in bytes, not in elements. + * Our DOG pyramid stores 4/byte floats, so me must check for + * that width. + */ + int byteWidth = width * sizeof(float); + retval = _device_properties.checkLimit_2DsurfLayered( byteWidth, + height, + depth, + warn ); + if( !retval ) + { + return AllocTest::ImageExceedsLayeredSurfaceLimit; + } + else + { + return AllocTest::Ok; + } +} + +std::string PopSift::testTextureFitErrorString( AllocTest err, int width, int height ) +{ + ostringstream ostr; + + switch( err ) + { + case AllocTest::Ok : + ostr << "? No error." << endl; + break; + case AllocTest::ImageExceedsLinearTextureLimit : + _device_properties.checkLimit_2DtexLinear( width, height, false ); + ostr << "E Cannot load unscaled image. " << endl + << "E It exceeds the max CUDA linear texture size. " << endl + << "E Max is (" << width << "," << height << ")" << endl; + break; + case AllocTest::ImageExceedsLayeredSurfaceLimit : + { + const float upscaleFactor = _config.getUpscaleFactor(); + const float scaleFactor = 1.0f / powf( 2.0f, -upscaleFactor ); + int w = ceilf( width * scaleFactor ) * sizeof(float); + int h = ceilf( height * scaleFactor ); + int d = _config.levels + 3; + + _device_properties.checkLimit_2DsurfLayered( w, h, d, false ); + + w = w / scaleFactor / sizeof(float); + h = h / scaleFactor; + ostr << "E Cannot use" + << (upscaleFactor==1 ? " default " : " ") + << "downscaling factor " << -upscaleFactor + << " (i.e. upscaling by " << pow(2,upscaleFactor) << "). " + << endl + << "E It exceeds the max CUDA layered surface size. 
" << endl + << "E Change downscaling to fit into (" << w << "," << h + << ") with " << (d-3) << " levels per octave." << endl; + } + break; + default: + ostr << "E Programming error, please report." << endl; + break; + } + return ostr.str(); +} + + SiftJob* PopSift::enqueue( int w, int h, const unsigned char* imageData ) @@ -147,6 +238,14 @@ SiftJob* PopSift::enqueue( int w, exit( -1 ); } + AllocTest a = testTextureFit( w, h ); + if( a != AllocTest::Ok ) + { + cerr << __FILE__ << ":" << __LINE__ << " Image too large" << endl + << testTextureFitErrorString( a,w,h ); + return NULL; + } + SiftJob* job = new SiftJob( w, h, imageData ); _pipe._queue_stage1.push( job ); return job; @@ -163,6 +262,14 @@ SiftJob* PopSift::enqueue( int w, exit( -1 ); } + AllocTest a = testTextureFit( w, h ); + if( a != AllocTest::Ok ) + { + cerr << __FILE__ << ":" << __LINE__ << " Image too large" << endl + << testTextureFitErrorString( a,w,h ); + return NULL; + } + SiftJob* job = new SiftJob( w, h, imageData ); _pipe._queue_stage1.push( job ); return job; diff --git a/src/popsift/popsift.h b/src/popsift/popsift.h index 9b352386..d9594d52 100755 --- a/src/popsift/popsift.h +++ b/src/popsift/popsift.h @@ -15,6 +15,7 @@ #include #include "common/sync_queue.h" +#include "common/device_prop.h" #include "sift_conf.h" #include "sift_extremum.h" #include "sift_config.h" @@ -96,6 +97,13 @@ class PopSift FloatImages }; + enum AllocTest + { + Ok, + ImageExceedsLinearTextureLimit, + ImageExceedsLayeredSurfaceLimit + }; + public: PopSift() = delete; @@ -117,6 +125,43 @@ class PopSift void uninit( ); + /** Check whether the current CUDA device can support the image + * resolution (width,height) with the current configuration + * based on the card's texture engine. + * The function does not check if there is sufficient available + * memory. + * The first part of the test depends on the parameters width and + * height. 
It checks whether the image size is supported by CUDA + * 2D linear textures on this card. This is used to load the image + * into the first level of the first octave. + * For the second part of the tst, two value of the configuration + * are important: + * "downsampling", because it determines the required texture size + * after loading. The CUDA 2D layered texture must support the + * scaled width and height. + * "levels", because it determines the number of levels in each + * octave. The CUDA 2D layered texture must support enough depth + * for each level. + * @param width The width of the input image + * @param height The height of the input image + * @return AllocTest::Ok if the image dimensions are supported by this device's + * CUDA texture engine, + * AllocTest::ImageExceedsLinearTextureLimit if the input image size + * exceeds the dimensions of the CUDA Texture used for loading. + * The input image must be scaled. + * AllocTest::ImageExceedsLayeredSurfaceLimit if the scaled input + * image exceeds the dimensions of the CUDA Surface used for the + * image pyramid. The scaling factor must be changes to fit in. + * @remark { If you want to call configure() before extracting features, + * you should call configure() before textTextureFit(). } + * @remark { The current CUDA device is determined by a call to + * cudaGetDevice(), card properties are only read once. } + */ + AllocTest testTextureFit( int width, int height ); + + /** Create a warning string for an AllocTest error code. 
*/ + std::string testTextureFitErrorString( AllocTest err, int w, int h ); + /** Enqueue a byte image, value range 0..255 */ SiftJob* enqueue( int w, int h, @@ -153,6 +198,7 @@ class PopSift private: bool private_init( int w, int h ); + void private_apply_scale_factor( int& w, int& h ); void uploadImages( ); /* The following method are alternative worker functions for Jobs submitted by @@ -180,5 +226,8 @@ class PopSift /// whether the object is initialized bool _isInit{true}; + + // Device property collection runs when this object is created + popsift::cuda::device_prop_t _device_properties; }; From b8f0cc5c2fab73784957b5a0337590911f6b5c43 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Wed, 29 Apr 2020 14:07:22 +0200 Subject: [PATCH 175/285] [ci] testing %DBUILD_SHARED_LIBS% --- CMakeLists.txt | 2 +- appveyor.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 26346174..9741aaa2 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -25,7 +25,7 @@ endif() set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${PROJECT_SOURCE_DIR}/cmake") # set(CMAKE_BUILD_TYPE Debug) -if(NOT CMAKE_BUILD_TYPE) +if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) set(CMAKE_BUILD_TYPE Release) message(STATUS "Build type not set, building in Release configuration") else() diff --git a/appveyor.yml b/appveyor.yml index 46a1ec04..e8c49132 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -26,7 +26,7 @@ install: before_build: - md build - cd build - - cmake -G "Visual Studio 14 2015" -A x64 -T v140,host=x64 -DBUILD_SHARED_LIBS=%DBUILD_SHARED_LIBS% -DPopSift_BUILD_EXAMPLES:BOOL=ON -DCMAKE_BUILD_TYPE=%configuration% -DCMAKE_TOOLCHAIN_FILE=c:/tools/vcpkg/scripts/buildsystems/vcpkg.cmake .. 
+ - cmake -G "Visual Studio 14 2015" -A x64 -T v140,host=x64 -DBUILD_SHARED_LIBS=%DBUILD_SHARED_LIBS% -DPopSift_USE_POSITION_INDEPENDENT_CODE:BOOL=%DBUILD_SHARED_LIBS% -DPopSift_BUILD_EXAMPLES:BOOL=ON -DCMAKE_BUILD_TYPE=%configuration% -DCMAKE_TOOLCHAIN_FILE=c:/tools/vcpkg/scripts/buildsystems/vcpkg.cmake .. - ls -l build: From 318ae82c80753e88bdc19201a163816a212dfd01 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Wed, 29 Apr 2020 15:42:42 +0200 Subject: [PATCH 176/285] [ci] testing manually replacing \MD for static build --- CMakeLists.txt | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9741aaa2..a87c593e 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -32,6 +32,15 @@ else() message(STATUS "Building in ${CMAKE_BUILD_TYPE} configuration") endif() +message(STATUS "CMAKE_CONFIGURATION_TYPES: ${CMAKE_CONFIGURATION_TYPES}") +if(MSVC AND NOT BUILD_SHARED_LIBS) + foreach(config ${CMAKE_CONFIGURATION_TYPES}) + string(TOUPPER ${config} config) + string(REPLACE /MD /MT CMAKE_C_FLAGS_${config} "${CMAKE_C_FLAGS_${config}}") + string(REPLACE /MD /MT CMAKE_CXX_FLAGS_${config} "${CMAKE_CXX_FLAGS_${config}}") + endforeach() +endif() + # for some reason this line is necessary to propagate the standard to nvcc set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") set(CMAKE_CXX_STANDARD 11) From edf7b6958ced5082c34dadfb8ea67e0d50262c0d Mon Sep 17 00:00:00 2001 From: Andrew Hardin Date: Thu, 30 Apr 2020 11:40:42 -0600 Subject: [PATCH 177/285] Remove stale function that's not implemented. 
--- src/popsift/sift_conf.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/popsift/sift_conf.h b/src/popsift/sift_conf.h index c28580a2..22b1d3b3 100644 --- a/src/popsift/sift_conf.h +++ b/src/popsift/sift_conf.h @@ -111,7 +111,6 @@ struct Config void setMaxExtreme( int m ); void setPrintGaussTables( ); void setDPOrientation( bool on ); - void setMaxExtrema( int extrema ); void setFilterMaxExtrema( int extrema ); void setFilterGridSize( int sz ); void setFilterSorting( const std::string& direction ); From 386a073c50938e8d2d87dac42c40dd8a1215c2cc Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Sun, 3 May 2020 17:38:01 +0200 Subject: [PATCH 178/285] [common] preincrement --- src/popsift/common/device_prop.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/popsift/common/device_prop.cu b/src/popsift/common/device_prop.cu index 91a3a61d..8ad42946 100644 --- a/src/popsift/common/device_prop.cu +++ b/src/popsift/common/device_prop.cu @@ -28,7 +28,7 @@ device_prop_t::device_prop_t( ) _properties.resize(_num_devices); - for( int n=0; n<_num_devices; n++ ) { + for( int n=0; n<_num_devices; ++n ) { _properties[n] = new cudaDeviceProp; err = cudaGetDeviceProperties( _properties[n], n ); POP_CUDA_FATAL_TEST( err, "Cannot get properties for a device" ); From 8d11841f8339838dcf16f58f991d4289c89e525c Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Sun, 3 May 2020 17:38:34 +0200 Subject: [PATCH 179/285] [popsift] declaration and init --- src/popsift/popsift.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/popsift/popsift.cpp b/src/popsift/popsift.cpp index d980ccb1..cf6d8e38 100755 --- a/src/popsift/popsift.cpp +++ b/src/popsift/popsift.cpp @@ -143,8 +143,7 @@ void PopSift::uninit( ) PopSift::AllocTest PopSift::testTextureFit( int width, int height ) { const bool warn = popsift::cuda::device_prop_t::dont_warn; - bool retval; - retval = _device_properties.checkLimit_2DtexLinear( width, + bool retval = 
_device_properties.checkLimit_2DtexLinear( width, height, warn ); if( !retval ) From b3a6537cde9477840d21cc5d19489d78d934d6ec Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Sun, 3 May 2020 17:38:51 +0200 Subject: [PATCH 180/285] [popsift] simplify return --- src/popsift/popsift.cpp | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/src/popsift/popsift.cpp b/src/popsift/popsift.cpp index cf6d8e38..53fd944d 100755 --- a/src/popsift/popsift.cpp +++ b/src/popsift/popsift.cpp @@ -171,14 +171,8 @@ PopSift::AllocTest PopSift::testTextureFit( int width, int height ) height, depth, warn ); - if( !retval ) - { - return AllocTest::ImageExceedsLayeredSurfaceLimit; - } - else - { - return AllocTest::Ok; - } + + return (retval ? AllocTest::Ok : AllocTest::ImageExceedsLayeredSurfaceLimit); } std::string PopSift::testTextureFitErrorString( AllocTest err, int width, int height ) From 6175918c459361e306dd856e6349d19a3795f1f9 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Sun, 3 May 2020 17:39:07 +0200 Subject: [PATCH 181/285] [popsift] nullptr --- src/popsift/popsift.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/popsift/popsift.cpp b/src/popsift/popsift.cpp index 53fd944d..90b42fbf 100755 --- a/src/popsift/popsift.cpp +++ b/src/popsift/popsift.cpp @@ -236,7 +236,7 @@ SiftJob* PopSift::enqueue( int w, { cerr << __FILE__ << ":" << __LINE__ << " Image too large" << endl << testTextureFitErrorString( a,w,h ); - return NULL; + return nullptr; } SiftJob* job = new SiftJob( w, h, imageData ); @@ -260,7 +260,7 @@ SiftJob* PopSift::enqueue( int w, { cerr << __FILE__ << ":" << __LINE__ << " Image too large" << endl << testTextureFitErrorString( a,w,h ); - return NULL; + return nullptr; } SiftJob* job = new SiftJob( w, h, imageData ); From f60ef34891584c3b9452be8714cbdc15210be9c2 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Sun, 3 May 2020 17:39:22 +0200 Subject: [PATCH 182/285] [popsift] just formatting --- 
src/popsift/popsift.cpp | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/popsift/popsift.cpp b/src/popsift/popsift.cpp index 90b42fbf..2f6cab3f 100755 --- a/src/popsift/popsift.cpp +++ b/src/popsift/popsift.cpp @@ -343,9 +343,12 @@ SiftJob::SiftJob( int w, int h, const unsigned char* imageData ) _f = _p.get_future(); _imageData = (unsigned char*)malloc( w*h ); - if( _imageData != nullptr ) { + if( _imageData != nullptr ) + { memcpy( _imageData, imageData, w*h ); - } else { + } + else + { cerr << __FILE__ << ":" << __LINE__ << " Memory limitation" << endl << "E Failed to allocate memory for SiftJob" << endl; exit( -1 ); @@ -360,9 +363,12 @@ SiftJob::SiftJob( int w, int h, const float* imageData ) _f = _p.get_future(); _imageData = (unsigned char*)malloc( w*h*sizeof(float) ); - if( _imageData != nullptr ) { + if( _imageData != nullptr ) + { memcpy( _imageData, imageData, w*h*sizeof(float) ); - } else { + } + else + { cerr << __FILE__ << ":" << __LINE__ << " Memory limitation" << endl << "E Failed to allocate memory for SiftJob" << endl; exit( -1 ); From bdc4e7224a5fa9d592f32ae7e8fdf313b5aa59f9 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Sat, 9 May 2020 10:11:49 -0700 Subject: [PATCH 183/285] [cmake]-std=c++11 not hardcoded for MSVC because it is propagated to nvcc causing warnings --- CMakeLists.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a87c593e..a723f05c 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -42,7 +42,9 @@ if(MSVC AND NOT BUILD_SHARED_LIBS) endif() # for some reason this line is necessary to propagate the standard to nvcc -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") +if(NOT MSVC) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") +endif() set(CMAKE_CXX_STANDARD 11) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CUDA_STANDARD 11) From d7a779a11996cd0e074b6ba91651d18296e0c5e4 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: 
Sat, 9 May 2020 10:12:26 -0700 Subject: [PATCH 184/285] [cmake];-fPIC not hardcoded for MSVC because it is propagated to nvcc causing warnings --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a723f05c..e11669d1 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -128,7 +128,7 @@ if(CMAKE_BUILD_TYPE STREQUAL "Debug") endif() list(APPEND CUDA_NVCC_FLAGS_RELEASE "-O3") -if(PopSift_USE_POSITION_INDEPENDENT_CODE) +if(PopSift_USE_POSITION_INDEPENDENT_CODE AND NOT MSVC) list(APPEND CUDA_NVCC_FLAGS "-Xcompiler;-fPIC") endif() From 2db2ae257cb6707183103ddcecac8494145dcd5a Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Sat, 9 May 2020 10:13:23 -0700 Subject: [PATCH 185/285] [cmake] use correct flags for static build for MSVC --- CMakeLists.txt | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index e11669d1..b8a95b50 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -36,9 +36,16 @@ message(STATUS "CMAKE_CONFIGURATION_TYPES: ${CMAKE_CONFIGURATION_TYPES}") if(MSVC AND NOT BUILD_SHARED_LIBS) foreach(config ${CMAKE_CONFIGURATION_TYPES}) string(TOUPPER ${config} config) + message(STATUS "config ${config}") string(REPLACE /MD /MT CMAKE_C_FLAGS_${config} "${CMAKE_C_FLAGS_${config}}") string(REPLACE /MD /MT CMAKE_CXX_FLAGS_${config} "${CMAKE_CXX_FLAGS_${config}}") endforeach() + foreach(config "DEBUG" "RELEASE" "MINSIZEREL" "RELWITHDEBINFO") + string(REPLACE /MD /MT CMAKE_C_FLAGS_${config} "${CMAKE_C_FLAGS_${config}}") + string(REPLACE /MD /MT CMAKE_CXX_FLAGS_${config} "${CMAKE_CXX_FLAGS_${config}}") + message(STATUS "CMAKE_C_FLAGS_${config} ${CMAKE_C_FLAGS_${config}}") + message(STATUS "CMAKE_CXX_FLAGS_${config} ${CMAKE_CXX_FLAGS_${config}}") + endforeach() endif() # for some reason this line is necessary to propagate the standard to nvcc From a67f9cf177e088699d5778e38e801f924fddf255 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Sat, 9 May 2020 10:14:58 
-0700 Subject: [PATCH 186/285] [cmake] force /MT and /MD into nvcc for windows this is for vcpkg, the intermediate link step is not propagating correctly these flags. Possible side effect is that the flags are repeated in normal building with VS --- CMakeLists.txt | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index b8a95b50..fc99141f 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -139,6 +139,23 @@ if(PopSift_USE_POSITION_INDEPENDENT_CODE AND NOT MSVC) list(APPEND CUDA_NVCC_FLAGS "-Xcompiler;-fPIC") endif() +if(MSVC) + if(BUILD_SHARED_LIBS) + if(CMAKE_BUILD_TYPE STREQUAL "Debug") + set(PopSift_MVSC_LINKER "/MDd") + else() + set(PopSift_MVSC_LINKER "/MD") + endif() + else() + if(CMAKE_BUILD_TYPE STREQUAL "Debug") + set(PopSift_MVSC_LINKER "/MTd") + else() + set(PopSift_MVSC_LINKER "/MT") + endif() + endif() + list(APPEND CUDA_NVCC_FLAGS -Xcompiler ${PopSift_MVSC_LINKER}) +endif() + # default stream per-thread implies that each host thread has one non-synchronizing 0-stream # currently, the code requires legacy mode list(APPEND CUDA_NVCC_FLAGS "--default-stream;legacy") From 73e5e08a23b351a5f4c69f59d28bfda8496f1598 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Sat, 9 May 2020 10:15:45 -0700 Subject: [PATCH 187/285] [cmake] remove BOOST_ALL_DYN_LINK BOOST_ALL_NO_LIB the first should be only used for dynamic building --- src/CMakeLists.txt | 8 ++++++++ src/application/CMakeLists.txt | 4 ++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 40b9ab63..4c7dae9a 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -62,6 +62,14 @@ set_target_properties(popsift PROPERTIES DEBUG_POSTFIX "d") target_link_libraries(popsift ${CUDA_CUDADEVRT_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} Threads::Threads) +if(MSVC AND CMAKE_BUILD_TYPE STREQUAL "Debug") + message(STATUS "MSVC AND CMAKE_BUILD_TYPE STREQUAL debug ") + if(NOT BUILD_SHARED_LIBS) + 
set_target_properties(popsift PROPERTIES STATIC_LIBRARY_FLAGS "/NODEFAULTLIB:libcmt.lib;/VERBOSE:LIB") + list(APPEND STATIC_LIBRARY_FLAGS "/NODEFAULTLIB:libcmt.lib" "/VERBOSE:LIB") + message(STATUS "adding properties") + endif() +endif() # EXPORTING THE LIBRARY # diff --git a/src/application/CMakeLists.txt b/src/application/CMakeLists.txt index 62fd5378..a94636e0 100755 --- a/src/application/CMakeLists.txt +++ b/src/application/CMakeLists.txt @@ -52,7 +52,7 @@ set_property(TARGET popsift-demo PROPERTY CXX_STANDARD 11) target_compile_options(popsift-demo PRIVATE ${PD_COMPILE_OPTIONS} ) target_include_directories(popsift-demo PUBLIC ${PD_INCLUDE_DIRS}) -target_compile_definitions(popsift-demo PRIVATE ${Boost_DEFINITIONS} BOOST_ALL_DYN_LINK BOOST_ALL_NO_LIB) +target_compile_definitions(popsift-demo PRIVATE ${Boost_DEFINITIONS}) target_link_libraries(popsift-demo PUBLIC PopSift::popsift ${PD_LINK_LIBS}) set_target_properties(popsift-demo PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/${CMAKE_SYSTEM_NAME}-${CMAKE_SYSTEM_PROCESSOR}") @@ -67,7 +67,7 @@ set_property(TARGET popsift-match PROPERTY CXX_STANDARD 11) target_compile_options(popsift-match PRIVATE ${PD_COMPILE_OPTIONS} ) target_include_directories(popsift-match PUBLIC ${PD_INCLUDE_DIRS}) -target_compile_definitions(popsift-match PRIVATE ${Boost_DEFINITIONS} BOOST_ALL_DYN_LINK BOOST_ALL_NO_LIB) +target_compile_definitions(popsift-match PRIVATE ${Boost_DEFINITIONS}) target_link_libraries(popsift-match PUBLIC PopSift::popsift ${PD_LINK_LIBS}) set_target_properties(popsift-match PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/${CMAKE_SYSTEM_NAME}-${CMAKE_SYSTEM_PROCESSOR}") From bff7c92a41865ab5c961b5cb85272bc6d37b0d08 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Mon, 11 May 2020 13:35:09 +0200 Subject: [PATCH 188/285] [popsift] use nullptr --- src/popsift/common/assist.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/popsift/common/assist.h 
b/src/popsift/common/assist.h index 5106fee2..397a81f0 100644 --- a/src/popsift/common/assist.h +++ b/src/popsift/common/assist.h @@ -133,7 +133,7 @@ static void* memalign(size_t alignment, size_t size) int err = posix_memalign( &ret, alignment, size ); if( err != 0 ) { errno = err; - ret = 0; + ret = nullptr; } return ret; #endif From 0010d879ad57ce42ecca448d5b0e5d93b4fb0b15 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Mon, 11 May 2020 13:35:30 +0200 Subject: [PATCH 189/285] [popsift] unneeded ; --- src/popsift/common/assist.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/popsift/common/assist.h b/src/popsift/common/assist.h index 397a81f0..17442057 100644 --- a/src/popsift/common/assist.h +++ b/src/popsift/common/assist.h @@ -148,4 +148,4 @@ static void memalign_free( void* ptr ) #endif } -}; // namespace popsift +} // namespace popsift From c5583da09279d7bce6cb93e756e8eb996b86a1b8 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Mon, 11 May 2020 13:35:46 +0200 Subject: [PATCH 190/285] [cmake] fix typo --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index fc99141f..add7571d 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -12,7 +12,7 @@ option(PopSift_USE_NVTX_PROFILING "Use CUDA NVTX for profiling." OFF) option(PopSift_ERRCHK_AFTER_KERNEL "Synchronize and check CUDA error after every kernel." OFF) option(PopSift_USE_POSITION_INDEPENDENT_CODE "Generate position independent code." ON) option(PopSift_USE_GRID_FILTER "Switch off grid filtering to massively reduce compile time while debugging other things." ON) -option(PopSift_USE_NORMF "The __normf function computes Euclidian distance on large arrays. Fast but stability is uncertain." OFF) +option(PopSift_USE_NORMF "The __normf function computes Euclidean distance on large arrays. Fast but stability is uncertain." 
OFF) option(PopSift_USE_TEST_CMD "Add testing step for functional verification" OFF) option(PopSift_NVCC_WARNINGS "Switch on several additional warning for CUDA nvcc" OFF) option(BUILD_SHARED_LIBS "Build shared libraries" ON) From 8e4aef70e47680cfba05b438fa806acf8f1d739b Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Mon, 11 May 2020 13:37:20 +0200 Subject: [PATCH 191/285] [cmake] set CMAKE_BUILD_TYPE values --- CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index add7571d..bd2fb4de 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,6 +28,8 @@ set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${PROJECT_SOURCE_DIR}/cmake") if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) set(CMAKE_BUILD_TYPE Release) message(STATUS "Build type not set, building in Release configuration") + # Set the possible values of build type for cmake-gui + set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo") else() message(STATUS "Building in ${CMAKE_BUILD_TYPE} configuration") endif() From 9e23dd018824d9ab62ff4a47fe8071c44baf3c6b Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Mon, 11 May 2020 13:37:49 +0200 Subject: [PATCH 192/285] [cmake] linker flags --- CMakeLists.txt | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index bd2fb4de..bf1b34ba 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -34,14 +34,8 @@ else() message(STATUS "Building in ${CMAKE_BUILD_TYPE} configuration") endif() -message(STATUS "CMAKE_CONFIGURATION_TYPES: ${CMAKE_CONFIGURATION_TYPES}") +# ensure the proper linker flags when building the static version on MSVC if(MSVC AND NOT BUILD_SHARED_LIBS) - foreach(config ${CMAKE_CONFIGURATION_TYPES}) - string(TOUPPER ${config} config) - message(STATUS "config ${config}") - string(REPLACE /MD /MT CMAKE_C_FLAGS_${config} "${CMAKE_C_FLAGS_${config}}") - string(REPLACE /MD /MT CMAKE_CXX_FLAGS_${config} 
"${CMAKE_CXX_FLAGS_${config}}") - endforeach() foreach(config "DEBUG" "RELEASE" "MINSIZEREL" "RELWITHDEBINFO") string(REPLACE /MD /MT CMAKE_C_FLAGS_${config} "${CMAKE_C_FLAGS_${config}}") string(REPLACE /MD /MT CMAKE_CXX_FLAGS_${config} "${CMAKE_CXX_FLAGS_${config}}") From 8d2b8b032486edaa73becbddf1126e554d5ad28d Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Mon, 11 May 2020 13:38:45 +0200 Subject: [PATCH 193/285] [cmake] simplify PopSift_MVSC_LINKER --- CMakeLists.txt | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index bf1b34ba..35ac3cb7 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -135,19 +135,16 @@ if(PopSift_USE_POSITION_INDEPENDENT_CODE AND NOT MSVC) list(APPEND CUDA_NVCC_FLAGS "-Xcompiler;-fPIC") endif() +# this is to ensure that on MSVC the flags for the linker are properly propagate even to the intermediate +# linking step. This seems not the case e.g. on vcpkg using ninja build. if(MSVC) if(BUILD_SHARED_LIBS) - if(CMAKE_BUILD_TYPE STREQUAL "Debug") - set(PopSift_MVSC_LINKER "/MDd") - else() - set(PopSift_MVSC_LINKER "/MD") - endif() + set(PopSift_MVSC_LINKER "/MD") else() - if(CMAKE_BUILD_TYPE STREQUAL "Debug") - set(PopSift_MVSC_LINKER "/MTd") - else() - set(PopSift_MVSC_LINKER "/MT") - endif() + set(PopSift_MVSC_LINKER "/MT") + endif() + if(CMAKE_BUILD_TYPE STREQUAL "Debug") + set(PopSift_MVSC_LINKER "${PopSift_MVSC_LINKER}d") endif() list(APPEND CUDA_NVCC_FLAGS -Xcompiler ${PopSift_MVSC_LINKER}) endif() From 1640d9ad48add1b746b3cda822801c4f3ee2329f Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Mon, 11 May 2020 13:39:17 +0200 Subject: [PATCH 194/285] [cmake] doc --- CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 35ac3cb7..217d4bb0 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -45,6 +45,7 @@ if(MSVC AND NOT BUILD_SHARED_LIBS) endif() # for some reason this line is necessary to propagate the standard to 
nvcc +# On MSVC this is not necessary / nvcc does not recognize the flag for MSVC if(NOT MSVC) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") endif() From d8769ca5827b52bbf04df26cdd5d4e8dfbaa593b Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Mon, 11 May 2020 13:39:42 +0200 Subject: [PATCH 195/285] [cmake] boost not used anymore --- CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 217d4bb0..da9afd14 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -224,7 +224,6 @@ message(STATUS "Use CUDA NVTX for profiling: " ${PopSift_USE_NVTX_PROFILING}) message(STATUS "Synchronize and check CUDA error after every kernel: " ${PopSift_ERRCHK_AFTER_KERNEL}) message(STATUS "Grid filtering: " ${PopSift_USE_GRID_FILTER}) message(STATUS "Testing step: " ${PopSift_USE_TEST_CMD}) -message(STATUS "Link with static Boost libraries: " ${PopSift_BOOST_USE_STATIC_LIBS}) message(STATUS "Additional warning for CUDA nvcc: " ${PopSift_NVCC_WARNINGS}) message(STATUS "Compiling for CUDA CCs: ${PopSift_CUDA_CC_LIST}") message(STATUS "Install path: " ${CMAKE_INSTALL_PREFIX}) From 1f6a3378dd2d8a0f6e7b7fd03ce1060fe75d9dc6 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Mon, 11 May 2020 13:40:09 +0200 Subject: [PATCH 196/285] [cmake] fix message for pic --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index da9afd14..06b15efc 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -219,7 +219,7 @@ message(STATUS "PopSift version: " ${PROJECT_VERSION}) message(STATUS "Build type: " ${CMAKE_BUILD_TYPE}) message(STATUS "Build Shared libs: " ${BUILD_SHARED_LIBS}) message(STATUS "Build examples: " ${PopSift_BUILD_EXAMPLES}) -message(STATUS "Generate position independent code: " ${PopSift_USE_POSITION_INDEPENDENT_CODE}) +message(STATUS "Generate position independent code: " ${CMAKE_POSITION_INDEPENDENT_CODE}) message(STATUS "Use CUDA NVTX for profiling: " 
${PopSift_USE_NVTX_PROFILING}) message(STATUS "Synchronize and check CUDA error after every kernel: " ${PopSift_ERRCHK_AFTER_KERNEL}) message(STATUS "Grid filtering: " ${PopSift_USE_GRID_FILTER}) From 15004ceb5c25a8d63d5c13b41b2b85d48d7d7741 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Mon, 11 May 2020 13:40:42 +0200 Subject: [PATCH 197/285] [cmake] no pic for MSVC --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 06b15efc..a40d13d9 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,7 +18,7 @@ option(PopSift_NVCC_WARNINGS "Switch on several additional warning for CUDA nvcc option(BUILD_SHARED_LIBS "Build shared libraries" ON) -if(PopSift_USE_POSITION_INDEPENDENT_CODE) +if(PopSift_USE_POSITION_INDEPENDENT_CODE AND NOT MSVC) set(CMAKE_POSITION_INDEPENDENT_CODE ON) endif() From 50bf0f98575d79e23943f9e246cb0977f57a287f Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Mon, 11 May 2020 13:40:56 +0200 Subject: [PATCH 198/285] [cmake] lowercase --- src/application/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/application/CMakeLists.txt b/src/application/CMakeLists.txt index a94636e0..7b909a36 100755 --- a/src/application/CMakeLists.txt +++ b/src/application/CMakeLists.txt @@ -1,7 +1,7 @@ cmake_minimum_required(VERSION 3.12) project(PopsiftDemo LANGUAGES CXX) -OPTION(PopSift_BOOST_USE_STATIC_LIBS "Link examples with static Boost libraries" OFF) +option(PopSift_BOOST_USE_STATIC_LIBS "Link examples with static Boost libraries" OFF) if(TARGET popsift) # when compiled in the repository the target is already defined From ca544d4a01b37178516b7d4269754bd2227a0546 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Mon, 11 May 2020 14:16:43 +0200 Subject: [PATCH 199/285] [apps] use EXIT_SUCCESS --- src/application/main.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/application/main.cpp b/src/application/main.cpp index 
4c475448..efd3dd90 100755 --- a/src/application/main.cpp +++ b/src/application/main.cpp @@ -137,7 +137,7 @@ static void parseargs(int argc, char** argv, popsift::Config& config, string& in if (vm.count("help")) { std::cout << all << '\n'; - exit(1); + exit(EXIT_SUCCESS); } notify(vm); // Notify does processing (e.g., raise exceptions if required args are missing) From 066f07f57072f699ae5ea0d4cbda8a41d2649c35 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Mon, 11 May 2020 14:18:01 +0200 Subject: [PATCH 200/285] [cmake] add -fPIE for exe fix #49 --- src/application/CMakeLists.txt | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/application/CMakeLists.txt b/src/application/CMakeLists.txt index 7b909a36..a368746f 100755 --- a/src/application/CMakeLists.txt +++ b/src/application/CMakeLists.txt @@ -3,6 +3,23 @@ project(PopsiftDemo LANGUAGES CXX) option(PopSift_BOOST_USE_STATIC_LIBS "Link examples with static Boost libraries" OFF) +# if this is used as a stand-alone project we need to tell whether to use PIC +if(NOT DEFINED BUILD_SHARED_LIBS) + option(BUILD_SHARED_LIBS "Build shared libraries" ON) + set(CMAKE_POSITION_INDEPENDENT_CODE ${BUILD_SHARED_LIBS}) +endif() + +# enable -fPIE for executables when -fpic +# https://cmake.org/cmake/help/v3.17/policy/CMP0083.html +if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.14) + cmake_policy(SET CMP0083 NEW) + include(CheckPIESupported) + check_pie_supported() +elseif(CMAKE_POSITION_INDEPENDENT_CODE AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + # manually add the link flag for gcc + list(APPEND CMAKE_EXE_LINKER_FLAGS "-pie") +endif() + if(TARGET popsift) # when compiled in the repository the target is already defined add_library(PopSift::popsift ALIAS popsift) From a62e684cecfed7daf305355ce7a332bd79e560e3 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Fri, 15 May 2020 22:41:19 +0200 Subject: [PATCH 201/285] [cmake] cleaning fix #49 --- src/CMakeLists.txt | 9 --------- 1 file changed, 9 deletions(-) 
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 4c7dae9a..0380dd41 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -62,15 +62,6 @@ set_target_properties(popsift PROPERTIES DEBUG_POSTFIX "d") target_link_libraries(popsift ${CUDA_CUDADEVRT_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} Threads::Threads) -if(MSVC AND CMAKE_BUILD_TYPE STREQUAL "Debug") - message(STATUS "MSVC AND CMAKE_BUILD_TYPE STREQUAL debug ") - if(NOT BUILD_SHARED_LIBS) - set_target_properties(popsift PROPERTIES STATIC_LIBRARY_FLAGS "/NODEFAULTLIB:libcmt.lib;/VERBOSE:LIB") - list(APPEND STATIC_LIBRARY_FLAGS "/NODEFAULTLIB:libcmt.lib" "/VERBOSE:LIB") - message(STATUS "adding properties") - endif() -endif() - # EXPORTING THE LIBRARY # # place to put the cmake-related files From 30d5818cad61a971fbef4e5779093e7edfa60d6b Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Fri, 15 May 2020 22:42:42 +0200 Subject: [PATCH 202/285] [ci] win only shared --- appveyor.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index e8c49132..3cdf7298 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -9,10 +9,10 @@ configuration: - Release - Debug -environment: - matrix: - - DBUILD_SHARED_LIBS: 0 - - DBUILD_SHARED_LIBS: 1 +#environment: +# matrix: +# - DBUILD_SHARED_LIBS: 0 +# - DBUILD_SHARED_LIBS: 1 install: - cmd: >- From bdc6baace91c04b7ea892bd2d0c939eebcb2b395 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Fri, 15 May 2020 22:58:33 +0200 Subject: [PATCH 203/285] [docker] split deps and image --- Dockerfile | 36 +++++++----------------------------- Dockerfile_deps | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 29 deletions(-) create mode 100644 Dockerfile_deps diff --git a/Dockerfile b/Dockerfile index e8f4c452..67c4f8cd 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,6 @@ -ARG CUDA_TAG=9.2-devel -FROM nvidia/cuda:$CUDA_TAG +ARG CUDA_TAG=10.2 +ARG OS_TAG=18.04 +FROM 
alicevision/popsift:deps-cuda${CUDA_TAG}-ubuntu${OS_TAG} LABEL maintainer="AliceVision Team alicevision@googlegroups.com" # use CUDA_TAG to select the image version to use @@ -16,32 +17,9 @@ LABEL maintainer="AliceVision Team alicevision@googlegroups.com" # Cuda version (ENV): $CUDA_VERSION # System update -RUN apt-get clean && apt-get update && apt-get install -y --no-install-recommends\ - build-essential \ - wget \ - unzip \ - libtool \ - automake \ - libssl-dev \ - libpng12-dev \ - libjpeg-turbo8-dev \ - libdevil-dev \ - libboost-filesystem-dev \ - libboost-system-dev \ - libboost-program-options-dev \ - libboost-thread-dev \ - && rm -rf /var/lib/apt/lists/* - - # Manually install cmake -WORKDIR /tmp/cmake -RUN wget https://cmake.org/files/v3.17/cmake-3.17.1.tar.gz && \ - tar zxvf cmake-3.17.1.tar.gz -WORKDIR /tmp/cmake/cmake-3.17.1 -RUN ./bootstrap --prefix=/usr/local -- -DCMAKE_BUILD_TYPE:STRING=Release -DCMAKE_USE_OPENSSL:BOOL=ON && \ - make -j2 install -WORKDIR /tmp -RUN rm -rf cmake - COPY . /opt/popsift WORKDIR /opt/popsift/build -RUN cmake .. -DCMAKE_BUILD_TYPE=Release && make install -j 2 +RUN cmake .. -DCMAKE_BUILD_TYPE=Release && \ + make install -j $(nproc) && \ + cd /opt && \ + rm -rf popsift diff --git a/Dockerfile_deps b/Dockerfile_deps new file mode 100644 index 00000000..8974f34d --- /dev/null +++ b/Dockerfile_deps @@ -0,0 +1,46 @@ +ARG CUDA_TAG=10.2 +ARG OS_TAG=18.04 +FROM nvidia/cuda:${CUDA_TAG}-devel-ubuntu${OS_TAG} +LABEL maintainer="AliceVision Team alicevision@googlegroups.com" + +# use CUDA_TAG to select the image version to use +# see https://hub.docker.com/r/nvidia/cuda/ +# +# For example, to create a ubuntu 16.04 with cuda 8.0 for development, use +# docker build --build-arg CUDA_TAG=8.0 --tag alicevision/popsift:deps-cuda${CUDA_TAG}-ubuntu${OS_TAG} . 
+# +# then execute with nvidia docker (https://github.com/nvidia/nvidia-docker/wiki/Installation-(version-2.0)) +# docker run -it --runtime=nvidia popsift_deps + + +# OS/Version (FILE): cat /etc/issue.net +# Cuda version (ENV): $CUDA_VERSION + +# System update +RUN apt-get clean && apt-get update && apt-get install -y --no-install-recommends\ + build-essential \ + wget \ + unzip \ + libtool \ + automake \ + libssl-dev \ + libpng12-dev \ + libjpeg-turbo8-dev \ + libdevil-dev \ + libboost-filesystem-dev \ + libboost-system-dev \ + libboost-program-options-dev \ + libboost-thread-dev \ + && rm -rf /var/lib/apt/lists/* + + # Manually install cmake +WORKDIR /tmp/cmake +ENV CMAKE_VERSION=3.17 +ENV CMAKE_VERSION_FULL=${CMAKE_VERSION}.2 +RUN wget https://cmake.org/files/v3.17/cmake-${CMAKE_VERSION_FULL}.tar.gz && \ + tar zxvf cmake-${CMAKE_VERSION_FULL}.tar.gz && \ + cd cmake-${CMAKE_VERSION_FULL} && \ + ./bootstrap --prefix=/usr/local -- -DCMAKE_BUILD_TYPE:STRING=Release -DCMAKE_USE_OPENSSL:BOOL=ON && \ + make -j8 install && \ + cd /tmp && \ + rm -rf cmake From 44b3d43cdde789e5e578b77ba7ecd9d7722bad81 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Sat, 16 May 2020 00:11:53 +0200 Subject: [PATCH 204/285] [docker] fix nproc --- Dockerfile_deps | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Dockerfile_deps b/Dockerfile_deps index 8974f34d..2a2962bf 100644 --- a/Dockerfile_deps +++ b/Dockerfile_deps @@ -24,7 +24,6 @@ RUN apt-get clean && apt-get update && apt-get install -y --no-install-recommend libtool \ automake \ libssl-dev \ - libpng12-dev \ libjpeg-turbo8-dev \ libdevil-dev \ libboost-filesystem-dev \ @@ -41,6 +40,6 @@ RUN wget https://cmake.org/files/v3.17/cmake-${CMAKE_VERSION_FULL}.tar.gz && \ tar zxvf cmake-${CMAKE_VERSION_FULL}.tar.gz && \ cd cmake-${CMAKE_VERSION_FULL} && \ ./bootstrap --prefix=/usr/local -- -DCMAKE_BUILD_TYPE:STRING=Release -DCMAKE_USE_OPENSSL:BOOL=ON && \ - make -j8 install && \ + make -j$(nproc) install && \ cd /tmp 
&& \ rm -rf cmake From 144e74750a0e8fb510503beb7593ea28cea41520 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Sat, 16 May 2020 00:39:24 +0200 Subject: [PATCH 205/285] [clangformat] add header order policy it goes "local" --- .clang-format | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/.clang-format b/.clang-format index e764f59c..e61dfc9c 100644 --- a/.clang-format +++ b/.clang-format @@ -47,4 +47,13 @@ SpaceInEmptyParentheses: false SpacesInContainerLiterals: false SpacesInParentheses: false SpacesInSquareBrackets: false -Standard: Cpp11 +Standard: "c++11" +IncludeCategories: + - Regex: '^".*"' + Priority: 1 + - Regex: '^' + Priority: 2 + - Regex: '^<.*\..*>' + Priority: 3 +SortIncludes: true +IncludeBlocks: Regroup From 329d9add5b9080cd6df5a38dce707ffc190c18e5 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Sat, 16 May 2020 00:46:34 +0200 Subject: [PATCH 206/285] [popsfit] reorder include headers * order the headers * remove * use rather than fix #93 --- src/application/main.cpp | 37 ++++++++++++------------ src/application/match.cpp | 33 ++++++++++----------- src/application/pgmread.cpp | 11 +++---- src/popsift/common/assist.h | 4 ++- src/popsift/common/clamp.h | 2 ++ src/popsift/common/debug_macros.cu | 2 +- src/popsift/common/debug_macros.h | 11 +++---- src/popsift/common/device_prop.cu | 5 ++-- src/popsift/common/device_prop.h | 2 +- src/popsift/common/excl_blk_prefix_sum.h | 4 +-- src/popsift/common/plane_2d.cu | 15 ++++++---- src/popsift/common/plane_2d.h | 10 +++---- src/popsift/common/vec_macros.h | 2 +- src/popsift/common/warp_bitonic_sort.h | 7 ++--- src/popsift/common/write_plane_2d.cu | 4 +-- src/popsift/features.cu | 16 +++++----- src/popsift/features.h | 4 +-- src/popsift/gauss_filter.cu | 10 +++---- src/popsift/gauss_filter.h | 2 +- src/popsift/popsift.cpp | 9 ++++-- src/popsift/popsift.h | 16 +++++----- src/popsift/s_desc_grid.cu | 11 ++++--- src/popsift/s_desc_grid.h | 8 ++--- 
src/popsift/s_desc_igrid.cu | 11 ++++--- src/popsift/s_desc_igrid.h | 6 ++-- src/popsift/s_desc_iloop.cu | 11 ++++--- src/popsift/s_desc_iloop.h | 8 ++--- src/popsift/s_desc_loop.cu | 11 ++++--- src/popsift/s_desc_loop.h | 8 ++--- src/popsift/s_desc_norm_l2.h | 4 +-- src/popsift/s_desc_norm_rs.h | 4 +-- src/popsift/s_desc_normalize.h | 2 +- src/popsift/s_desc_notile.cu | 13 ++++----- src/popsift/s_desc_notile.h | 7 ++--- src/popsift/s_extrema.cu | 22 +++++++------- src/popsift/s_filtergrid.cu | 14 ++++----- src/popsift/s_gradiant.h | 8 ++--- src/popsift/s_image.cu | 11 +++---- src/popsift/s_image.h | 3 +- src/popsift/s_orientation.cu | 17 +++++------ src/popsift/s_pyramid_build.cu | 10 +++---- src/popsift/s_pyramid_build_aa.cu | 4 +-- src/popsift/s_pyramid_build_ai.cu | 4 +-- src/popsift/s_pyramid_build_ra.cu | 4 +-- src/popsift/s_pyramid_fixed.cu | 2 +- src/popsift/s_solve.h | 3 +- src/popsift/sift_conf.cu | 5 ++-- src/popsift/sift_conf.h | 3 +- src/popsift/sift_constants.cu | 6 ++-- src/popsift/sift_constants.h | 2 ++ src/popsift/sift_desc.cu | 22 +++++++------- src/popsift/sift_extremum.h | 4 +-- src/popsift/sift_octave.cu | 21 +++++++------- src/popsift/sift_octave.h | 8 ++--- src/popsift/sift_pyramid.cu | 20 +++++++------ src/popsift/sift_pyramid.h | 11 ++++--- 56 files changed, 261 insertions(+), 253 deletions(-) diff --git a/src/application/main.cpp b/src/application/main.cpp index efd3dd90..0eec1c22 100755 --- a/src/application/main.cpp +++ b/src/application/main.cpp @@ -5,26 +5,25 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include +#include #include +#include #include #include #include -#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include #ifdef USE_DEVIL #include @@ -176,7 +175,7 @@ SiftJob* process_image( const string& inputFile, PopSift& PopSift ) unsigned char* image_data; #ifdef USE_DEVIL - if( not pgmread_loading ) + if( ! pgmread_loading ) { if( float_mode ) { @@ -220,7 +219,7 @@ SiftJob* process_image( const string& inputFile, PopSift& PopSift ) nvtxRangePop( ); // "load and convert image - pgmread" - if( not float_mode ) + if( ! float_mode ) { // PopSift.init( w, h ); job = PopSift.enqueue( w, h, image_data ); @@ -319,7 +318,7 @@ int main(int argc, char **argv) SiftJob* job = jobs.front(); jobs.pop(); if( job ) { - read_job( job, not dont_write ); + read_job( job, ! dont_write ); delete job; } } diff --git a/src/application/match.cpp b/src/application/match.cpp index cac90fa1..852d9b62 100755 --- a/src/application/match.cpp +++ b/src/application/match.cpp @@ -5,20 +5,6 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - #include #include #include @@ -26,6 +12,19 @@ #include #include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + #ifdef USE_DEVIL #include #endif @@ -174,7 +173,7 @@ SiftJob* process_image( const string& inputFile, PopSift& PopSift ) nvtxRangePushA( "load and convert image" ); #ifdef USE_DEVIL - if( not pgmread_loading ) + if( ! 
pgmread_loading ) { ilImage img; if( img.Load( inputFile.c_str() ) == false ) { @@ -238,14 +237,14 @@ int main(int argc, char **argv) } if( boost::filesystem::exists( lFile ) ) { - if( not boost::filesystem::is_regular_file( lFile ) ) { + if( ! boost::filesystem::is_regular_file( lFile ) ) { cout << "Input file " << lFile << " is not a regular file, nothing to do" << endl; return EXIT_FAILURE; } } if( boost::filesystem::exists( rFile ) ) { - if( not boost::filesystem::is_regular_file( rFile ) ) { + if( ! boost::filesystem::is_regular_file( rFile ) ) { cout << "Input file " << rFile << " is not a regular file, nothing to do" << endl; return EXIT_FAILURE; } diff --git a/src/application/pgmread.cpp b/src/application/pgmread.cpp index e013b1bd..91a812f9 100644 --- a/src/application/pgmread.cpp +++ b/src/application/pgmread.cpp @@ -5,13 +5,14 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -#include -#include -#include -#include +#include "pgmread.h" + #include +#include -#include "pgmread.h" +#include +#include +#include #define RGB2GRAY_IN_INT diff --git a/src/popsift/common/assist.h b/src/popsift/common/assist.h index 17442057..50e6fe6f 100644 --- a/src/popsift/common/assist.h +++ b/src/popsift/common/assist.h @@ -7,6 +7,8 @@ */ #pragma once +#include + #include #include #include @@ -16,7 +18,7 @@ #include #endif -#include "sift_config.h" + namespace popsift { diff --git a/src/popsift/common/clamp.h b/src/popsift/common/clamp.h index 50705f05..03a56386 100644 --- a/src/popsift/common/clamp.h +++ b/src/popsift/common/clamp.h @@ -7,6 +7,8 @@ */ #pragma once +#include + template __device__ __host__ inline T clamp( T val, uint32_t maxval ) diff --git a/src/popsift/common/debug_macros.cu b/src/popsift/common/debug_macros.cu index 514b6e4b..cf4cd735 100755 --- a/src/popsift/common/debug_macros.cu +++ b/src/popsift/common/debug_macros.cu @@ -7,7 +7,7 @@ */ #include "debug_macros.h" 
-#include +#include using namespace std; diff --git a/src/popsift/common/debug_macros.h b/src/popsift/common/debug_macros.h index 71b271fb..eae11f6a 100755 --- a/src/popsift/common/debug_macros.h +++ b/src/popsift/common/debug_macros.h @@ -7,12 +7,13 @@ */ #pragma once -#include +#include + +#include +#include #include +#include #include -#include -#include -#include // synchronize device and check for an error void pop_sync_check_last_error( const char* file, size_t line ); @@ -134,7 +135,7 @@ class BriefDuration // #define POP_INFO(s) cerr << __FILE__ << ":" << __LINE__ << std::endl << " " << s << endl #define POP_INFO2(silent,s) \ - if (not silent) { \ + if (! silent) { \ std::cerr << __FILE__ << ":" << __LINE__ << std::endl << " " << s << std::endl; \ } diff --git a/src/popsift/common/device_prop.cu b/src/popsift/common/device_prop.cu index 14bf75ef..24fb3829 100644 --- a/src/popsift/common/device_prop.cu +++ b/src/popsift/common/device_prop.cu @@ -5,12 +5,11 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ +#include "debug_macros.h" +#include "device_prop.h" #include #include -#include "device_prop.h" -#include "debug_macros.h" - namespace popsift { namespace cuda { using namespace std; diff --git a/src/popsift/common/device_prop.h b/src/popsift/common/device_prop.h index 04b3870f..42df5b48 100644 --- a/src/popsift/common/device_prop.h +++ b/src/popsift/common/device_prop.h @@ -7,8 +7,8 @@ */ #pragma once -#include #include +#include namespace popsift { namespace cuda { diff --git a/src/popsift/common/excl_blk_prefix_sum.h b/src/popsift/common/excl_blk_prefix_sum.h index bd809a44..d77bf7b0 100644 --- a/src/popsift/common/excl_blk_prefix_sum.h +++ b/src/popsift/common/excl_blk_prefix_sum.h @@ -7,11 +7,11 @@ */ #pragma once +#include "assist.h" + #include #include -#include "assist.h" - namespace ExclusivePrefixSum { class IgnoreTotal diff --git a/src/popsift/common/plane_2d.cu b/src/popsift/common/plane_2d.cu index f5dd56e4..773dac24 100644 --- a/src/popsift/common/plane_2d.cu +++ b/src/popsift/common/plane_2d.cu @@ -5,9 +5,16 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "assist.h" +#include "debug_macros.h" +#include "plane_2d.h" + +#include + +#include +#include #include -#include -#include #ifndef _WIN32 #include #else @@ -17,11 +24,7 @@ #include #endif -#include -#include "plane_2d.h" -#include "assist.h" -#include "debug_macros.h" using namespace std; diff --git a/src/popsift/common/plane_2d.h b/src/popsift/common/plane_2d.h index 42fe1c6d..b2331ccc 100644 --- a/src/popsift/common/plane_2d.h +++ b/src/popsift/common/plane_2d.h @@ -6,12 +6,12 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ #pragma once - -#include -#include -#include -#include #include + +#include +#include +#include +#include #include #include #include diff --git a/src/popsift/common/vec_macros.h b/src/popsift/common/vec_macros.h index 4b476f2c..ee7741ed 100644 --- a/src/popsift/common/vec_macros.h +++ b/src/popsift/common/vec_macros.h @@ -7,7 +7,7 @@ */ #pragma once -#include +#include __device__ static inline float2 operator+( float2 l, const float2& r ) diff --git a/src/popsift/common/warp_bitonic_sort.h b/src/popsift/common/warp_bitonic_sort.h index b1df04d8..26ea5232 100644 --- a/src/popsift/common/warp_bitonic_sort.h +++ b/src/popsift/common/warp_bitonic_sort.h @@ -7,11 +7,10 @@ */ #pragma once -#include -#include - #include "assist.h" +#include + namespace popsift { namespace BitonicSort { @@ -64,7 +63,7 @@ class Warp32 const bool id_less = ( ( threadIdx.x & ( 1 << shift ) ) == 0 ); const bool my_more = id_less ? ( my_val > other_val ) : ( my_val < other_val ); - const bool must_swap = not ( my_more ^ reverse ^ increasing ); + const bool must_swap = ! ( my_more ^ reverse ^ increasing ); int lane = must_swap ? ( 1 << shift ) : 0; return popsift::shuffle_xor( my_index, lane ); diff --git a/src/popsift/common/write_plane_2d.cu b/src/popsift/common/write_plane_2d.cu index 6c9af8ac..68be7c4f 100755 --- a/src/popsift/common/write_plane_2d.cu +++ b/src/popsift/common/write_plane_2d.cu @@ -7,9 +7,9 @@ */ #include "write_plane_2d.h" -#include -#include #include +#include +#include #include using namespace std; diff --git a/src/popsift/features.cu b/src/popsift/features.cu index c634023f..af8ec440 100755 --- a/src/popsift/features.cu +++ b/src/popsift/features.cu @@ -5,17 +5,17 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ -#include -#include +#include "common/assist.h" +#include "common/debug_macros.h" +#include "features.h" +#include "sift_extremum.h" -#include -#include #include -#include "features.h" -#include "sift_extremum.h" -#include "common/assist.h" -#include "common/debug_macros.h" +#include +#include +#include +#include using namespace std; diff --git a/src/popsift/features.h b/src/popsift/features.h index 21ad83a2..4fc72a21 100755 --- a/src/popsift/features.h +++ b/src/popsift/features.h @@ -7,11 +7,11 @@ */ #pragma once +#include "sift_constants.h" + #include #include -#include "sift_constants.h" - namespace popsift { struct Descriptor; // float features[128]; diff --git a/src/popsift/gauss_filter.cu b/src/popsift/gauss_filter.cu index 436bf455..af02bbbf 100755 --- a/src/popsift/gauss_filter.cu +++ b/src/popsift/gauss_filter.cu @@ -5,11 +5,11 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -#include -#include - -#include "gauss_filter.h" #include "common/debug_macros.h" +#include "gauss_filter.h" + +#include +#include using namespace std; @@ -377,7 +377,7 @@ void GaussTable::transformBlurTable( ) { for( int level=0; level #include -#include - #include "popsift.h" + #include "gauss_filter.h" -#include "sift_pyramid.h" #include "sift_config.h" +#include "sift_pyramid.h" + +#include +#include +#include using namespace std; diff --git a/src/popsift/popsift.h b/src/popsift/popsift.h index 9b352386..2595fee3 100755 --- a/src/popsift/popsift.h +++ b/src/popsift/popsift.h @@ -7,18 +7,18 @@ */ #pragma once -#include -#include -#include -#include -#include -#include - #include "common/sync_queue.h" #include "sift_conf.h" -#include "sift_extremum.h" #include "sift_config.h" +#include "sift_extremum.h" + +#include +#include +#include +#include +#include +#include #if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX) #include diff --git a/src/popsift/s_desc_grid.cu b/src/popsift/s_desc_grid.cu index 
be167021..7b0ba6ce 100644 --- a/src/popsift/s_desc_grid.cu +++ b/src/popsift/s_desc_grid.cu @@ -5,14 +5,13 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -#include -#include - -#include "sift_constants.h" -#include "s_gradiant.h" -#include "s_desc_grid.h" #include "common/assist.h" #include "common/vec_macros.h" +#include "s_desc_grid.h" +#include "s_gradiant.h" +#include "sift_constants.h" + +#include using namespace popsift; diff --git a/src/popsift/s_desc_grid.h b/src/popsift/s_desc_grid.h index b674e1b5..2e4635e1 100644 --- a/src/popsift/s_desc_grid.h +++ b/src/popsift/s_desc_grid.h @@ -6,11 +6,11 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #pragma once -#include "sift_pyramid.h" -#include "sift_octave.h" -#include "sift_extremum.h" -#include "common/plane_2d.h" #include "common/debug_macros.h" +#include "common/plane_2d.h" +#include "sift_extremum.h" +#include "sift_octave.h" +#include "sift_pyramid.h" /* * We assume that this is started with diff --git a/src/popsift/s_desc_igrid.cu b/src/popsift/s_desc_igrid.cu index 9678abde..05795b83 100644 --- a/src/popsift/s_desc_igrid.cu +++ b/src/popsift/s_desc_igrid.cu @@ -5,14 +5,13 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -#include -#include - -#include "sift_constants.h" -#include "s_gradiant.h" -#include "s_desc_igrid.h" #include "common/assist.h" #include "common/vec_macros.h" +#include "s_desc_igrid.h" +#include "s_gradiant.h" +#include "sift_constants.h" + +#include using namespace popsift; diff --git a/src/popsift/s_desc_igrid.h b/src/popsift/s_desc_igrid.h index 589c5f4e..553336c3 100644 --- a/src/popsift/s_desc_igrid.h +++ b/src/popsift/s_desc_igrid.h @@ -6,10 +6,10 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ #pragma once -#include "sift_pyramid.h" -#include "sift_octave.h" -#include "sift_extremum.h" #include "common/debug_macros.h" +#include "sift_extremum.h" +#include "sift_octave.h" +#include "sift_pyramid.h" /* * We assume that this is started with diff --git a/src/popsift/s_desc_iloop.cu b/src/popsift/s_desc_iloop.cu index 3f370d42..f26c6085 100644 --- a/src/popsift/s_desc_iloop.cu +++ b/src/popsift/s_desc_iloop.cu @@ -5,14 +5,13 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -#include -#include - -#include "sift_constants.h" -#include "s_gradiant.h" -#include "s_desc_iloop.h" #include "common/assist.h" #include "common/vec_macros.h" +#include "s_desc_iloop.h" +#include "s_gradiant.h" +#include "sift_constants.h" + +#include using namespace popsift; diff --git a/src/popsift/s_desc_iloop.h b/src/popsift/s_desc_iloop.h index 643c85da..7137c23c 100644 --- a/src/popsift/s_desc_iloop.h +++ b/src/popsift/s_desc_iloop.h @@ -6,11 +6,11 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #pragma once -#include "sift_pyramid.h" -#include "sift_octave.h" -#include "sift_extremum.h" -#include "common/plane_2d.h" #include "common/debug_macros.h" +#include "common/plane_2d.h" +#include "sift_extremum.h" +#include "sift_octave.h" +#include "sift_pyramid.h" __global__ void ext_desc_iloop( const int octave, diff --git a/src/popsift/s_desc_loop.cu b/src/popsift/s_desc_loop.cu index bd723249..422ef4f2 100644 --- a/src/popsift/s_desc_loop.cu +++ b/src/popsift/s_desc_loop.cu @@ -5,14 +5,13 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ -#include -#include - -#include "sift_constants.h" -#include "s_gradiant.h" -#include "s_desc_loop.h" #include "common/assist.h" #include "common/vec_macros.h" +#include "s_desc_loop.h" +#include "s_gradiant.h" +#include "sift_constants.h" + +#include using namespace popsift; diff --git a/src/popsift/s_desc_loop.h b/src/popsift/s_desc_loop.h index 8a4e756b..65bf3754 100644 --- a/src/popsift/s_desc_loop.h +++ b/src/popsift/s_desc_loop.h @@ -6,11 +6,11 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #pragma once -#include "sift_pyramid.h" -#include "sift_octave.h" -#include "sift_extremum.h" -#include "common/plane_2d.h" #include "common/debug_macros.h" +#include "common/plane_2d.h" +#include "sift_extremum.h" +#include "sift_octave.h" +#include "sift_pyramid.h" #undef BLOCK_3_DIMS diff --git a/src/popsift/s_desc_norm_l2.h b/src/popsift/s_desc_norm_l2.h index f974aa33..3a7ed858 100644 --- a/src/popsift/s_desc_norm_l2.h +++ b/src/popsift/s_desc_norm_l2.h @@ -6,8 +6,8 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #pragma once -#include "s_desc_normalize.h" #include "common/assist.h" +#include "s_desc_normalize.h" #include "sift_config.h" using namespace popsift; @@ -128,7 +128,7 @@ void NormalizeL2::normalize( const float* src_desc, float* dst_desc, const bool descr.z = descr.z * norm; descr.w = descr.w * norm; - if( not ignoreme ) { + if( ! ignoreme ) { float4* out4 = (float4*)dst_desc; out4[threadIdx.x] = descr; } diff --git a/src/popsift/s_desc_norm_rs.h b/src/popsift/s_desc_norm_rs.h index cf07aa69..c849a240 100644 --- a/src/popsift/s_desc_norm_rs.h +++ b/src/popsift/s_desc_norm_rs.h @@ -6,8 +6,8 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ #pragma once -#include "s_desc_normalize.h" #include "common/assist.h" +#include "s_desc_normalize.h" using namespace popsift; using namespace std; @@ -73,7 +73,7 @@ void NormalizeRootSift::normalize( const float* src_desc, float* dst_desc, const d_consts.norm_multi ); descr.w = val; - if( not ignoreme ) { + if( ! ignoreme ) { float4* out4 = (float4*)dst_desc; out4[threadIdx.x] = descr; } diff --git a/src/popsift/s_desc_normalize.h b/src/popsift/s_desc_normalize.h index 6f2dea2a..a87d0710 100644 --- a/src/popsift/s_desc_normalize.h +++ b/src/popsift/s_desc_normalize.h @@ -7,9 +7,9 @@ */ #pragma once -#include "sift_extremum.h" #include "s_desc_norm_l2.h" #include "s_desc_norm_rs.h" +#include "sift_extremum.h" template __global__ diff --git a/src/popsift/s_desc_notile.cu b/src/popsift/s_desc_notile.cu index f78e1243..64dd37a7 100644 --- a/src/popsift/s_desc_notile.cu +++ b/src/popsift/s_desc_notile.cu @@ -5,15 +5,14 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -#include -#include -#include - -#include "sift_constants.h" -#include "s_gradiant.h" -#include "s_desc_notile.h" #include "common/assist.h" #include "common/vec_macros.h" +#include "s_desc_notile.h" +#include "s_gradiant.h" +#include "sift_constants.h" + +#include +#include // 1 -> 19.6 on 980 Ti // 2 -> 19.5 on 980 Ti diff --git a/src/popsift/s_desc_notile.h b/src/popsift/s_desc_notile.h index 929f06c3..0828a4f3 100644 --- a/src/popsift/s_desc_notile.h +++ b/src/popsift/s_desc_notile.h @@ -6,11 +6,10 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ #pragma once -#include "sift_pyramid.h" -#include "sift_octave.h" -#include "sift_extremum.h" #include "common/debug_macros.h" - +#include "sift_extremum.h" +#include "sift_octave.h" +#include "sift_pyramid.h" namespace popsift { diff --git a/src/popsift/s_extrema.cu b/src/popsift/s_extrema.cu index 8bcdd28d..827aa599 100644 --- a/src/popsift/s_extrema.cu +++ b/src/popsift/s_extrema.cu @@ -5,17 +5,17 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +#include "common/assist.h" +#include "common/clamp.h" +#include "common/debug_macros.h" +#include "s_solve.h" +#include "sift_constants.h" +#include "sift_pyramid.h" + #include #include -#include -#include -#include "sift_pyramid.h" -#include "sift_constants.h" -#include "s_solve.h" -#include "common/debug_macros.h" -#include "common/assist.h" -#include "common/clamp.h" +#include namespace popsift{ @@ -342,9 +342,9 @@ bool find_extrema_in_dog_sub( cudaTextureObject_t dog, const float val = readTex( dog, x, y, level ); ModeFunctions f; - if( not f.first_contrast_ok( val ) ) return false; + if( ! f.first_contrast_ok( val ) ) return false; - if( not is_extremum( dog, x-1, y-1, level-1 ) ) { + if( ! is_extremum( dog, x-1, y-1, level-1 ) ) { // if( debug_octave==0 && level==2 && x==14 && y==73 ) printf("But I fail\n"); return false; } @@ -463,7 +463,7 @@ bool find_extrema_in_dog_sub( cudaTextureObject_t dog, const float yn = n.y + d.y; const float sn = n.z + d.z; - if( not f.verify( xn, yn, sn, width, height, maxlevel ) ) { + if( ! f.verify( xn, yn, sn, width, height, maxlevel ) ) { return false; } diff --git a/src/popsift/s_filtergrid.cu b/src/popsift/s_filtergrid.cu index ad1ac0df..b3652ed5 100644 --- a/src/popsift/s_filtergrid.cu +++ b/src/popsift/s_filtergrid.cu @@ -5,9 +5,9 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ -#include "sift_pyramid.h" -#include "sift_extremum.h" #include "sift_config.h" +#include "sift_extremum.h" +#include "sift_pyramid.h" #if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX) #include @@ -16,16 +16,16 @@ #define nvtxRangePop() #endif -#if not POPSIFT_IS_DEFINED(POPSIFT_DISABLE_GRID_FILTER) +#if ! POPSIFT_IS_DEFINED(POPSIFT_DISABLE_GRID_FILTER) +#include #include +#include +#include #include -#include +#include #include #include -#include -#include -#include namespace popsift { diff --git a/src/popsift/s_gradiant.h b/src/popsift/s_gradiant.h index adc912d2..aaec9e2d 100644 --- a/src/popsift/s_gradiant.h +++ b/src/popsift/s_gradiant.h @@ -7,13 +7,13 @@ */ #pragma once -#include -#include - -#include "common/plane_2d.h" #include "common/assist.h" +#include "common/plane_2d.h" #include "sift_constants.h" +#include +#include + namespace popsift { /* diff --git a/src/popsift/s_image.cu b/src/popsift/s_image.cu index fd149a5a..b56dd602 100755 --- a/src/popsift/s_image.cu +++ b/src/popsift/s_image.cu @@ -5,14 +5,15 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -#include "s_image.h" -#include "common/debug_macros.h" #include "common/assist.h" +#include "common/debug_macros.h" +#include "s_image.h" #include "sift_config.h" -#include -#include -#include + #include +#include +#include +#include #if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX) #include diff --git a/src/popsift/s_image.h b/src/popsift/s_image.h index e908e691..0fb6b5ef 100755 --- a/src/popsift/s_image.h +++ b/src/popsift/s_image.h @@ -7,10 +7,11 @@ */ #pragma once -#include #include "common/plane_2d.h" #include "sift_conf.h" +#include + namespace popsift { /************************************************************* diff --git a/src/popsift/s_orientation.cu b/src/popsift/s_orientation.cu index d79ebd9e..7e110f6b 100644 --- a/src/popsift/s_orientation.cu +++ b/src/popsift/s_orientation.cu @@ -5,19 +5,18 @@ * License, v. 2.0. 
If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -#include -#include -#include - #include "common/assist.h" -#include "sift_pyramid.h" -#include "sift_constants.h" -#include "s_gradiant.h" +#include "common/debug_macros.h" #include "common/excl_blk_prefix_sum.h" #include "common/warp_bitonic_sort.h" -#include "common/debug_macros.h" -#include "common/assist.h" +#include "s_gradiant.h" #include "sift_config.h" +#include "sift_constants.h" +#include "sift_pyramid.h" + +#include +#include +#include #if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX) #include diff --git a/src/popsift/s_pyramid_build.cu b/src/popsift/s_pyramid_build.cu index 4e28dbd2..326ec325 100755 --- a/src/popsift/s_pyramid_build.cu +++ b/src/popsift/s_pyramid_build.cu @@ -5,18 +5,18 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -#include "sift_pyramid.h" -#include "sift_constants.h" -#include "gauss_filter.h" -#include "common/debug_macros.h" #include "common/assist.h" #include "common/clamp.h" +#include "common/debug_macros.h" +#include "gauss_filter.h" #include "s_pyramid_build_aa.h" #include "s_pyramid_build_ai.h" #include "s_pyramid_build_ra.h" +#include "sift_constants.h" +#include "sift_pyramid.h" +#include #include -#include /* It makes no sense whatsoever to change this value */ #define PREV_LEVEL 3 diff --git a/src/popsift/s_pyramid_build_aa.cu b/src/popsift/s_pyramid_build_aa.cu index 1f5cf09f..57614f42 100755 --- a/src/popsift/s_pyramid_build_aa.cu +++ b/src/popsift/s_pyramid_build_aa.cu @@ -5,10 +5,10 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ +#include "common/assist.h" +#include "gauss_filter.h" #include "s_pyramid_build_aa.h" #include "sift_constants.h" -#include "gauss_filter.h" -#include "common/assist.h" namespace popsift { namespace gauss { diff --git a/src/popsift/s_pyramid_build_ai.cu b/src/popsift/s_pyramid_build_ai.cu index ff379d48..525bf1f1 100755 --- a/src/popsift/s_pyramid_build_ai.cu +++ b/src/popsift/s_pyramid_build_ai.cu @@ -5,10 +5,10 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +#include "common/assist.h" +#include "gauss_filter.h" #include "s_pyramid_build_aa.h" #include "sift_constants.h" -#include "gauss_filter.h" -#include "common/assist.h" namespace popsift { namespace gauss { diff --git a/src/popsift/s_pyramid_build_ra.cu b/src/popsift/s_pyramid_build_ra.cu index db76f218..2b32e62c 100755 --- a/src/popsift/s_pyramid_build_ra.cu +++ b/src/popsift/s_pyramid_build_ra.cu @@ -5,10 +5,10 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ +#include "common/assist.h" +#include "gauss_filter.h" #include "s_pyramid_build_ra.h" #include "sift_constants.h" -#include "gauss_filter.h" -#include "common/assist.h" namespace popsift { namespace gauss { diff --git a/src/popsift/s_pyramid_fixed.cu b/src/popsift/s_pyramid_fixed.cu index 50a6593c..9e3d52aa 100755 --- a/src/popsift/s_pyramid_fixed.cu +++ b/src/popsift/s_pyramid_fixed.cu @@ -13,7 +13,7 @@ #include "common/clamp.h" #include -#include +#include namespace popsift { diff --git a/src/popsift/s_solve.h b/src/popsift/s_solve.h index c6edd5b9..b367f38e 100755 --- a/src/popsift/s_solve.h +++ b/src/popsift/s_solve.h @@ -17,7 +17,8 @@ #undef USE_GAUSSIAN_ELIMINATION #include -#include + +#include #ifndef USE_GAUSSIAN_ELIMINATION diff --git a/src/popsift/sift_conf.cu b/src/popsift/sift_conf.cu index b1ea807f..251f58ff 100644 --- a/src/popsift/sift_conf.cu +++ b/src/popsift/sift_conf.cu @@ -5,9 +5,10 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -#include -#include "sift_conf.h" #include "common/debug_macros.h" +#include "sift_conf.h" + +#include using namespace std; diff --git a/src/popsift/sift_conf.h b/src/popsift/sift_conf.h index c28580a2..0c4e817d 100644 --- a/src/popsift/sift_conf.h +++ b/src/popsift/sift_conf.h @@ -8,7 +8,6 @@ #pragma once #include -#include #define MAX_OCTAVES 20 #define MAX_LEVELS 10 @@ -316,7 +315,7 @@ inline bool operator==( const Config& l, const Config& r ) inline bool operator!=( const Config& l, const Config& r ) { - return not l.equal( r ); + return ! l.equal( r ); } }; // namespace popsift diff --git a/src/popsift/sift_constants.cu b/src/popsift/sift_constants.cu index 11eda7fb..7c8da529 100755 --- a/src/popsift/sift_constants.cu +++ b/src/popsift/sift_constants.cu @@ -5,13 +5,13 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ +#include "common/debug_macros.h" +#include "sift_constants.h" + #include #include -#include "sift_constants.h" -#include "common/debug_macros.h" - using namespace std; namespace popsift { diff --git a/src/popsift/sift_constants.h b/src/popsift/sift_constants.h index 71b24ab7..58d5575d 100755 --- a/src/popsift/sift_constants.h +++ b/src/popsift/sift_constants.h @@ -7,6 +7,8 @@ */ #pragma once +#include + #ifndef INF #define INF (1<<29) #endif diff --git a/src/popsift/sift_desc.cu b/src/popsift/sift_desc.cu index ae19dcc0..b0eb0bd1 100644 --- a/src/popsift/sift_desc.cu +++ b/src/popsift/sift_desc.cu @@ -5,21 +5,21 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -#include -#include - -#include "sift_pyramid.h" -#include "sift_constants.h" -#include "s_gradiant.h" -#include "s_desc_normalize.h" -#include "s_desc_loop.h" -#include "s_desc_iloop.h" +#include "common/assist.h" +#include "common/debug_macros.h" #include "s_desc_grid.h" #include "s_desc_igrid.h" +#include "s_desc_iloop.h" +#include "s_desc_loop.h" +#include "s_desc_normalize.h" #include "s_desc_notile.h" -#include "common/assist.h" -#include "common/debug_macros.h" +#include "s_gradiant.h" #include "sift_config.h" +#include "sift_constants.h" +#include "sift_pyramid.h" + +#include +#include #if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX) #include diff --git a/src/popsift/sift_extremum.h b/src/popsift/sift_extremum.h index 087dbff8..c3e41954 100755 --- a/src/popsift/sift_extremum.h +++ b/src/popsift/sift_extremum.h @@ -7,11 +7,11 @@ */ #pragma once +#include "sift_constants.h" + #include #include -#include "sift_constants.h" - namespace popsift { /* This is an internal data structure. diff --git a/src/popsift/sift_octave.cu b/src/popsift/sift_octave.cu index 64fbc4a5..82594d59 100755 --- a/src/popsift/sift_octave.cu +++ b/src/popsift/sift_octave.cu @@ -5,23 +5,24 @@ * License, v. 2.0. 
If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -#include + +#include "common/clamp.h" +#include "common/debug_macros.h" +#include "common/write_plane_2d.h" +#include "sift_constants.h" +#include "sift_octave.h" +#include "sift_pyramid.h" + #include + +#include // for placement new +#include #ifdef _WIN32 #include #define stat _stat #define mkdir(name, mode) _mkdir(name) #endif -#include // for placement new - -#include "sift_pyramid.h" -#include "sift_constants.h" -#include "common/debug_macros.h" -#include "common/clamp.h" -#include "common/write_plane_2d.h" -#include "sift_octave.h" - using namespace std; namespace popsift { diff --git a/src/popsift/sift_octave.h b/src/popsift/sift_octave.h index a0122bdf..32add2df 100755 --- a/src/popsift/sift_octave.h +++ b/src/popsift/sift_octave.h @@ -7,13 +7,13 @@ */ #pragma once -#include -#include - #include "s_image.h" #include "sift_conf.h" -#include "sift_extremum.h" #include "sift_constants.h" +#include "sift_extremum.h" + +#include +#include namespace popsift { diff --git a/src/popsift/sift_pyramid.cu b/src/popsift/sift_pyramid.cu index ccb752a2..80fe80dd 100755 --- a/src/popsift/sift_pyramid.cu +++ b/src/popsift/sift_pyramid.cu @@ -5,24 +5,26 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ -#include + +#include "common/assist.h" +#include "common/debug_macros.h" +#include "sift_config.h" +#include "sift_extremum.h" +#include "sift_pyramid.h" + +#include + +#include #include +#include #include #include -#include -#include #ifdef _WIN32 #include #define stat _stat #define mkdir(path, perm) _mkdir(path) #endif -#include "sift_pyramid.h" -#include "sift_extremum.h" -#include "common/debug_macros.h" -#include "common/assist.h" -#include "sift_config.h" - #if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX) #include #else diff --git a/src/popsift/sift_pyramid.h b/src/popsift/sift_pyramid.h index 791fcc91..14b0b663 100755 --- a/src/popsift/sift_pyramid.h +++ b/src/popsift/sift_pyramid.h @@ -7,16 +7,15 @@ */ #pragma once -#include -#include - -#include "sift_conf.h" -#include "sift_constants.h" #include "features.h" - #include "s_image.h" +#include "sift_conf.h" +#include "sift_constants.h" #include "sift_octave.h" +#include +#include + namespace popsift { struct ExtremaCounters From 7c9bf570cc1328a1afcd9b276d3c15786b38bc27 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Sat, 16 May 2020 00:49:42 +0200 Subject: [PATCH 207/285] [popsfit] explicit --- src/popsift/common/plane_2d.h | 2 +- src/popsift/s_filtergrid.cu | 2 +- src/popsift/s_orientation.cu | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/popsift/common/plane_2d.h b/src/popsift/common/plane_2d.h index b2331ccc..86d26f5c 100644 --- a/src/popsift/common/plane_2d.h +++ b/src/popsift/common/plane_2d.h @@ -92,7 +92,7 @@ template struct PlaneT : public PlaneBase T* data; __host__ __device__ PlaneT( ) : data(0) { } - __host__ __device__ PlaneT( T* d ) : data(d) { } + __host__ __device__ explicit PlaneT( T* d ) : data(d) { } __host__ __device__ inline size_t elemSize() const { return elem_size; } }; diff --git a/src/popsift/s_filtergrid.cu b/src/popsift/s_filtergrid.cu index b3652ed5..078eb114 100644 --- a/src/popsift/s_filtergrid.cu +++ b/src/popsift/s_filtergrid.cu @@ -70,7 +70,7 
@@ struct FunctionExtractCell struct FunctionIsAbove { int _limit; - FunctionIsAbove( int limit ) : _limit(limit) { } + explicit FunctionIsAbove( int limit ) : _limit(limit) { } __host__ __device__ inline bool operator()( int val ) const diff --git a/src/popsift/s_orientation.cu b/src/popsift/s_orientation.cu index 7e110f6b..d0ace5d1 100644 --- a/src/popsift/s_orientation.cu +++ b/src/popsift/s_orientation.cu @@ -250,7 +250,7 @@ class ExtremaRead const Extremum* const _oris; public: inline __device__ - ExtremaRead( const Extremum* const d_oris ) : _oris( d_oris ) { } + explicit ExtremaRead( const Extremum* const d_oris ) : _oris( d_oris ) { } inline __device__ int get( int n ) const { return _oris[n].num_ori; } @@ -261,7 +261,7 @@ class ExtremaWrt Extremum* _oris; public: inline __device__ - ExtremaWrt( Extremum* d_oris ) : _oris( d_oris ) { } + explicit ExtremaWrt( Extremum* d_oris ) : _oris( d_oris ) { } inline __device__ void set( int n, int value ) { _oris[n].idx_ori = value; } @@ -272,7 +272,7 @@ class ExtremaTot int& _extrema_counter; public: inline __device__ - ExtremaTot( int& extrema_counter ) : _extrema_counter( extrema_counter ) { } + explicit ExtremaTot( int& extrema_counter ) : _extrema_counter( extrema_counter ) { } inline __device__ void set( int value ) { _extrema_counter = value; } From da9f6a3018199cd737d50a15774466f83cc374c2 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Sat, 16 May 2020 00:52:29 +0200 Subject: [PATCH 208/285] [popsfit] nullptr --- src/popsift/features.cu | 34 +++++++++++++++++----------------- src/popsift/popsift.h | 2 +- src/popsift/sift_octave.cu | 4 ++-- src/popsift/sift_pyramid.cu | 6 +++--- 4 files changed, 23 insertions(+), 23 deletions(-) diff --git a/src/popsift/features.cu b/src/popsift/features.cu index af8ec440..340c030c 100755 --- a/src/popsift/features.cu +++ b/src/popsift/features.cu @@ -38,13 +38,13 @@ FeaturesBase::~FeaturesBase( ) *************************************************************/ 
FeaturesHost::FeaturesHost( ) - : _ext( 0 ) - , _ori( 0 ) + : _ext( nullptr ) + , _ori( nullptr ) { } FeaturesHost::FeaturesHost( int num_ext, int num_ori ) - : _ext( 0 ) - , _ori( 0 ) + : _ext( nullptr ) + , _ori( nullptr ) { reset( num_ext, num_ori ); } @@ -57,11 +57,11 @@ FeaturesHost::~FeaturesHost( ) void FeaturesHost::reset( int num_ext, int num_ori ) { - if( _ext != 0 ) { free( _ext ); _ext = 0; } - if( _ori != 0 ) { free( _ori ); _ori = 0; } + if( _ext != nullptr ) { free( _ext ); _ext = nullptr; } + if( _ori != nullptr ) { free( _ori ); _ori = nullptr; } _ext = (Feature*)memalign( getPageSize(), num_ext * sizeof(Feature) ); - if( _ext == 0 ) { + if( _ext == nullptr ) { cerr << __FILE__ << ":" << __LINE__ << " Runtime error:" << endl << " Failed to (re)allocate memory for downloading " << num_ext << " features" << endl; if( errno == EINVAL ) cerr << " Alignment is not a power of two." << endl; @@ -69,7 +69,7 @@ void FeaturesHost::reset( int num_ext, int num_ori ) exit( -1 ); } _ori = (Descriptor*)memalign( getPageSize(), num_ori * sizeof(Descriptor) ); - if( _ori == 0 ) { + if( _ori == nullptr ) { cerr << __FILE__ << ":" << __LINE__ << " Runtime error:" << endl << " Failed to (re)allocate memory for downloading " << num_ori << " descriptors" << endl; if( errno == EINVAL ) cerr << " Alignment is not a power of two." 
<< endl; @@ -126,15 +126,15 @@ std::ostream& operator<<( std::ostream& ostr, const FeaturesHost& feature ) *************************************************************/ FeaturesDev::FeaturesDev( ) - : _ext( 0 ) - , _ori( 0 ) - , _rev( 0 ) + : _ext( nullptr ) + , _ori( nullptr ) + , _rev( nullptr ) { } FeaturesDev::FeaturesDev( int num_ext, int num_ori ) - : _ext( 0 ) - , _ori( 0 ) - , _rev( 0 ) + : _ext( nullptr ) + , _ori( nullptr ) + , _rev( nullptr ) { reset( num_ext, num_ori ); } @@ -148,9 +148,9 @@ FeaturesDev::~FeaturesDev( ) void FeaturesDev::reset( int num_ext, int num_ori ) { - if( _ext != 0 ) { cudaFree( _ext ); _ext = 0; } - if( _ori != 0 ) { cudaFree( _ori ); _ori = 0; } - if( _rev != 0 ) { cudaFree( _rev ); _rev = 0; } + if( _ext != nullptr ) { cudaFree( _ext ); _ext = nullptr; } + if( _ori != nullptr ) { cudaFree( _ori ); _ori = nullptr; } + if( _rev != nullptr ) { cudaFree( _rev ); _rev = nullptr; } _ext = popsift::cuda::malloc_devT ( num_ext, __FILE__, __LINE__ ); _ori = popsift::cuda::malloc_devT( num_ori, __FILE__, __LINE__ ); diff --git a/src/popsift/popsift.h b/src/popsift/popsift.h index 2595fee3..81a3db59 100755 --- a/src/popsift/popsift.h +++ b/src/popsift/popsift.h @@ -145,7 +145,7 @@ class PopSift inline popsift::FeaturesBase* execute( int /*pipe*/, const unsigned char* imageData ) { SiftJob* j = enqueue( _last_init_w, _last_init_h, imageData ); - if( !j ) return 0; + if( !j ) return nullptr; popsift::FeaturesBase* f = j->getBase(); delete j; return f; diff --git a/src/popsift/sift_octave.cu b/src/popsift/sift_octave.cu index 82594d59..82ac973b 100755 --- a/src/popsift/sift_octave.cu +++ b/src/popsift/sift_octave.cu @@ -316,12 +316,12 @@ void Octave::alloc_interm_tex() tex_desc.readMode = cudaReadModeElementType; // read as float tex_desc.filterMode = cudaFilterModePoint; // no interpolation - err = cudaCreateTextureObject( &_intm_tex_point, &res_desc, &tex_desc, 0 ); + err = cudaCreateTextureObject( &_intm_tex_point, &res_desc, &tex_desc, 
nullptr ); POP_CUDA_FATAL_TEST(err, "Could not create Blur intermediate point texture: "); tex_desc.filterMode = cudaFilterModeLinear; // no interpolation - err = cudaCreateTextureObject( &_intm_tex_linear.tex, &res_desc, &tex_desc, 0 ); + err = cudaCreateTextureObject( &_intm_tex_linear.tex, &res_desc, &tex_desc, nullptr ); POP_CUDA_FATAL_TEST(err, "Could not create Blur intermediate point texture: "); } diff --git a/src/popsift/sift_pyramid.cu b/src/popsift/sift_pyramid.cu index 80fe80dd..75c3d265 100755 --- a/src/popsift/sift_pyramid.cu +++ b/src/popsift/sift_pyramid.cu @@ -144,8 +144,8 @@ Pyramid::Pyramid( const Config& config, dobuf_shadow.i_ext_off[o] = dobuf_shadow.i_ext_off[0] + (o*h_consts.max_extrema); } for (int o = _num_octaves; o Date: Sat, 16 May 2020 00:56:45 +0200 Subject: [PATCH 209/285] [popsift] removing const for primitive types in function params --- src/popsift/s_desc_grid.cu | 4 +--- src/popsift/s_desc_grid.h | 4 +--- src/popsift/s_desc_igrid.cu | 4 +--- src/popsift/s_desc_igrid.h | 4 +--- src/popsift/s_desc_iloop.cu | 6 +---- src/popsift/s_desc_iloop.h | 6 +---- src/popsift/s_desc_loop.cu | 6 +---- src/popsift/s_desc_loop.h | 6 +---- src/popsift/s_desc_norm_rs.h | 11 ++++----- src/popsift/s_desc_notile.cu | 2 +- src/popsift/s_desc_notile.h | 2 +- src/popsift/s_extrema.cu | 39 +++++++++++++++---------------- src/popsift/s_pyramid_build_aa.cu | 24 ++++++------------- src/popsift/s_pyramid_build_aa.h | 24 ++++++------------- src/popsift/s_pyramid_build_ai.cu | 24 ++++++------------- src/popsift/s_pyramid_build_ai.h | 24 ++++++------------- src/popsift/s_pyramid_build_ra.h | 37 ++++++++++++----------------- 17 files changed, 76 insertions(+), 151 deletions(-) diff --git a/src/popsift/s_desc_grid.cu b/src/popsift/s_desc_grid.cu index 7b0ba6ce..099c4709 100644 --- a/src/popsift/s_desc_grid.cu +++ b/src/popsift/s_desc_grid.cu @@ -121,9 +121,7 @@ void ext_desc_grid_sub( const int ix, } } -__global__ -void ext_desc_grid( const int octave, - 
cudaTextureObject_t layer_tex ) +__global__ void ext_desc_grid(int octave, cudaTextureObject_t layer_tex) { const int o_offset = dct.ori_ps[octave] + blockIdx.x; const int ix = threadIdx.y; diff --git a/src/popsift/s_desc_grid.h b/src/popsift/s_desc_grid.h index 2e4635e1..c0919806 100644 --- a/src/popsift/s_desc_grid.h +++ b/src/popsift/s_desc_grid.h @@ -17,9 +17,7 @@ * block = 16,4,4 * grid = nunmber of orientations */ -__global__ -void ext_desc_grid( const int octave, - cudaTextureObject_t layer_tex ); +__global__ void ext_desc_grid(int octave, cudaTextureObject_t layer_tex); namespace popsift { diff --git a/src/popsift/s_desc_igrid.cu b/src/popsift/s_desc_igrid.cu index 05795b83..9f77f12f 100644 --- a/src/popsift/s_desc_igrid.cu +++ b/src/popsift/s_desc_igrid.cu @@ -74,9 +74,7 @@ void ext_desc_igrid_sub( const float x, const float y, const int level, } } -__global__ -void ext_desc_igrid( const int octave, - cudaTextureObject_t texLinear ) +__global__ void ext_desc_igrid(int octave, cudaTextureObject_t texLinear) { const int num = dct.ori_ct[octave]; diff --git a/src/popsift/s_desc_igrid.h b/src/popsift/s_desc_igrid.h index 553336c3..8980a4bc 100644 --- a/src/popsift/s_desc_igrid.h +++ b/src/popsift/s_desc_igrid.h @@ -16,9 +16,7 @@ * block = 16,4,4 or with 32,4,4, depending on macros * grid = nunmber of orientations */ -__global__ -void ext_desc_igrid( const int octave, - cudaTextureObject_t texLinear ); +__global__ void ext_desc_igrid(int octave, cudaTextureObject_t texLinear); namespace popsift { diff --git a/src/popsift/s_desc_iloop.cu b/src/popsift/s_desc_iloop.cu index f26c6085..84673a20 100644 --- a/src/popsift/s_desc_iloop.cu +++ b/src/popsift/s_desc_iloop.cu @@ -128,11 +128,7 @@ void ext_desc_iloop_sub( const float ang, } } -__global__ -void ext_desc_iloop( const int octave, - cudaTextureObject_t layer_tex, - const int w, - const int h ) +__global__ void ext_desc_iloop(int octave, cudaTextureObject_t layer_tex, int w, int h) { const int o_offset = 
dct.ori_ps[octave] + blockIdx.x; Descriptor* desc = &dbuf.desc [o_offset]; diff --git a/src/popsift/s_desc_iloop.h b/src/popsift/s_desc_iloop.h index 7137c23c..e69409b6 100644 --- a/src/popsift/s_desc_iloop.h +++ b/src/popsift/s_desc_iloop.h @@ -12,11 +12,7 @@ #include "sift_octave.h" #include "sift_pyramid.h" -__global__ -void ext_desc_iloop( const int octave, - cudaTextureObject_t layer_tex, - const int width, - const int height ); +__global__ void ext_desc_iloop(int octave, cudaTextureObject_t layer_tex, int width, int height); namespace popsift { diff --git a/src/popsift/s_desc_loop.cu b/src/popsift/s_desc_loop.cu index 422ef4f2..4c5f46c2 100644 --- a/src/popsift/s_desc_loop.cu +++ b/src/popsift/s_desc_loop.cu @@ -138,11 +138,7 @@ void ext_desc_loop_sub( const float ang, } } -__global__ -void ext_desc_loop( const int octave, - cudaTextureObject_t layer_tex, - const int w, - const int h ) +__global__ void ext_desc_loop(int octave, cudaTextureObject_t layer_tex, int w, int h) { const int o_offset = dct.ori_ps[octave] + blockIdx.x; Descriptor* desc = &dbuf.desc [o_offset]; diff --git a/src/popsift/s_desc_loop.h b/src/popsift/s_desc_loop.h index 65bf3754..600db498 100644 --- a/src/popsift/s_desc_loop.h +++ b/src/popsift/s_desc_loop.h @@ -14,11 +14,7 @@ #undef BLOCK_3_DIMS -__global__ -void ext_desc_loop( const int octave, - cudaTextureObject_t layer_tex, - const int width, - const int height ); +__global__ void ext_desc_loop(int octave, cudaTextureObject_t layer_tex, int width, int height); namespace popsift { diff --git a/src/popsift/s_desc_norm_rs.h b/src/popsift/s_desc_norm_rs.h index c849a240..3ab5b1fc 100644 --- a/src/popsift/s_desc_norm_rs.h +++ b/src/popsift/s_desc_norm_rs.h @@ -16,20 +16,17 @@ class NormalizeRootSift { public: __device__ static inline - void normalize( float* features, const bool ignoreme ); + void normalize( float* features, bool ignoreme ); __device__ static inline void normalize_restrict( const float* __restrict__ src_desc, float* 
__restrict__ dest_desc ); - __device__ static inline - void normalize( const float* src_desc, - float* dest_desc, - const bool ignoreme ); + __device__ static inline void normalize(const float* src_desc, float* dest_desc, bool ignoreme); }; __device__ inline -void NormalizeRootSift::normalize( float* features, const bool ignoreme ) +void NormalizeRootSift::normalize( float* features, bool ignoreme ) { normalize( features, features, ignoreme ); } @@ -42,7 +39,7 @@ void NormalizeRootSift::normalize_restrict( const float* __restrict__ src_desc, } __device__ inline -void NormalizeRootSift::normalize( const float* src_desc, float* dst_desc, const bool ignoreme ) +void NormalizeRootSift::normalize( const float* src_desc, float* dst_desc, bool ignoreme ) { const float4* ptr4 = (const float4*)src_desc; diff --git a/src/popsift/s_desc_notile.cu b/src/popsift/s_desc_notile.cu index 64dd37a7..8b12c7f6 100644 --- a/src/popsift/s_desc_notile.cu +++ b/src/popsift/s_desc_notile.cu @@ -130,7 +130,7 @@ void ext_desc_notile( const int octave, namespace popsift { -bool start_ext_desc_notile( const int octave, Octave& oct_obj ) +bool start_ext_desc_notile( int octave, Octave& oct_obj ) { dim3 block; dim3 grid; diff --git a/src/popsift/s_desc_notile.h b/src/popsift/s_desc_notile.h index 0828a4f3..93a91cc3 100644 --- a/src/popsift/s_desc_notile.h +++ b/src/popsift/s_desc_notile.h @@ -14,6 +14,6 @@ namespace popsift { -bool start_ext_desc_notile( const int octave, Octave& oct_obj ); +bool start_ext_desc_notile( int octave, Octave& oct_obj ); }; // namespace popsift diff --git a/src/popsift/s_extrema.cu b/src/popsift/s_extrema.cu index 827aa599..30944e34 100644 --- a/src/popsift/s_extrema.cu +++ b/src/popsift/s_extrema.cu @@ -124,7 +124,7 @@ class ModeFunctions { public: inline __device__ - bool first_contrast_ok( const float val ) const; + bool first_contrast_ok( float val ) const; /* refine * returns -1 : break loop and fail @@ -132,14 +132,14 @@ public: * 1 : break loop and succeed */ 
inline __device__ - int refine( float3& d, int3& n, const int width, const int height, const int maxlevel, bool last_it ); + int refine( float3& d, int3& n, int width, int height, int maxlevel, bool last_it ); /* * returns true : values after refine make sense * false : they do not */ inline __device__ - bool verify( const float xn, const float yn, const float sn, const int width, const int height, const int maxlevel ) const; + bool verify( float xn, float yn, float sn, int width, int height, int maxlevel ) const; }; template<> @@ -147,13 +147,13 @@ class ModeFunctions { public: inline __device__ - bool first_contrast_ok( const float val ) const + bool first_contrast_ok( float val ) const { return ( fabsf( val ) >= floorf( d_consts.threshold ) ); } inline __device__ - int refine( float3& d, int3& n, const int width, const int height, const int maxlevel, bool last_it ) const + int refine( float3& d, int3& n, int width, int height, int maxlevel, bool last_it ) const { // OpenCV mode is a special case because d remains unmodified. // Either we return 1, and n has not been modified. 
@@ -187,7 +187,7 @@ public: } inline __device__ - int verify( const float xn, const float yn, const float sn, const int width, const int height, const int maxlevel ) const + int verify( float xn, float yn, float sn, int width, int height, int maxlevel ) const { return true; } @@ -204,7 +204,7 @@ public: } inline __device__ - int refine( float3& d, int3& n, const int width, const int height, const int maxlevel, bool last_it ) const + int refine( float3& d, int3& n, int width, int height, int maxlevel, bool last_it ) const { if( last_it ) return 0; @@ -232,7 +232,7 @@ public: } inline __device__ - int verify( const float xn, const float yn, const float sn, const int width, const int height, const int maxlevel ) const + int verify( float xn, float yn, float sn, int width, int height, int maxlevel ) const { // reject if outside of image bounds or far outside DoG bounds return ( ( xn < 0.0f || @@ -256,7 +256,7 @@ public: } inline __device__ - int refine( float3& d, int3& n, const int width, const int height, const int maxlevel, bool last_it ) const + int refine( float3& d, int3& n, int width, int height, int maxlevel, bool last_it ) const { if( last_it ) return 0; @@ -284,7 +284,7 @@ public: } inline __device__ - int verify( const float xn, const float yn, const float sn, const int width, const int height, const int maxlevel ) const + int verify( float xn, float yn, float sn, int width, int height, int maxlevel ) const { // reject if outside of image bounds or far outside DoG bounds return ( ( xn < 0.0f || @@ -298,16 +298,15 @@ public: }; template -__device__ inline -bool find_extrema_in_dog_sub( cudaTextureObject_t dog, - int debug_octave, - int width, - int height, - const uint32_t maxlevel, - const float w_grid_divider, - const float h_grid_divider, - const int grid_width, - InitialExtremum& ec ) +__device__ inline bool find_extrema_in_dog_sub(cudaTextureObject_t dog, + int debug_octave, + int width, + int height, + uint32_t maxlevel, + float w_grid_divider, + float 
h_grid_divider, + int grid_width, + InitialExtremum& ec) { ec.xpos = 0.0f; ec.ypos = 0.0f; diff --git a/src/popsift/s_pyramid_build_aa.cu b/src/popsift/s_pyramid_build_aa.cu index 57614f42..c026a8b7 100755 --- a/src/popsift/s_pyramid_build_aa.cu +++ b/src/popsift/s_pyramid_build_aa.cu @@ -14,10 +14,7 @@ namespace popsift { namespace gauss { namespace absoluteSource { -__global__ -void horiz( cudaTextureObject_t src_point_texture, - cudaSurfaceObject_t dst_data, - const int dst_level ) +__global__ void horiz(cudaTextureObject_t src_point_texture, cudaSurfaceObject_t dst_data, int dst_level) { const int src_level = dst_level - 1; const int span = d_gauss.inc.span[dst_level]; @@ -52,10 +49,7 @@ void horiz( cudaTextureObject_t src_point_texture, surf2DLayeredwrite( out, dst_data, off_x*4, off_y, dst_level, cudaBoundaryModeZero ); } -__global__ -void vert( cudaTextureObject_t src_point_texture, - cudaSurfaceObject_t dst_data, - const int dst_level ) +__global__ void vert(cudaTextureObject_t src_point_texture, cudaSurfaceObject_t dst_data, int dst_level) { const int span = d_gauss.inc.span[dst_level]; const float* filter = &d_gauss.inc.filter[dst_level*GAUSS_ALIGN]; @@ -91,10 +85,7 @@ void vert( cudaTextureObject_t src_point_texture, surf2DLayeredwrite( out, dst_data, idx*4, idy, dst_level, cudaBoundaryModeZero ); } -__global__ -void vert_abs0( cudaTextureObject_t src_point_texture, - cudaSurfaceObject_t dst_data, - const int dst_level ) +__global__ void vert_abs0(cudaTextureObject_t src_point_texture, cudaSurfaceObject_t dst_data, int dst_level) { const int span = d_gauss.abs_o0.span[dst_level]; const float* filter = &d_gauss.abs_o0.filter[dst_level*GAUSS_ALIGN]; @@ -130,11 +121,10 @@ void vert_abs0( cudaTextureObject_t src_point_texture, surf2DLayeredwrite( out, dst_data, idx*4, idy, dst_level, cudaBoundaryModeZero ); } -__global__ -void vert_all_abs0( cudaTextureObject_t src_point_texture, - cudaSurfaceObject_t dst_data, - const int start_level, - const int max_level 
) +__global__ void vert_all_abs0(cudaTextureObject_t src_point_texture, + cudaSurfaceObject_t dst_data, + int start_level, + int max_level) { const int block_x = blockIdx.x * blockDim.x; const int block_y = blockIdx.y * blockDim.y; diff --git a/src/popsift/s_pyramid_build_aa.h b/src/popsift/s_pyramid_build_aa.h index 5a0baf19..4d3423cf 100755 --- a/src/popsift/s_pyramid_build_aa.h +++ b/src/popsift/s_pyramid_build_aa.h @@ -11,26 +11,16 @@ namespace popsift { namespace gauss { namespace absoluteSource { -__global__ -void horiz( cudaTextureObject_t src_point_texture, - cudaSurfaceObject_t dst_data, - const int dst_level ); +__global__ void horiz(cudaTextureObject_t src_point_texture, cudaSurfaceObject_t dst_data, int dst_level); -__global__ -void vert( cudaTextureObject_t src_point_texture, - cudaSurfaceObject_t dst_data, - const int dst_level ); +__global__ void vert(cudaTextureObject_t src_point_texture, cudaSurfaceObject_t dst_data, int dst_level); -__global__ -void vert_abs0( cudaTextureObject_t src_point_texture, - cudaSurfaceObject_t dst_data, - const int dst_level ); +__global__ void vert_abs0(cudaTextureObject_t src_point_texture, cudaSurfaceObject_t dst_data, int dst_level); -__global__ -void vert_all_abs0( cudaTextureObject_t src_point_texture, - cudaSurfaceObject_t dst_data, - const int start_level, - const int max_level ); +__global__ void vert_all_abs0(cudaTextureObject_t src_point_texture, + cudaSurfaceObject_t dst_data, + int start_level, + int max_level); } // namespace absoluteSource } // namespace gauss diff --git a/src/popsift/s_pyramid_build_ai.cu b/src/popsift/s_pyramid_build_ai.cu index 525bf1f1..c16d636e 100755 --- a/src/popsift/s_pyramid_build_ai.cu +++ b/src/popsift/s_pyramid_build_ai.cu @@ -14,10 +14,7 @@ namespace popsift { namespace gauss { namespace absoluteSourceInterpolated { -__global__ -void horiz( cudaTextureObject_t src_linear_tex, - cudaSurfaceObject_t dst_data, - const int dst_level ) +__global__ void horiz(cudaTextureObject_t 
src_linear_tex, cudaSurfaceObject_t dst_data, int dst_level) { const int src_level = dst_level - 1; const int span = d_gauss.inc.i_span[dst_level]; @@ -43,10 +40,7 @@ void horiz( cudaTextureObject_t src_linear_tex, surf2DLayeredwrite( out, dst_data, off_x*4, blockIdx.y, dst_level, cudaBoundaryModeZero ); } -__global__ -void vert( cudaTextureObject_t src_linear_tex, - cudaSurfaceObject_t dst_data, - const int dst_level ) +__global__ void vert(cudaTextureObject_t src_linear_tex, cudaSurfaceObject_t dst_data, int dst_level) { const int span = d_gauss.inc.i_span[dst_level]; const float* filter = &d_gauss.inc.i_filter[dst_level*GAUSS_ALIGN]; @@ -74,10 +68,7 @@ void vert( cudaTextureObject_t src_linear_tex, surf2DLayeredwrite( out, dst_data, (block_x+idx)*4, block_y+idy, dst_level, cudaBoundaryModeZero ); } -__global__ -void vert_abs0( cudaTextureObject_t src_linear_tex, - cudaSurfaceObject_t dst_data, - const int dst_level ) +__global__ void vert_abs0(cudaTextureObject_t src_linear_tex, cudaSurfaceObject_t dst_data, int dst_level) { const int span = d_gauss.abs_o0.i_span[dst_level]; const float* filter = &d_gauss.abs_o0.i_filter[dst_level*GAUSS_ALIGN]; @@ -105,11 +96,10 @@ void vert_abs0( cudaTextureObject_t src_linear_tex, surf2DLayeredwrite( out, dst_data, (block_x+idx)*4, block_y+idy, dst_level, cudaBoundaryModeZero ); } -__global__ -void vert_all_abs0( cudaTextureObject_t src_linear_tex, - cudaSurfaceObject_t dst_data, - const int start_level, - const int max_level ) +__global__ void vert_all_abs0(cudaTextureObject_t src_linear_tex, + cudaSurfaceObject_t dst_data, + int start_level, + int max_level) { const int block_x = blockIdx.x * blockDim.y; const int block_y = blockIdx.y * blockDim.x; diff --git a/src/popsift/s_pyramid_build_ai.h b/src/popsift/s_pyramid_build_ai.h index 62230f9d..d3431fe7 100755 --- a/src/popsift/s_pyramid_build_ai.h +++ b/src/popsift/s_pyramid_build_ai.h @@ -11,26 +11,16 @@ namespace popsift { namespace gauss { namespace 
absoluteSourceInterpolated { -__global__ -void horiz( cudaTextureObject_t src_linear_tex, - cudaSurfaceObject_t dst_data, - const int dst_level ); +__global__ void horiz(cudaTextureObject_t src_linear_tex, cudaSurfaceObject_t dst_data, int dst_level); -__global__ -void vert( cudaTextureObject_t src_linear_tex, - cudaSurfaceObject_t dst_data, - const int dst_level ); +__global__ void vert(cudaTextureObject_t src_linear_tex, cudaSurfaceObject_t dst_data, int dst_level); -__global__ -void vert_abs0( cudaTextureObject_t src_linear_tex, - cudaSurfaceObject_t dst_data, - const int dst_level ); +__global__ void vert_abs0(cudaTextureObject_t src_linear_tex, cudaSurfaceObject_t dst_data, int dst_level); -__global__ -void vert_all_abs0( cudaTextureObject_t src_linear_tex, - cudaSurfaceObject_t dst_data, - const int start_level, - const int max_level ); +__global__ void vert_all_abs0(cudaTextureObject_t src_linear_tex, + cudaSurfaceObject_t dst_data, + int start_level, + int max_level); } // namespace absoluteSourceInterpolated } // namespace gauss diff --git a/src/popsift/s_pyramid_build_ra.h b/src/popsift/s_pyramid_build_ra.h index 7ff33519..0b628bc4 100755 --- a/src/popsift/s_pyramid_build_ra.h +++ b/src/popsift/s_pyramid_build_ra.h @@ -11,30 +11,23 @@ namespace popsift { namespace gauss { namespace normalizedSource { -__global__ -void horiz( cudaTextureObject_t src_data, - cudaSurfaceObject_t dst_data, - const int dst_w, - const int dst_h, - int octave, - float shift ); +__global__ void horiz(cudaTextureObject_t src_data, + cudaSurfaceObject_t dst_data, + int dst_w, + int dst_h, + int octave, + float shift); -__global__ -void horiz_level( cudaTextureObject_t src_linear_tex, - cudaSurfaceObject_t dst_data, - int dst_w, - int dst_h, - int /* octave - must be 0 */, - int level, - float shift ); +__global__ void horiz_level(cudaTextureObject_t src_linear_tex, + cudaSurfaceObject_t dst_data, + int dst_w, + int dst_h, + int /* octave - must be 0 */, + int level, + float shift); 
-__global__ -void horiz_all( cudaTextureObject_t src_linear_tex, - cudaSurfaceObject_t dst_data, - int dst_w, - int dst_h, - float shift, - const int max_level ); +__global__ void horiz_all( + cudaTextureObject_t src_linear_tex, cudaSurfaceObject_t dst_data, int dst_w, int dst_h, float shift, int max_level); } // namespace normalizedSource } // namespace gauss From 720b25dc893e8f13139211687c900800479c5091 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Sat, 16 May 2020 00:57:46 +0200 Subject: [PATCH 210/285] [popsift] default constructors --- src/popsift/features.cu | 3 +-- src/popsift/s_image.cu | 4 ---- src/popsift/s_image.h | 2 +- 3 files changed, 2 insertions(+), 7 deletions(-) diff --git a/src/popsift/features.cu b/src/popsift/features.cu index 340c030c..023279ff 100755 --- a/src/popsift/features.cu +++ b/src/popsift/features.cu @@ -30,8 +30,7 @@ FeaturesBase::FeaturesBase( ) , _num_ori( 0 ) { } -FeaturesBase::~FeaturesBase( ) -{ } +FeaturesBase::~FeaturesBase( ) = default; /************************************************************* * FeaturesHost diff --git a/src/popsift/s_image.cu b/src/popsift/s_image.cu index b56dd602..f26b8e3e 100755 --- a/src/popsift/s_image.cu +++ b/src/popsift/s_image.cu @@ -42,10 +42,6 @@ ImageBase::ImageBase( int w, int h ) { } -ImageBase::~ImageBase( ) -{ -} - /************************************************************* * Image *************************************************************/ diff --git a/src/popsift/s_image.h b/src/popsift/s_image.h index 0fb6b5ef..6aab2d6a 100755 --- a/src/popsift/s_image.h +++ b/src/popsift/s_image.h @@ -25,7 +25,7 @@ struct ImageBase /** Create a device-sided buffer of the given dimensions */ ImageBase( int w, int h ); - virtual ~ImageBase( ); + virtual ~ImageBase( ) = default; /** Reallocation that takes care of pitch when new dimensions * are smaller and actually reallocation when they are bigger. 
From df7b9c4bb4a937814c5e31272515197fc8fe96ed Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Sat, 16 May 2020 00:58:46 +0200 Subject: [PATCH 211/285] [popsift] override instead of virtual --- src/popsift/features.h | 4 ++-- src/popsift/s_image.h | 24 ++++++++++++------------ 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/popsift/features.h b/src/popsift/features.h index 4fc72a21..1e1a31dd 100755 --- a/src/popsift/features.h +++ b/src/popsift/features.h @@ -70,7 +70,7 @@ class FeaturesHost : public FeaturesBase public: FeaturesHost( ); FeaturesHost( int num_ext, int num_ori ); - virtual ~FeaturesHost( ); + ~FeaturesHost( ) override; typedef Feature* F_iterator; typedef const Feature* F_const_iterator; @@ -106,7 +106,7 @@ class FeaturesDev : public FeaturesBase public: FeaturesDev( ); FeaturesDev( int num_ext, int num_ori ); - virtual ~FeaturesDev( ); + ~FeaturesDev( ) override; void reset( int num_ext, int num_ori ); diff --git a/src/popsift/s_image.h b/src/popsift/s_image.h index 6aab2d6a..493b0525 100755 --- a/src/popsift/s_image.h +++ b/src/popsift/s_image.h @@ -75,12 +75,12 @@ struct Image : public ImageBase /** Create a device-sided buffer of the given dimensions */ Image( int w, int h ); - virtual ~Image( ); + ~Image( ) override; /** Reallocation that takes care of pitch when new dimensions * are smaller and actually reallocation when they are bigger. */ - virtual void resetDimensions( int w, int h ); + void resetDimensions( int w, int h ) override; /* This loading function copies all image data to a local * buffer that is pinned in memory. We should offer two @@ -88,12 +88,12 @@ struct Image : public ImageBase * if the image is already uploaded, and one that takes * an image in pinned memory. 
*/ - virtual void load( void* input ); + void load( void* input ) override; private: - void allocate( int w, int h ); - void createTexture( ); - void destroyTexture( ); + void allocate( int w, int h ) override; + void createTexture( ) override; + void destroyTexture( ) override; private: /* 2D plane holding input image on host for uploading @@ -115,12 +115,12 @@ struct ImageFloat : public ImageBase /** Create a device-sided buffer of the given dimensions */ ImageFloat( int w, int h ); - virtual ~ImageFloat( ); + ~ImageFloat( ) override; /** Reallocation that takes care of pitch when new dimensions * are smaller and actually reallocation when they are bigger. */ - virtual void resetDimensions( int w, int h ); + void resetDimensions( int w, int h ) override; /* This loading function copies all image data to a local * buffer that is pinned in memory. We should offer two @@ -128,12 +128,12 @@ struct ImageFloat : public ImageBase * if the image is already uploaded, and one that takes * an image in pinned memory. 
*/ - virtual void load( void* input ); + void load( void* input ) override; private: - void allocate( int w, int h ); - void createTexture( ); - void destroyTexture( ); + void allocate( int w, int h ) override; + void createTexture( ) override; + void destroyTexture( ) override; private: /* 2D plane holding input image on host for uploading From 42db7fe03ee8d1df11964ff510f58eeeaf8d9848 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Sat, 16 May 2020 01:01:10 +0200 Subject: [PATCH 212/285] [popsift] missing init --- src/popsift/s_image.h | 6 ++-- src/popsift/sift_octave.cu | 12 +++---- src/popsift/sift_octave.h | 70 +++++++++++++++++++------------------- 3 files changed, 44 insertions(+), 44 deletions(-) diff --git a/src/popsift/s_image.h b/src/popsift/s_image.h index 493b0525..64d8c576 100755 --- a/src/popsift/s_image.h +++ b/src/popsift/s_image.h @@ -59,9 +59,9 @@ struct ImageBase int _max_h; // allocated height of image /* Texture information for input image on device */ - cudaTextureObject_t _input_image_tex; - cudaTextureDesc _input_image_texDesc; - cudaResourceDesc _input_image_resDesc; + cudaTextureObject_t _input_image_tex{}; + cudaTextureDesc _input_image_texDesc{}; + cudaResourceDesc _input_image_resDesc{}; }; /************************************************************* diff --git a/src/popsift/sift_octave.cu b/src/popsift/sift_octave.cu index 82ac973b..3ee65750 100755 --- a/src/popsift/sift_octave.cu +++ b/src/popsift/sift_octave.cu @@ -220,14 +220,14 @@ void Octave::alloc_data_tex() { cudaError_t err; - cudaResourceDesc res_desc; + cudaResourceDesc res_desc{}; res_desc.resType = cudaResourceTypeArray; res_desc.res.array.array = _data; err = cudaCreateSurfaceObject(&_data_surf, &res_desc); POP_CUDA_FATAL_TEST(err, "Could not create Blur data surface: "); - cudaTextureDesc tex_desc; + cudaTextureDesc tex_desc{}; memset(&tex_desc, 0, sizeof(cudaTextureDesc)); tex_desc.normalizedCoords = 0; // addressed (x,y) in [width,height] @@ -299,14 +299,14 @@ 
void Octave::alloc_interm_tex() { cudaError_t err; - cudaResourceDesc res_desc; + cudaResourceDesc res_desc{}; res_desc.resType = cudaResourceTypeArray; res_desc.res.array.array = _intm; err = cudaCreateSurfaceObject(&_intm_surf, &res_desc); POP_CUDA_FATAL_TEST(err, "Could not create Blur intermediate surface: "); - cudaTextureDesc tex_desc; + cudaTextureDesc tex_desc{}; memset(&tex_desc, 0, sizeof(cudaTextureDesc)); tex_desc.normalizedCoords = 0; // addressed (x,y) in [width,height] @@ -372,14 +372,14 @@ void Octave::alloc_dog_tex() { cudaError_t err; - cudaResourceDesc dog_res_desc; + cudaResourceDesc dog_res_desc{}; dog_res_desc.resType = cudaResourceTypeArray; dog_res_desc.res.array.array = _dog_3d; err = cudaCreateSurfaceObject(&_dog_3d_surf, &dog_res_desc); POP_CUDA_FATAL_TEST(err, "Could not create DoG surface: "); - cudaTextureDesc dog_tex_desc; + cudaTextureDesc dog_tex_desc{}; memset(&dog_tex_desc, 0, sizeof(cudaTextureDesc)); dog_tex_desc.normalizedCoords = 0; // addressed (x,y) in [width,height] dog_tex_desc.addressMode[0] = cudaAddressModeClamp; diff --git a/src/popsift/sift_octave.h b/src/popsift/sift_octave.h index 32add2df..b6146c47 100755 --- a/src/popsift/sift_octave.h +++ b/src/popsift/sift_octave.h @@ -24,44 +24,44 @@ struct LinearTexture class Octave { - int _w; - int _h; - int _max_w; - int _max_h; - float _w_grid_divider; - float _h_grid_divider; - int _debug_octave_id; - int _levels; - int _gauss_group; - - cudaArray_t _data; - cudaChannelFormatDesc _data_desc; - cudaExtent _data_ext; - cudaSurfaceObject_t _data_surf; - cudaTextureObject_t _data_tex_point; - LinearTexture _data_tex_linear; - - cudaArray_t _intm; - cudaChannelFormatDesc _intm_desc; - cudaExtent _intm_ext; - cudaSurfaceObject_t _intm_surf; - cudaTextureObject_t _intm_tex_point; - LinearTexture _intm_tex_linear; - - cudaArray_t _dog_3d; - cudaChannelFormatDesc _dog_3d_desc; - cudaExtent _dog_3d_ext; - cudaSurfaceObject_t _dog_3d_surf; - cudaTextureObject_t _dog_3d_tex_point; - 
LinearTexture _dog_3d_tex_linear; + int _w{}; + int _h{}; + int _max_w{}; + int _max_h{}; + float _w_grid_divider{}; + float _h_grid_divider{}; + int _debug_octave_id{}; + int _levels{}; + int _gauss_group{}; + + cudaArray_t _data{}; + cudaChannelFormatDesc _data_desc{}; + cudaExtent _data_ext{}; + cudaSurfaceObject_t _data_surf{}; + cudaTextureObject_t _data_tex_point{}; + LinearTexture _data_tex_linear{}; + + cudaArray_t _intm{}; + cudaChannelFormatDesc _intm_desc{}; + cudaExtent _intm_ext{}; + cudaSurfaceObject_t _intm_surf{}; + cudaTextureObject_t _intm_tex_point{}; + LinearTexture _intm_tex_linear{}; + + cudaArray_t _dog_3d{}; + cudaChannelFormatDesc _dog_3d_desc{}; + cudaExtent _dog_3d_ext{}; + cudaSurfaceObject_t _dog_3d_surf{}; + cudaTextureObject_t _dog_3d_tex_point{}; + LinearTexture _dog_3d_tex_linear{}; // one CUDA stream per level // consider whether some of them can be removed - cudaStream_t _stream; - cudaEvent_t _scale_done; - cudaEvent_t _extrema_done; - cudaEvent_t _ori_done; - cudaEvent_t _desc_done; + cudaStream_t _stream{}; + cudaEvent_t _scale_done{}; + cudaEvent_t _extrema_done{}; + cudaEvent_t _ori_done{}; + cudaEvent_t _desc_done{}; public: Octave( ); From 7dce142ab29045de9b3296e76cc3a017213f9172 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Sat, 16 May 2020 01:01:23 +0200 Subject: [PATCH 213/285] [popsfit] nullptr --- src/popsift/sift_octave.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/popsift/sift_octave.cu b/src/popsift/sift_octave.cu index 3ee65750..430bc298 100755 --- a/src/popsift/sift_octave.cu +++ b/src/popsift/sift_octave.cu @@ -237,7 +237,7 @@ void Octave::alloc_data_tex() tex_desc.readMode = cudaReadModeElementType; // read as float tex_desc.filterMode = cudaFilterModePoint; // no interpolation - err = cudaCreateTextureObject( &_data_tex_point, &res_desc, &tex_desc, 0 ); + err = cudaCreateTextureObject( &_data_tex_point, &res_desc, &tex_desc, nullptr ); POP_CUDA_FATAL_TEST(err, "Could not 
create Blur data point texture: "); memset(&tex_desc, 0, sizeof(cudaTextureDesc)); @@ -248,7 +248,7 @@ void Octave::alloc_data_tex() tex_desc.readMode = cudaReadModeElementType; // read as float tex_desc.filterMode = cudaFilterModeLinear; // no interpolation - err = cudaCreateTextureObject( &_data_tex_linear.tex, &res_desc, &tex_desc, 0 ); + err = cudaCreateTextureObject( &_data_tex_linear.tex, &res_desc, &tex_desc, nullptr ); POP_CUDA_FATAL_TEST(err, "Could not create Blur data point texture: "); } From 7b3298ad417982399bf0ad2aebcc5d706ed257cc Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Sat, 16 May 2020 01:02:08 +0200 Subject: [PATCH 214/285] [popsfit] const methods --- src/popsift/sift_octave.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/popsift/sift_octave.h b/src/popsift/sift_octave.h index b6146c47..927b1cb1 100755 --- a/src/popsift/sift_octave.h +++ b/src/popsift/sift_octave.h @@ -105,19 +105,19 @@ class Octave inline LinearTexture getIntermDataTexLinear( ) { return _intm_tex_linear; } - inline cudaTextureObject_t getIntermDataTexPoint( ) { + inline cudaTextureObject_t getIntermDataTexPoint( ) const { return _intm_tex_point; } inline LinearTexture getDataTexLinear( ) { return _data_tex_linear; } - inline cudaTextureObject_t getDataTexPoint( ) { + inline cudaTextureObject_t getDataTexPoint( ) const { return _data_tex_point; } - inline cudaSurfaceObject_t getDataSurface( ) { + inline cudaSurfaceObject_t getDataSurface( ) const { return _data_surf; } - inline cudaSurfaceObject_t getIntermediateSurface( ) { + inline cudaSurfaceObject_t getIntermediateSurface( ) const { return _intm_surf; } From 32037e95bf820f225d968ad717887b48b198a2e4 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Sat, 16 May 2020 01:03:56 +0200 Subject: [PATCH 215/285] [popsfit] improvements --- src/popsift/s_desc_notile.cu | 2 +- src/popsift/s_extrema.cu | 2 +- src/popsift/s_orientation.cu | 8 +++++--- src/popsift/sift_pyramid.cu | 5 +++-- 4 
files changed, 10 insertions(+), 7 deletions(-) diff --git a/src/popsift/s_desc_notile.cu b/src/popsift/s_desc_notile.cu index 8b12c7f6..a336898b 100644 --- a/src/popsift/s_desc_notile.cu +++ b/src/popsift/s_desc_notile.cu @@ -77,7 +77,7 @@ void ext_desc_notile_sub( const float x, const float y, const int level, } } - for( int i=0; i<8; i++ ) + for( int i=0; i<8; ++i) { dpt[i] += popsift::shuffle_down( dpt[i], 4, 8 ); // add n+4 dpt[i] += popsift::shuffle_down( dpt[i], 2, 8 ); // add n+2 diff --git a/src/popsift/s_extrema.cu b/src/popsift/s_extrema.cu index 30944e34..36648e01 100644 --- a/src/popsift/s_extrema.cu +++ b/src/popsift/s_extrema.cu @@ -422,7 +422,7 @@ __device__ inline bool find_extrema_in_dog_sub(cudaTextureObject_t dog, b.y = -D.y; b.z = -D.z; - if( solve( A, b ) == false ) { + if(!solve(A, b)) { d.x = 0; d.y = 0; d.z = 0; diff --git a/src/popsift/s_orientation.cu b/src/popsift/s_orientation.cu index d0ace5d1..95ef911d 100644 --- a/src/popsift/s_orientation.cu +++ b/src/popsift/s_orientation.cu @@ -355,9 +355,11 @@ void Pyramid::orientation( const Config& conf ) nvtxRangePushA( "filtering grid" ); int ext_total = 0; - for( int o=0; o 0 ) { - ext_total += hct.ext_ct[o]; + for(int o : hct.ext_ct) + { + if( o > 0 ) + { + ext_total += o; } } diff --git a/src/popsift/sift_pyramid.cu b/src/popsift/sift_pyramid.cu index 75c3d265..a983bc84 100755 --- a/src/popsift/sift_pyramid.cu +++ b/src/popsift/sift_pyramid.cu @@ -435,8 +435,9 @@ void Pyramid::writeDescriptor( const Config& conf, ostream& ostr, FeaturesHost* << 1.0f / (sigma * sigma) << " "; if (really) { - for (int i = 0; i<128; i++) { - ostr << desc.features[i] << " "; + for (float feature : desc.features) + { + ostr << feature << " "; } } ostr << endl; From 9a9b5a853078598386064336e3d1fcaa0b4031ec Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Sat, 16 May 2020 01:04:22 +0200 Subject: [PATCH 216/285] [popsfit] unused --- src/popsift/sift_pyramid.h | 6 +++--- 1 file changed, 3 insertions(+), 3 
deletions(-) diff --git a/src/popsift/sift_pyramid.h b/src/popsift/sift_pyramid.h index 14b0b663..26b71f3c 100755 --- a/src/popsift/sift_pyramid.h +++ b/src/popsift/sift_pyramid.h @@ -151,8 +151,8 @@ class Pyramid void descriptors( const Config& conf ); - void debug_out_floats ( float* data, uint32_t pitch, uint32_t height ); - void debug_out_floats_t( float* data, uint32_t pitch, uint32_t height ); +// void debug_out_floats ( float* data, uint32_t pitch, uint32_t height ); +// void debug_out_floats_t( float* data, uint32_t pitch, uint32_t height ); void readDescCountersFromDevice( ); void readDescCountersFromDevice( cudaStream_t s ); @@ -165,7 +165,7 @@ class Pyramid private: // debug - void print_tables_host( ); +// void print_tables_host( ); public: }; From 0b0c2e0da0bc2db9677ecbcee6d9386128535bfa Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Sat, 16 May 2020 01:57:14 +0200 Subject: [PATCH 217/285] [popsift] unused variables --- src/popsift/s_pyramid_build.cu | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/popsift/s_pyramid_build.cu b/src/popsift/s_pyramid_build.cu index 326ec325..8873ca5c 100755 --- a/src/popsift/s_pyramid_build.cu +++ b/src/popsift/s_pyramid_build.cu @@ -499,10 +499,8 @@ void Pyramid::build_pyramid( const Config& conf, ImageBase* base ) } else if( conf.getScalingMode() == Config::ScaleDirect ) { GaussTableChoice useGauss = ( conf.getGaussMode() == Config::VLFeat_Relative ) ? 
Interpolated_FromPrevious : NotInterpolated_FromPrevious; - for( int level=0; level<_levels; level++ ) { - const int width = oct_obj.getWidth(); - const int height = oct_obj.getHeight(); - + for( int level=0; level<_levels; level++ ) + { if( level == 0 ) { horiz_from_input_image( conf, base, octave, stream ); From e3279a30d6e0e4e722362a603ed4766fd3dcf841 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Sat, 16 May 2020 01:57:31 +0200 Subject: [PATCH 218/285] [popsift] wrong return type --- src/popsift/s_extrema.cu | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/popsift/s_extrema.cu b/src/popsift/s_extrema.cu index 36648e01..5c1acc44 100644 --- a/src/popsift/s_extrema.cu +++ b/src/popsift/s_extrema.cu @@ -187,7 +187,7 @@ public: } inline __device__ - int verify( float xn, float yn, float sn, int width, int height, int maxlevel ) const + bool verify( float xn, float yn, float sn, int width, int height, int maxlevel ) const { return true; } @@ -232,7 +232,7 @@ public: } inline __device__ - int verify( float xn, float yn, float sn, int width, int height, int maxlevel ) const + bool verify( float xn, float yn, float sn, int width, int height, int maxlevel ) const { // reject if outside of image bounds or far outside DoG bounds return ( ( xn < 0.0f || @@ -284,7 +284,7 @@ public: } inline __device__ - int verify( float xn, float yn, float sn, int width, int height, int maxlevel ) const + bool verify( float xn, float yn, float sn, int width, int height, int maxlevel ) const { // reject if outside of image bounds or far outside DoG bounds return ( ( xn < 0.0f || From 806fd9575772970da6b1eaceab474239fc7c9bca Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Sun, 17 May 2020 15:05:51 +0200 Subject: [PATCH 219/285] [docker] switch popsift-deps --- Dockerfile | 2 +- Dockerfile_deps | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 67c4f8cd..5b691c45 100644 --- a/Dockerfile +++ b/Dockerfile @@ 
-1,6 +1,6 @@ ARG CUDA_TAG=10.2 ARG OS_TAG=18.04 -FROM alicevision/popsift:deps-cuda${CUDA_TAG}-ubuntu${OS_TAG} +FROM alicevision/popsift-deps:cuda${CUDA_TAG}-ubuntu${OS_TAG} LABEL maintainer="AliceVision Team alicevision@googlegroups.com" # use CUDA_TAG to select the image version to use diff --git a/Dockerfile_deps b/Dockerfile_deps index 2a2962bf..bed8cc59 100644 --- a/Dockerfile_deps +++ b/Dockerfile_deps @@ -7,7 +7,7 @@ LABEL maintainer="AliceVision Team alicevision@googlegroups.com" # see https://hub.docker.com/r/nvidia/cuda/ # # For example, to create a ubuntu 16.04 with cuda 8.0 for development, use -# docker build --build-arg CUDA_TAG=8.0 --tag alicevision/popsift:deps-cuda${CUDA_TAG}-ubuntu${OS_TAG} . +# docker build --build-arg CUDA_TAG=8.0 --tag alicevision/popsift-deps:cuda${CUDA_TAG}-ubuntu${OS_TAG} . # # then execute with nvidia docker (https://github.com/nvidia/nvidia-docker/wiki/Installation-(version-2.0)) # docker run -it --runtime=nvidia popsift_deps From ca2aedba00cb850c378f967a10ac98f947fc5424 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Sat, 30 May 2020 22:04:18 +0200 Subject: [PATCH 220/285] [popsift] removed unimplemented functions --- src/popsift/sift_pyramid.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/popsift/sift_pyramid.h b/src/popsift/sift_pyramid.h index 26b71f3c..7ec91807 100755 --- a/src/popsift/sift_pyramid.h +++ b/src/popsift/sift_pyramid.h @@ -151,9 +151,6 @@ class Pyramid void descriptors( const Config& conf ); -// void debug_out_floats ( float* data, uint32_t pitch, uint32_t height ); -// void debug_out_floats_t( float* data, uint32_t pitch, uint32_t height ); - void readDescCountersFromDevice( ); void readDescCountersFromDevice( cudaStream_t s ); void writeDescCountersToDevice( ); From 96d373204cc3ebffc4f0b91e2695c466d3e55b2f Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Sat, 30 May 2020 22:12:42 +0200 Subject: [PATCH 221/285] [cmake] CMAKE_RUNTIME_OUTPUT_DIRECTORY and 
CMAKE_LIBRARY_OUTPUT_DIRECTORY --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a40d13d9..8f66847e 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,8 +4,8 @@ cmake_minimum_required(VERSION 3.12) project(PopSift VERSION 1.0.0 LANGUAGES CXX) # Set build path as a folder named as the platform (linux, windows, darwin...) plus the processor type -set(EXECUTABLE_OUTPUT_PATH "${PROJECT_BINARY_DIR}/${CMAKE_SYSTEM_NAME}-${CMAKE_SYSTEM_PROCESSOR}") -set(LIBRARY_OUTPUT_PATH "${PROJECT_BINARY_DIR}/${CMAKE_SYSTEM_NAME}-${CMAKE_SYSTEM_PROCESSOR}") +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/${CMAKE_SYSTEM_NAME}-${CMAKE_SYSTEM_PROCESSOR}") +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/${CMAKE_SYSTEM_NAME}-${CMAKE_SYSTEM_PROCESSOR}") option(PopSift_BUILD_EXAMPLES "Build PopSift applications." ON) option(PopSift_USE_NVTX_PROFILING "Use CUDA NVTX for profiling." OFF) From a6b1be5cc51c37cf1bd32455e5dcbaab6bc89d96 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Sat, 30 May 2020 22:13:49 +0200 Subject: [PATCH 222/285] [cmake] conditional project for apps --- src/application/CMakeLists.txt | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/application/CMakeLists.txt b/src/application/CMakeLists.txt index a368746f..335191ab 100755 --- a/src/application/CMakeLists.txt +++ b/src/application/CMakeLists.txt @@ -1,12 +1,12 @@ -cmake_minimum_required(VERSION 3.12) -project(PopsiftDemo LANGUAGES CXX) +if(NOT CMAKE_SOURCE_DIR STREQUAL PROJECT_SOURCE_DIR) + # I am top-level project, i.e. 
I am not being include by another project + cmake_minimum_required(VERSION 3.12) + project(PopsiftDemo LANGUAGES CXX) -option(PopSift_BOOST_USE_STATIC_LIBS "Link examples with static Boost libraries" OFF) - -# if this is used as a stand-alone project we need to tell whether to use PIC -if(NOT DEFINED BUILD_SHARED_LIBS) + option(PopSift_BOOST_USE_STATIC_LIBS "Link examples with static Boost libraries" OFF) option(BUILD_SHARED_LIBS "Build shared libraries" ON) set(CMAKE_POSITION_INDEPENDENT_CODE ${BUILD_SHARED_LIBS}) + endif() # enable -fPIE for executables when -fpic @@ -88,7 +88,6 @@ target_compile_definitions(popsift-match PRIVATE ${Boost_DEFINITIONS}) target_link_libraries(popsift-match PUBLIC PopSift::popsift ${PD_LINK_LIBS}) set_target_properties(popsift-match PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/${CMAKE_SYSTEM_NAME}-${CMAKE_SYSTEM_PROCESSOR}") - ############################################################# # installation ############################################################# From afd0685d0a7e01753448797a95de9b4b558c8c16 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Sat, 30 May 2020 22:14:08 +0200 Subject: [PATCH 223/285] [cmake] apps CMAKE_RUNTIME_OUTPUT_DIRECTORY and CMAKE_LIBRARY_OUTPUT_DIRECTORY --- src/application/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/application/CMakeLists.txt b/src/application/CMakeLists.txt index 335191ab..b4774f1b 100755 --- a/src/application/CMakeLists.txt +++ b/src/application/CMakeLists.txt @@ -7,6 +7,8 @@ if(NOT CMAKE_SOURCE_DIR STREQUAL PROJECT_SOURCE_DIR) option(BUILD_SHARED_LIBS "Build shared libraries" ON) set(CMAKE_POSITION_INDEPENDENT_CODE ${BUILD_SHARED_LIBS}) + set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/${CMAKE_SYSTEM_NAME}-${CMAKE_SYSTEM_PROCESSOR}") + set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/${CMAKE_SYSTEM_NAME}-${CMAKE_SYSTEM_PROCESSOR}") endif() # enable -fPIE for executables when -fpic @@ -72,7 +74,6 @@ 
target_include_directories(popsift-demo PUBLIC ${PD_INCLUDE_DIRS}) target_compile_definitions(popsift-demo PRIVATE ${Boost_DEFINITIONS}) target_link_libraries(popsift-demo PUBLIC PopSift::popsift ${PD_LINK_LIBS}) -set_target_properties(popsift-demo PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/${CMAKE_SYSTEM_NAME}-${CMAKE_SYSTEM_PROCESSOR}") ############################################################# # popsift-match @@ -87,7 +88,6 @@ target_include_directories(popsift-match PUBLIC ${PD_INCLUDE_DIRS}) target_compile_definitions(popsift-match PRIVATE ${Boost_DEFINITIONS}) target_link_libraries(popsift-match PUBLIC PopSift::popsift ${PD_LINK_LIBS}) -set_target_properties(popsift-match PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/${CMAKE_SYSTEM_NAME}-${CMAKE_SYSTEM_PROCESSOR}") ############################################################# # installation ############################################################# From a565915119535222184054800ba4c50438a46198 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Sat, 30 May 2020 22:14:22 +0200 Subject: [PATCH 224/285] [cmake] apps use GNUInstallDirs --- src/application/CMakeLists.txt | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/application/CMakeLists.txt b/src/application/CMakeLists.txt index b4774f1b..3b28cec8 100755 --- a/src/application/CMakeLists.txt +++ b/src/application/CMakeLists.txt @@ -5,6 +5,9 @@ if(NOT CMAKE_SOURCE_DIR STREQUAL PROJECT_SOURCE_DIR) option(PopSift_BOOST_USE_STATIC_LIBS "Link examples with static Boost libraries" OFF) option(BUILD_SHARED_LIBS "Build shared libraries" ON) + + include(GNUInstallDirs) + set(CMAKE_POSITION_INDEPENDENT_CODE ${BUILD_SHARED_LIBS}) set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/${CMAKE_SYSTEM_NAME}-${CMAKE_SYSTEM_PROCESSOR}") @@ -92,4 +95,4 @@ target_link_libraries(popsift-match PUBLIC PopSift::popsift ${PD_LINK_LIBS}) # installation ############################################################# 
-install(TARGETS popsift-demo DESTINATION bin) +install(TARGETS popsift-demo DESTINATION ${CMAKE_INSTALL_BINDIR}) From f432e6a9b1693194390673b099efa20492d9ab2d Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Sat, 30 May 2020 22:29:29 +0200 Subject: [PATCH 225/285] [doc] updates changes --- CHANGES.md | 50 ++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 46 insertions(+), 4 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index e27e3819..0f53088b 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -17,7 +17,49 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [1.0.0] - YYYY-MM-DD -- Support for cuda 9 and 10 -- Support for RTX architecture -- Optional grid filtering -- Dockerfile +### Added +- Improved checks for CUDA textures [PR](https://github.com/alicevision/popsift/pull/89) +- CMake: Improved support for all Cuda CC [PR](https://github.com/alicevision/popsift/pull/75) +- Support for Cuda CC 7 cards (RTX 2080) [PR](https://github.com/alicevision/popsift/pull/67) +- Support for Boost 1.70 [PR](https://github.com/alicevision/popsift/pull/65) + +### Fixed +- CMake: fixes to allow building on Windows using vcpkg [PR](https://github.com/alicevision/popsift/pull/92) +- Fix race condition [PR](https://github.com/alicevision/popsift/pull/82) + +### Changed +- Improved resource releasing [PR](https://github.com/alicevision/popsift/pull/71) + +### Removed +- Remove boost dependency from the main library [PR](https://github.com/alicevision/popsift/pull/81) + + +## 2019 + +- Bugfix: Support for images with different resolutions [PR](https://github.com/alicevision/popsift/pull/58) + + +## 2018 + +- CMake: Auto-build export symbols for shared libs on Windows [PR](https://github.com/alicevision/popsift/pull/54) +- Bugfix: freeing page-aligned memory on Win32 [PR](https://github.com/alicevision/popsift/pull/53) +- Paper published @MMSys18 (https://dl.acm.org/doi/10.1145/3204949.3208136) +- Docker support 
[PR](https://github.com/alicevision/popsift/pull/46) +- Appveyor CI windows [PR](https://github.com/alicevision/popsift/pull/41) +- Support for Cuda 9 [PR](https://github.com/alicevision/popsift/pull/38) +- Thrust with Cuda 7 [PR](https://github.com/alicevision/popsift/pull/35) + + +## 2017 + +- Grid filtering [PR](https://github.com/alicevision/popsift/pull/30) +- Improved Gauss filtering [PR](https://github.com/alicevision/popsift/pull/24) +- Support asynchronous SIFT extraction [PR](https://github.com/alicevision/popsift/pull/22) +- Windows port [PR](https://github.com/alicevision/popsift/pull/18) + + +## 2016 + +- Switch to modern CMake [PR](https://github.com/alicevision/popsift/pull/14) +- Travis CI Linux [PR](https://github.com/alicevision/popsift/pull/8) + - First open-source release From 703f70a6ca8273ee79335b8174e39d23778483ee Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Sat, 30 May 2020 23:39:07 +0200 Subject: [PATCH 226/285] [popsift] removed unimplemented function --- src/popsift/sift_pyramid.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/popsift/sift_pyramid.h b/src/popsift/sift_pyramid.h index 7ec91807..cfc6eaae 100755 --- a/src/popsift/sift_pyramid.h +++ b/src/popsift/sift_pyramid.h @@ -160,11 +160,6 @@ class Pyramid void clone_device_descriptors_sub( const Config& conf, FeaturesDev* features ); -private: - // debug -// void print_tables_host( ); - -public: }; } // namespace popsift From 638bf75b32d3ccadfb6079adeaf8e5c84aa50347 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Mon, 29 Jun 2020 13:30:22 +0200 Subject: [PATCH 227/285] [doc] add doxygen and sphinx --- CMakeLists.txt | 6 + cmake/FindSphinx.cmake | 12 ++ doc/CMakeLists.txt | 39 ++++ doc/sphinx/Makefile | 20 ++ doc/sphinx/make.bat | 35 ++++ doc/sphinx/requirements.txt | 4 + doc/sphinx/source/Doxyfile | 267 ++++++++++++++++++++++++++ doc/sphinx/source/about/about.rst | 53 +++++ doc/sphinx/source/api/api.rst | 25 +++ doc/sphinx/source/api/usage.rst | 13 ++ 
doc/sphinx/source/biblio.bib | 25 +++ doc/sphinx/source/bibliography.rst | 5 + doc/sphinx/source/conf.py | 84 ++++++++ doc/sphinx/source/index.rst | 36 ++++ doc/sphinx/source/install/install.rst | 250 ++++++++++++++++++++++++ 15 files changed, 874 insertions(+) create mode 100644 cmake/FindSphinx.cmake create mode 100644 doc/CMakeLists.txt create mode 100644 doc/sphinx/Makefile create mode 100644 doc/sphinx/make.bat create mode 100644 doc/sphinx/requirements.txt create mode 100644 doc/sphinx/source/Doxyfile create mode 100644 doc/sphinx/source/about/about.rst create mode 100644 doc/sphinx/source/api/api.rst create mode 100644 doc/sphinx/source/api/usage.rst create mode 100644 doc/sphinx/source/biblio.bib create mode 100644 doc/sphinx/source/bibliography.rst create mode 100644 doc/sphinx/source/conf.py create mode 100644 doc/sphinx/source/index.rst create mode 100644 doc/sphinx/source/install/install.rst diff --git a/CMakeLists.txt b/CMakeLists.txt index 8f66847e..6b1ed33a 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,6 +8,7 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/${CMAKE_SYSTEM_NAME}-$ set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/${CMAKE_SYSTEM_NAME}-${CMAKE_SYSTEM_PROCESSOR}") option(PopSift_BUILD_EXAMPLES "Build PopSift applications." ON) +option(PopSift_BUILD_DOCS "Build PopSift documentation." ON) option(PopSift_USE_NVTX_PROFILING "Use CUDA NVTX for profiling." OFF) option(PopSift_ERRCHK_AFTER_KERNEL "Synchronize and check CUDA error after every kernel." OFF) option(PopSift_USE_POSITION_INDEPENDENT_CODE "Generate position independent code." 
ON) @@ -196,6 +197,10 @@ endif() add_subdirectory(src) +if(PopSift_BUILD_DOCS) + add_subdirectory(doc) +endif() + if(PopSift_USE_TEST_CMD) add_subdirectory(testScripts) endif() @@ -219,6 +224,7 @@ message(STATUS "PopSift version: " ${PROJECT_VERSION}) message(STATUS "Build type: " ${CMAKE_BUILD_TYPE}) message(STATUS "Build Shared libs: " ${BUILD_SHARED_LIBS}) message(STATUS "Build examples: " ${PopSift_BUILD_EXAMPLES}) +message(STATUS "Build documentation: " ${PopSift_BUILD_DOCS}) message(STATUS "Generate position independent code: " ${CMAKE_POSITION_INDEPENDENT_CODE}) message(STATUS "Use CUDA NVTX for profiling: " ${PopSift_USE_NVTX_PROFILING}) message(STATUS "Synchronize and check CUDA error after every kernel: " ${PopSift_ERRCHK_AFTER_KERNEL}) diff --git a/cmake/FindSphinx.cmake b/cmake/FindSphinx.cmake new file mode 100644 index 00000000..9b9d2b9e --- /dev/null +++ b/cmake/FindSphinx.cmake @@ -0,0 +1,12 @@ +#Look for an executable called sphinx-build +find_program(SPHINX_EXECUTABLE + NAMES sphinx-build + HINTS ${SPHINX_ROOT} + DOC "Path to sphinx-build executable") + +include(FindPackageHandleStandardArgs) + +#Handle standard arguments to find_package like REQUIRED and QUIET +find_package_handle_standard_args(Sphinx + "Failed to find sphinx-build executable" + SPHINX_EXECUTABLE) \ No newline at end of file diff --git a/doc/CMakeLists.txt b/doc/CMakeLists.txt new file mode 100644 index 00000000..5468184f --- /dev/null +++ b/doc/CMakeLists.txt @@ -0,0 +1,39 @@ +find_package(Doxygen REQUIRED) + +file(GLOB_RECURSE ALL_PUBLIC_HEADERS ${PROJECT_SOURCE_DIR}/src/popsift/*.h*) +set(CCTAG_OTHER_DOC_SOURCES ${PROJECT_SOURCE_DIR}/README.md ${PROJECT_SOURCE_DIR}/INSTALL.md) +set(DOXYGEN_USE_MDFILE_AS_MAINPAGE ${PROJECT_SOURCE_DIR}/README.md) +set(DOXYGEN_PROJECT_BRIEF "A faithful implementation of the SIFT algorithm in CUDA.") +set(DOXYGEN_GENERATE_XML YES) +set(DOXYGEN_GENERATE_TREEVIEW YES) +set(DOXYGEN_GENERATE_DEPRECATEDLIST YES) +set(DOXYGEN_SORT_BRIEF_DOCS YES) 
+set(DOXYGEN_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/doxygen) +set(DOXYGEN_INDEX_FILE ${DOXYGEN_OUTPUT_DIRECTORY}/xml/index.xml) + +doxygen_add_docs(doxygen + ${ALL_PUBLIC_HEADERS} ${CCTAG_OTHER_DOC_SOURCES} + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + COMMENT "Generate the doc") + + + +find_package(Sphinx REQUIRED) + +set(SPHINX_SOURCE ${CMAKE_CURRENT_SOURCE_DIR}/sphinx/source) +set(SPHINX_BUILD ${CMAKE_CURRENT_BINARY_DIR}/sphinx) + +add_custom_target(sphinx ALL + COMMAND + ${SPHINX_EXECUTABLE} -b html + # Tell Breathe where to find the Doxygen output + -Dbreathe_projects.PopSift=${DOXYGEN_OUTPUT_DIRECTORY}/xml + ${SPHINX_SOURCE} ${SPHINX_BUILD} + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + DEPENDS + doxygen + # Other docs files you want to track should go here (or in some variable) + ${CMAKE_CURRENT_SOURCE_DIR}/sphinx/source/index.rst + ${DOXYGEN_INDEX_FILE} + # MAIN_DEPENDENCY ${SPHINX_SOURCE}/conf.py + COMMENT "Generating documentation with Sphinx") \ No newline at end of file diff --git a/doc/sphinx/Makefile b/doc/sphinx/Makefile new file mode 100644 index 00000000..d0c3cbf1 --- /dev/null +++ b/doc/sphinx/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source +BUILDDIR = build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
+%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/doc/sphinx/make.bat b/doc/sphinx/make.bat new file mode 100644 index 00000000..6247f7e2 --- /dev/null +++ b/doc/sphinx/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=source +set BUILDDIR=build + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/doc/sphinx/requirements.txt b/doc/sphinx/requirements.txt new file mode 100644 index 00000000..45a2e179 --- /dev/null +++ b/doc/sphinx/requirements.txt @@ -0,0 +1,4 @@ +sphinx>=1.9.0 +sphinx_rtd_theme +sphinxcontrib-bibtex +breathe diff --git a/doc/sphinx/source/Doxyfile b/doc/sphinx/source/Doxyfile new file mode 100644 index 00000000..3a42393f --- /dev/null +++ b/doc/sphinx/source/Doxyfile @@ -0,0 +1,267 @@ +DOXYFILE_ENCODING = UTF-8 +PROJECT_NAME = PopSift +PROJECT_NUMBER = 1.0.0 +PROJECT_BRIEF = "A faithful implementation of the SIFT algorithm in CUDA." 
+PROJECT_LOGO = +OUTPUT_DIRECTORY = ../build +CREATE_SUBDIRS = NO +ALLOW_UNICODE_NAMES = NO +OUTPUT_LANGUAGE = English +BRIEF_MEMBER_DESC = YES +REPEAT_BRIEF = YES +ABBREVIATE_BRIEF = "The $name class" "The $name widget" "The $name file" is provides specifies contains represents a an the +ALWAYS_DETAILED_SEC = NO +INLINE_INHERITED_MEMB = NO +FULL_PATH_NAMES = YES +STRIP_FROM_PATH = +STRIP_FROM_INC_PATH = +SHORT_NAMES = NO +JAVADOC_AUTOBRIEF = NO +QT_AUTOBRIEF = NO +MULTILINE_CPP_IS_BRIEF = NO +INHERIT_DOCS = YES +SEPARATE_MEMBER_PAGES = NO +TAB_SIZE = 4 +ALIASES = +TCL_SUBST = +OPTIMIZE_OUTPUT_FOR_C = NO +OPTIMIZE_OUTPUT_JAVA = NO +OPTIMIZE_FOR_FORTRAN = NO +OPTIMIZE_OUTPUT_VHDL = NO +EXTENSION_MAPPING = +MARKDOWN_SUPPORT = YES +TOC_INCLUDE_HEADINGS = 0 +AUTOLINK_SUPPORT = YES +BUILTIN_STL_SUPPORT = NO +CPP_CLI_SUPPORT = NO +SIP_SUPPORT = NO +IDL_PROPERTY_SUPPORT = YES +DISTRIBUTE_GROUP_DOC = NO +GROUP_NESTED_COMPOUNDS = NO +SUBGROUPING = YES +INLINE_GROUPED_CLASSES = NO +INLINE_SIMPLE_STRUCTS = NO +TYPEDEF_HIDES_STRUCT = NO +LOOKUP_CACHE_SIZE = 0 +EXTRACT_ALL = NO +EXTRACT_PRIVATE = NO +EXTRACT_PACKAGE = NO +EXTRACT_STATIC = NO +EXTRACT_LOCAL_CLASSES = YES +EXTRACT_LOCAL_METHODS = NO +EXTRACT_ANON_NSPACES = NO +HIDE_UNDOC_MEMBERS = NO +HIDE_UNDOC_CLASSES = NO +HIDE_FRIEND_COMPOUNDS = NO +HIDE_IN_BODY_DOCS = NO +INTERNAL_DOCS = NO +CASE_SENSE_NAMES = YES +HIDE_SCOPE_NAMES = NO +HIDE_COMPOUND_REFERENCE= NO +SHOW_INCLUDE_FILES = YES +SHOW_GROUPED_MEMB_INC = NO +FORCE_LOCAL_INCLUDES = NO +INLINE_INFO = YES +SORT_MEMBER_DOCS = YES +SORT_BRIEF_DOCS = NO +SORT_MEMBERS_CTORS_1ST = NO +SORT_GROUP_NAMES = NO +SORT_BY_SCOPE_NAME = NO +STRICT_PROTO_MATCHING = NO +GENERATE_TODOLIST = YES +GENERATE_TESTLIST = YES +GENERATE_BUGLIST = YES +GENERATE_DEPRECATEDLIST= YES +ENABLED_SECTIONS = +MAX_INITIALIZER_LINES = 30 +SHOW_USED_FILES = YES +SHOW_FILES = YES +SHOW_NAMESPACES = YES +FILE_VERSION_FILTER = +LAYOUT_FILE = +CITE_BIB_FILES = +QUIET = NO +WARNINGS = YES 
+WARN_IF_UNDOCUMENTED = YES +WARN_IF_DOC_ERROR = YES +WARN_NO_PARAMDOC = NO +WARN_AS_ERROR = NO +WARN_FORMAT = "$file:$line: $text" +WARN_LOGFILE = +INPUT = ../../../src +INPUT_ENCODING = UTF-8 +FILE_PATTERNS = *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.idl *.ddl *.odl *.h *.hh *.hxx *.hpp *.h++ *.cs *.d *.php *.php4 *.php5 *.phtml *.inc *.m *.markdown *.md *.mm *.dox *.py *.pyw *.f90 *.f95 *.f03 *.f08 *.f *.for *.tcl *.vhd *.vhdl *.ucf *.qsf +RECURSIVE = YES +EXCLUDE = +EXCLUDE_SYMLINKS = NO +EXCLUDE_PATTERNS = */.git/* */.svn/* */.hg/* */CMakeFiles/* */_CPack_Packages/* DartConfiguration.tcl CMakeLists.txt CMakeCache.txt +EXCLUDE_SYMBOLS = +EXAMPLE_PATH = +EXAMPLE_PATTERNS = * +EXAMPLE_RECURSIVE = NO +IMAGE_PATH = +INPUT_FILTER = +FILTER_PATTERNS = +FILTER_SOURCE_FILES = NO +FILTER_SOURCE_PATTERNS = +SOURCE_BROWSER = NO +INLINE_SOURCES = NO +STRIP_CODE_COMMENTS = YES +REFERENCED_BY_RELATION = NO +REFERENCES_RELATION = NO +REFERENCES_LINK_SOURCE = YES +SOURCE_TOOLTIPS = YES +USE_HTAGS = NO +VERBATIM_HEADERS = YES +CLANG_ASSISTED_PARSING = NO +CLANG_OPTIONS = +ALPHABETICAL_INDEX = YES +COLS_IN_ALPHA_INDEX = 5 +IGNORE_PREFIX = +GENERATE_HTML = NO +HTML_OUTPUT = html +HTML_FILE_EXTENSION = .html +HTML_HEADER = +HTML_FOOTER = +HTML_STYLESHEET = +HTML_EXTRA_STYLESHEET = +HTML_EXTRA_FILES = +HTML_COLORSTYLE_HUE = 220 +HTML_COLORSTYLE_SAT = 100 +HTML_COLORSTYLE_GAMMA = 80 +HTML_TIMESTAMP = NO +HTML_DYNAMIC_SECTIONS = NO +HTML_INDEX_NUM_ENTRIES = 100 +GENERATE_DOCSET = NO +DOCSET_FEEDNAME = "Doxygen generated docs" +DOCSET_BUNDLE_ID = org.doxygen.Project +DOCSET_PUBLISHER_ID = org.doxygen.Publisher +DOCSET_PUBLISHER_NAME = Publisher +GENERATE_HTMLHELP = NO +CHM_FILE = +HHC_LOCATION = +GENERATE_CHI = NO +CHM_INDEX_ENCODING = +BINARY_TOC = NO +TOC_EXPAND = NO +GENERATE_QHP = NO +QCH_FILE = +QHP_NAMESPACE = org.doxygen.Project +QHP_VIRTUAL_FOLDER = doc +QHP_CUST_FILTER_NAME = +QHP_CUST_FILTER_ATTRS = +QHP_SECT_FILTER_ATTRS = +QHG_LOCATION = 
+GENERATE_ECLIPSEHELP = NO +ECLIPSE_DOC_ID = org.doxygen.Project +DISABLE_INDEX = NO +GENERATE_TREEVIEW = YES +ENUM_VALUES_PER_LINE = 4 +TREEVIEW_WIDTH = 250 +EXT_LINKS_IN_WINDOW = NO +FORMULA_FONTSIZE = 10 +FORMULA_TRANSPARENT = YES +USE_MATHJAX = NO +MATHJAX_FORMAT = HTML-CSS +MATHJAX_RELPATH = http://cdn.mathjax.org/mathjax/latest +MATHJAX_EXTENSIONS = +MATHJAX_CODEFILE = +SEARCHENGINE = YES +SERVER_BASED_SEARCH = NO +EXTERNAL_SEARCH = NO +SEARCHENGINE_URL = +SEARCHDATA_FILE = searchdata.xml +EXTERNAL_SEARCH_ID = +EXTRA_SEARCH_MAPPINGS = +GENERATE_LATEX = NO +LATEX_OUTPUT = latex +LATEX_CMD_NAME = latex +MAKEINDEX_CMD_NAME = makeindex +COMPACT_LATEX = NO +PAPER_TYPE = a4 +EXTRA_PACKAGES = +LATEX_HEADER = +LATEX_FOOTER = +LATEX_EXTRA_STYLESHEET = +LATEX_EXTRA_FILES = +PDF_HYPERLINKS = YES +USE_PDFLATEX = YES +LATEX_BATCHMODE = NO +LATEX_HIDE_INDICES = NO +LATEX_SOURCE_CODE = NO +LATEX_BIB_STYLE = plain +LATEX_TIMESTAMP = NO +GENERATE_RTF = NO +RTF_OUTPUT = rtf +COMPACT_RTF = NO +RTF_HYPERLINKS = NO +RTF_STYLESHEET_FILE = +RTF_EXTENSIONS_FILE = +RTF_SOURCE_CODE = NO +GENERATE_MAN = NO +MAN_OUTPUT = man +MAN_EXTENSION = .3 +MAN_SUBDIR = +MAN_LINKS = NO +GENERATE_XML = YES +XML_OUTPUT = xml +XML_PROGRAMLISTING = YES +GENERATE_DOCBOOK = NO +DOCBOOK_OUTPUT = docbook +DOCBOOK_PROGRAMLISTING = NO +GENERATE_AUTOGEN_DEF = NO +GENERATE_PERLMOD = NO +PERLMOD_LATEX = NO +PERLMOD_PRETTY = YES +PERLMOD_MAKEVAR_PREFIX = +ENABLE_PREPROCESSING = YES +MACRO_EXPANSION = NO +EXPAND_ONLY_PREDEF = NO +SEARCH_INCLUDES = YES +INCLUDE_PATH = +INCLUDE_FILE_PATTERNS = +PREDEFINED = +EXPAND_AS_DEFINED = +SKIP_FUNCTION_MACROS = YES +TAGFILES = +GENERATE_TAGFILE = +ALLEXTERNALS = NO +EXTERNAL_GROUPS = YES +EXTERNAL_PAGES = YES +PERL_PATH = /usr/bin/perl +CLASS_DIAGRAMS = YES +MSCGEN_PATH = +DIA_PATH = +HIDE_UNDOC_RELATIONS = YES +HAVE_DOT = YES +DOT_NUM_THREADS = 0 +DOT_FONTNAME = Helvetica +DOT_FONTSIZE = 10 +DOT_FONTPATH = +CLASS_GRAPH = YES +COLLABORATION_GRAPH = YES +GROUP_GRAPHS = YES 
+UML_LOOK = NO +UML_LIMIT_NUM_FIELDS = 10 +TEMPLATE_RELATIONS = NO +INCLUDE_GRAPH = YES +INCLUDED_BY_GRAPH = YES +CALL_GRAPH = NO +CALLER_GRAPH = NO +GRAPHICAL_HIERARCHY = YES +DIRECTORY_GRAPH = YES +DOT_IMAGE_FORMAT = png +INTERACTIVE_SVG = NO +DOT_PATH = /usr/bin +DOTFILE_DIRS = +MSCFILE_DIRS = +DIAFILE_DIRS = +PLANTUML_JAR_PATH = +PLANTUML_CFG_FILE = +PLANTUML_INCLUDE_PATH = +DOT_GRAPH_MAX_NODES = 50 +MAX_DOT_GRAPH_DEPTH = 0 +DOT_TRANSPARENT = NO +DOT_MULTI_TARGETS = YES +GENERATE_LEGEND = YES +DOT_CLEANUP = YES diff --git a/doc/sphinx/source/about/about.rst b/doc/sphinx/source/about/about.rst new file mode 100644 index 00000000..52e76402 --- /dev/null +++ b/doc/sphinx/source/about/about.rst @@ -0,0 +1,53 @@ +About +===== + + + +License +~~~~~~~ + +PopSift is licensed under `MPLv2 license `_. + +More info about the license and what you can do with the code can be found at `tldrlegal website `_ + +SIFT was patented in the United States from 1999-03-08 to 2020-03-28. +See the `patent link `_ for more information. +PopSift license only concerns the PopSift source code and does not release users of this code from any requirements that may arise from patents. + +Contact us +~~~~~~~~~~ + +You can contact us on the public mailing list at +`alicevision@googlegroups.com `_ + +You can also contact us privately at +`alicevision-team@googlegroups.com `_ + + +Cite us +~~~~~~~ + +If you want to cite this work in your publication, please use the following + +.. 
code:: bibtex + + @inproceedings{Griwodz2018Popsift, + author = {Griwodz, Carsten and Calvet, Lilian and Halvorsen, P{\aa}l}, + title = {Popsift: A Faithful SIFT Implementation for Real-time Applications}, + booktitle = {Proceedings of the 9th {ACM} Multimedia Systems Conference}, + series = {MMSys '18}, + year = {2018}, + isbn = {978-1-4503-5192-8}, + location = {Amsterdam, Netherlands}, + pages = {415--420}, + numpages = {6}, + doi = {10.1145/3204949.3208136}, + acmid = {3208136}, + publisher = {ACM}, + address = {New York, NY, USA}, + } + +Acknowledgements +~~~~~~~~~~~~~~~~ + +This has been developed in the context of the `European project POPART `_ funded by the European Union’s Horizon 2020 research and innovation programme under `grant agreement No 644874 `_. \ No newline at end of file diff --git a/doc/sphinx/source/api/api.rst b/doc/sphinx/source/api/api.rst new file mode 100644 index 00000000..5e9ef3cc --- /dev/null +++ b/doc/sphinx/source/api/api.rst @@ -0,0 +1,25 @@ +API References +============== + + +Main Classes +~~~~~~~~~~~~ + +.. doxygenclass:: SiftJob + :members: + +.. doxygenclass:: PopSift + :members: + +.. 
doxygenstruct:: popsift::Config + :members: + + +Functions +~~~~~~~~~ + + + + +Utility Classes +~~~~~~~~~~~~~~~ diff --git a/doc/sphinx/source/api/usage.rst b/doc/sphinx/source/api/usage.rst new file mode 100644 index 00000000..b226d240 --- /dev/null +++ b/doc/sphinx/source/api/usage.rst @@ -0,0 +1,13 @@ +Library usage +============= + + + + +Detection +~~~~~~~~~ + + + + + diff --git a/doc/sphinx/source/biblio.bib b/doc/sphinx/source/biblio.bib new file mode 100644 index 00000000..f57013dd --- /dev/null +++ b/doc/sphinx/source/biblio.bib @@ -0,0 +1,25 @@ +@inproceedings{Griwodz2018Popsift, + author = {Griwodz, Carsten and Calvet, Lilian and Halvorsen, P{\aa}l}, + title = {Popsift: A Faithful SIFT Implementation for Real-time Applications}, + booktitle = {Proceedings of the 9th {ACM} Multimedia Systems Conference}, + series = {MMSys '18}, + year = {2018}, + isbn = {978-1-4503-5192-8}, + location = {Amsterdam, Netherlands}, + pages = {415--420}, + numpages = {6}, + doi = {10.1145/3204949.3208136}, + acmid = {3208136}, + publisher = {ACM}, + address = {New York, NY, USA}, +} + +@article{Lowe2003, + author = {Lowe, DG}, + doi = {10.1023/B:VISI.0000029664.99615.94}, + file = {:home/alcov/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Lowe - 2004 - Distinctive image features from scale-invariant keypoints.pdf:pdf}, + journal = {International journal of computer vision}, + pages = {1--29}, + title = {{Distinctive image features from scale-invariant keypoints}}, + year = {2004} +} diff --git a/doc/sphinx/source/bibliography.rst b/doc/sphinx/source/bibliography.rst new file mode 100644 index 00000000..3e04dc7d --- /dev/null +++ b/doc/sphinx/source/bibliography.rst @@ -0,0 +1,5 @@ +Bibliography +============ + +.. 
bibliography:: biblio.bib + :all: \ No newline at end of file diff --git a/doc/sphinx/source/conf.py b/doc/sphinx/source/conf.py new file mode 100644 index 00000000..b85cd593 --- /dev/null +++ b/doc/sphinx/source/conf.py @@ -0,0 +1,84 @@ +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options. For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +# import os +# import sys +# sys.path.insert(0, os.path.abspath('.')) + +import subprocess, os + +def configure_doxyfile(input_dir, output_dir): + with open('Doxyfile.in', 'r') as file : + filedata = file.read() + + filedata = filedata.replace('@DOXYGEN_INPUT_DIR@', input_dir) + filedata = filedata.replace('@DOXYGEN_OUTPUT_DIR@', output_dir) + + with open('Doxyfile', 'w') as file: + file.write(filedata) + +# Check if we're running on Read the Docs' servers +read_the_docs_build = os.environ.get('READTHEDOCS', None) == 'True' + +breathe_projects = {} + +if read_the_docs_build: + # run doxygen before to generate the xml + output_dir = '../build' + subprocess.call('doxygen', shell=True) + breathe_projects['PopSift'] = output_dir + '/xml' + + + + +# -- Project information ----------------------------------------------------- + +project = u'PopSift' +copyright = '2020, AliceVision' +author = 'AliceVision' + + +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. 
+extensions = ['breathe', 'sphinxcontrib.bibtex'] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = [] + +source_suffix = ['.rst', '.md'] + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = 'sphinx_rtd_theme' + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + +# breathe_projects = { +# "PopSift": "../../doxygen/xml/", +# } + +# Breathe Configuration +breathe_default_project = 'PopSift' \ No newline at end of file diff --git a/doc/sphinx/source/index.rst b/doc/sphinx/source/index.rst new file mode 100644 index 00000000..457d54ca --- /dev/null +++ b/doc/sphinx/source/index.rst @@ -0,0 +1,36 @@ +PopSift Library +=============== + +PopSift is an open-source implementation of the SIFT algorithm in CUDA :cite:`Griwodz2018Popsift`. +PopSift tries to stick as closely as possible to David Lowe's famous paper :cite:`Lowe2003`, while extracting features from an image in real-time at least on an NVidia GTX 980 Ti GPU. + + +.. toctree:: + :maxdepth: 2 + :hidden: + :caption: Install + + install/install + +.. toctree:: + :maxdepth: 2 + :hidden: + :caption: API Documentation + + api/usage + api/api + + +.. toctree:: + :maxdepth: 2 + :hidden: + :caption: About + + about/about + +.. 
toctree:: + :maxdepth: 2 + :hidden: + :caption: References + + bibliography diff --git a/doc/sphinx/source/install/install.rst b/doc/sphinx/source/install/install.rst new file mode 100644 index 00000000..fb6eb014 --- /dev/null +++ b/doc/sphinx/source/install/install.rst @@ -0,0 +1,250 @@ +Requirements +============ + +Hardware +~~~~~~~~ + +PopSift is a GPU implementation that requires an NVIDIA GPU card with a CUDA compute capability >= 3.0 (including, e.g. the GT 650M). +The code is originally developed with the compute capability 5.2 card GTX 980 Ti in mind. + +You can check your `NVIDIA GPU card CC support here `_ or on the `NVIDIA dev page `_. +If you do not have an NVIDIA card you will still be able to compile and use the CPU version. + +Here are the minimum hardware requirements for PopSift: + ++--------------------------------------------------------------------------+ +| Minimum requirements | ++===================+======================================================+ +| Operating systems | Windows x64, Linux, macOS | ++-------------------+------------------------------------------------------+ +| CPU | Recent Intel or AMD cpus | ++-------------------+------------------------------------------------------+ +| RAM Memory | 8 GB | ++-------------------+------------------------------------------------------+ +| Hard Drive | No particular requirements | ++-------------------+------------------------------------------------------+ +| GPU | NVIDIA CUDA-enabled GPU (compute capability >= 3.5) | ++-------------------+------------------------------------------------------+ + + + +Software +~~~~~~~~ + +The core library depends only on Cuda >= 7.0 + +The library includes a few sample applications that show how to use the library. +They require + +* Boost >= 1.55 (required components atomic, chrono, date-time, system, thread) + +* [optionally] DevIL (libdevil-dev) can be used to load a broader range of image formats, otherwise only pgm is supported. 
+ + + +------------ + + +vcpkg +===== + +`vcpkg `_ is a cross-platform (Windows, Linux and MacOS), open-source package manager created by Microsoft. + +We are planning to release a port of the library so that it can be easily built using the package manager on all supported platforms. +Stay tuned! + + +------------ + +Building the library +==================== + +Building tools +~~~~~~~~~~~~~~ + +Required tools: + +* CMake >= 3.14 to build the code +* Git +* C/C++ compiler supporting the C++11 standard (gcc >= 4.6 or visual studio or clang) +* CUDA >= 7.0 + + + +Dependencies +~~~~~~~~~~~~ + +vcpkg ++++++ + +vcpkg can be used to install all the dependencies on all the supported platforms. +This is particularly useful on Windows. +To install the dependencies: + +.. code:: shell + + vcpkg install cuda devil boost-system boost-program-options boost-thread boost-filesystem + +You can add the flag :code:`--triplet` to specify the architecture and the version you want to build. +For example: + +* :code:`--triplet x64-windows` will build the dynamic version for Windows 64 bit + +* :code:`--triplet x64-windows-static` will build the static version for Windows 64 bit + +* :code:`--triplet x64-linux-dynamic` will build the dynamic version for Linux 64 bit + +and so on. +More information can be found `here `_ + +Linux ++++++ + +On Linux you can install from the package manager: + +For Ubuntu/Debian package system: + +.. code:: shell + + sudo apt-get install g++ git-all libboost-all-dev libdevil-dev + + +For CentOS package system: + +.. code:: shell + + sudo yum install gcc-c++ git boost-devel devil + + +MacOS ++++++ + +On MacOs using `Homebrew `_ install the following packages: + +.. code:: shell + + brew install git boost devil + + +Getting the sources +~~~~~~~~~~~~~~~~~~~~ + +.. code:: shell + + git clone https://github.com/alicevision/PopSift.git + + +CMake configuration +~~~~~~~~~~~~~~~~~~~ + +From PopSift root folder you can run cmake: + +.. 
code:: shell + + mkdir build && cd build + cmake .. + make -j `nproc` + +On Windows add :code:`-G "Visual Studio 16 2019" -A x64` to generate the Visual Studio solution according to your VS version (`see CMake documentation `_). + +If you are using the dependencies built with VCPKG you need to pass :code:`-DCMAKE_TOOLCHAIN_FILE=path/to/vcpkg/scripts/buildsystems/vcpkg.cmake` at cmake step to let it know where to find the dependencies. + + +CMake options ++++++++++++++ + +CMake configuration can be controlled by changing the values of the following variables (here with their default value) + + +* :code:`BUILD_SHARED_LIBS:BOOL=ON` to enable/disable the building of shared libraries + +* :code:`PopSift_BUILD_EXAMPLES:BOOL=ON` to enable/disable the building of applications + +* :code:`PopSift_BUILD_DOC:BOOL=OFF` to enable/disable building this documentation and the Doxygen one. + +For example, if you do not want to build the applications, you have to pass :code:`-DPopSift_BUILD_EXAMPLES:BOOL=OFF` and so on. + + +------------ + + +PopSift as third party +====================== + +When you install PopSift a file :code:`PopSiftConfig.cmake` is installed in :code:`/lib/cmake/PopSift/` that allows you to import the library in your CMake project. +In your :code:`CMakeLists.txt` file you can add the dependency in this way: + +.. code-block:: + :linenos: + + # Find the package from the PopSiftConfig.cmake + # in /lib/cmake/PopSift/. Under the namespace PopSift:: + # it exposes the target PopSift that allows you to compile + # and link with the library + find_package(PopSift CONFIG REQUIRED) + ... + # suppose you want to try it out in an executable + add_executable(popsiftTest yourfile.cpp) + # add link to the library + target_link_libraries(popsiftTest PUBLIC PopSift::PopSift) + +Then, in order to build just pass the location of :code:`PopSiftConfig.cmake` from the cmake command line: + +.. code:: shell + + cmake .. 
-DPopSift_DIR=/lib/cmake/PopSift/ + + +------------ + + + +Docker image +============ + +A docker image can be built using the Ubuntu based :code:`Dockerfile`, which is based on nvidia/cuda image (https://hub.docker.com/r/nvidia/cuda/ ) + + +Building the dependency image +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +We provide a :code:`Dockerfile_deps` containing a cuda image with all the necessary PopSift dependencies installed. + +A parameter :code:`CUDA_TAG` can be passed when building the image to select the cuda version. +Similarly, :code:`OS_TAG` can be passed to select the Ubuntu version. +By default, :code:`CUDA_TAG=10.2` and :code:`OS_TAG=18.04` + +For example to create the dependency image based on ubuntu 18.04 with cuda 8.0 for development, use + +.. code:: shell + + docker build --build-arg CUDA_TAG=8.0 --tag alicevision/popsift-deps:cuda8.0-ubuntu18.04 -f Dockerfile_deps . + +The complete list of available tags can be found on the nvidia `dockerhub page <https://hub.docker.com/r/nvidia/cuda/>`_ + + +Building the PopSift image +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Once you built the dependency image, you can build the popsift image in the same manner using :code:`Dockerfile`: + +.. code:: shell + + docker build --tag alicevision/popsift:cuda8.0-ubuntu18.04 . + + +Running the PopSift image +~~~~~~~~~~~~~~~~~~~~~~~~~ + +In order to run the image nvidia docker is needed: see the `installation instruction `_. +Once installed, the docker can be run, e.g., in interactive mode with + +.. code:: shell + + docker run -it --runtime=nvidia alicevision/popsift:cuda8.0-ubuntu18.04 + + +Official images on DockerHub +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Check the docker hub `PopSift repository `_ for the available images. 
\ No newline at end of file From 564cfec196dd88df9c91290209657817007f012a Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Mon, 29 Jun 2020 13:32:42 +0200 Subject: [PATCH 228/285] [doc] some fixing and completion to doxygen --- src/popsift/common/device_prop.h | 51 +++-- src/popsift/features.h | 16 +- src/popsift/popsift.h | 122 +++++++++--- src/popsift/sift_conf.h | 325 ++++++++++++++++++++----------- src/popsift/sift_extremum.h | 39 ++-- src/popsift/sift_octave.h | 12 +- 6 files changed, 390 insertions(+), 175 deletions(-) diff --git a/src/popsift/common/device_prop.h b/src/popsift/common/device_prop.h index 8828fd4a..ed5db2b2 100644 --- a/src/popsift/common/device_prop.h +++ b/src/popsift/common/device_prop.h @@ -10,8 +10,12 @@ #include #include -namespace popsift { namespace cuda { +namespace popsift { +namespace cuda { +/** + * @brief A class to recover, query and print the information about the cuda device. + */ class device_prop_t { int _num_devices; @@ -27,36 +31,48 @@ class device_prop_t device_prop_t( ); ~device_prop_t( ); + /** + * @brief Print the information about the device. + */ void print( ); + + /** + * @brief Set the device to use. + * @param[in] n The index of the device to use. + * @param[in] print_choice Whether to print information about the chosen device. + */ void set( int n, bool print_choice = false ); - /** Check if a request exceeds the current CUDA device's limit in + /** + * @brief Check if a request exceeds the current CUDA device's limit in * texture2Dlinear dimensions. texture2Dlinear is based on CUDA memory that * can be accessed directly (i.e. no CudaArray). * @param[in,out] width Desired width of the texture. * @param[in,out] height Desired height of the texture. * @param[in] printWarn if true, print warnings to cerr if desired width * or height exceeds limits. - * @return { true if the desired width and height are possible. - * false if one or both of the desired width and height are impossible. 
- * The desired width or height (or both) are replaced by the limit.} + * @return \p true if the desired width and height are possible. + * \p false if one or both of the desired width and height are impossible. + * The desired width or height (or both) are replaced by the limit. */ bool checkLimit_2DtexLinear( int& width, int& height, bool printWarn ) const; - /** Check if a request exceeds the current CUDA device's limit in + /** + * @brief Check if a request exceeds the current CUDA device's limit in * texture2D dimensions. texture2D is based on CUDA Arrays, which have * invisible layout and can only be filled with cudaMemcpy. * @param[in,out] width Desired width of the texture. * @param[in,out] height Desired height of the texture. * @param[in] printWarn if true, print warnings to cerr if desired width * or height exceeds limits. - * @return { true if the desired width and height are possible. - * false if one or both of the desired width and height are impossible. - * The desired width or height (or both) are replaced by the limit.} + * @return \p true if the desired width and height are possible. + * \p false if one or both of the desired width and height are impossible. + * The desired width or height (or both) are replaced by the limit. */ bool checkLimit_2DtexArray( int& width, int& height, bool printWarn ) const; - /** Check if a request exceeds the current CUDA device's limit in + /** + * @brief Check if a request exceeds the current CUDA device's limit in * texture2DLayered dimensions. texture2DLayered refers to a 3D structure, where * interpolation happens only in 3D, effectively creating layers. * @param[in,out] width Desired width of the texture. @@ -64,15 +80,16 @@ class device_prop_t * @param[in,out] layers Desired depth of the texture. * @param[in] printWarn if true, print warnings to cerr if desired width * or height exceeds limits. - * @return { true if the desired width, height and depth are possible. 
- * false if one or both of the desired width and height are impossible. + * @return \p true if the desired width, height and depth are possible. + * \p false if one or both of the desired width and height are impossible. * The desired width, height and layers are replaced by the limit - * if they exceed it.} + * if they exceed it. */ bool checkLimit_2DtexLayered( int& width, int& height, int& layers, bool printWarn ) const; - /** Check if a request exceeds the current CUDA device's limit in + /** + * @brief Check if a request exceeds the current CUDA device's limit in * surface2DLayered dimensions. surface2DLayered is the writable equivalent * to texture2DLayered, but the width must be given in bytes, not elements. * Since we use float, images cannot be as wide as expected. @@ -81,10 +98,10 @@ class device_prop_t * @param[in,out] layers Desired depth of the texture. * @param[in] printWarn if true, print warnings to cerr if desired width * or height exceeds limits. - * @return { true if the desired width, height and depth are possible. - * false if one or both of the desired width and height are impossible. + * @return \p true if the desired width, height and depth are possible. + * \p false if one or both of the desired width and height are impossible. * The desired width, height and layers are replaced by the limit - * if they exceed it.} + * if they exceed it. */ bool checkLimit_2DsurfLayered( int& width, int& height, int& layers, bool printWarn ) const; diff --git a/src/popsift/features.h b/src/popsift/features.h index 1e1a31dd..3b16f954 100755 --- a/src/popsift/features.h +++ b/src/popsift/features.h @@ -16,7 +16,8 @@ namespace popsift { struct Descriptor; // float features[128]; -/* This is a data structure that is returned to a calling program. +/** + * @brief This is a data structure that is returned to a calling program. * The xpos/ypos information in feature is scale-adapted. 
*/ struct Feature @@ -24,9 +25,11 @@ struct Feature int debug_octave; float xpos; float ypos; - float sigma; // scale; - int num_ori; // number of this extremum's orientations - // remaining entries in desc are 0 + /// scale + float sigma; + /// number of this extremum's orientations + /// remaining entries in desc are 0 + int num_ori; float orientation[ORIENTATION_MAX_COUNT]; Descriptor* desc[ORIENTATION_MAX_COUNT]; @@ -52,7 +55,8 @@ class FeaturesBase inline void setDescriptorCount( int num_ori ) { _num_ori = num_ori; } }; -/* This is a data structure that is returned to a calling program. +/** + * @brief This is a data structure that is returned to a calling program. * _ori is a transparent flat memory holding descriptors * that are referenced by the extrema. * @@ -93,7 +97,7 @@ class FeaturesHost : public FeaturesBase friend class Pyramid; }; -typedef FeaturesHost Features; +using Features = FeaturesHost; std::ostream& operator<<( std::ostream& ostr, const FeaturesHost& feature ); diff --git a/src/popsift/popsift.h b/src/popsift/popsift.h index 631a7196..446e103a 100755 --- a/src/popsift/popsift.h +++ b/src/popsift/popsift.h @@ -52,16 +52,38 @@ class SiftJob #endif public: - /** Constructor for byte images, value range 0..255 */ + + /** + * @brief Constructor for byte images, value range 0..255 + * @param[in] w the width in pixel of the image + * @param[in] h the height in pixel of the image + * @param[in] imageData the image buffer + */ SiftJob( int w, int h, const unsigned char* imageData ); - /** Constructor for float images, value range [0..1[ */ + /** + * @brief Constructor for float images, value range [0..1[ + * @param[in] w the width in pixel of the image + * @param[in] h the height in pixel of the image + * @param[in] imageData the image buffer + */ SiftJob( int w, int h, const float* imageData ); + /** + * @brief Destructor releases all the resources. 
+ */ ~SiftJob( ); - popsift::FeaturesHost* get(); // should be deprecated, same as getHost() + /** + * @deprecated + * @see getHost() + */ + popsift::FeaturesHost* get(); popsift::FeaturesBase* getBase(); + /** + * @brief + * @return + */ popsift::FeaturesHost* getHost(); popsift::FeaturesDev* getDev(); @@ -72,6 +94,9 @@ class SiftJob void setFeatures( popsift::FeaturesBase* f ); }; +/** + * @brief + */ class PopSift { struct Pipe @@ -91,16 +116,28 @@ class PopSift }; public: + + /** + * @brief Image modes + */ enum ImageMode { + /// byte image, value range 0..255 ByteImages, + /// float images, value range [0..1[ FloatImages }; + /** + * @brief Results for the allocation test. + */ enum AllocTest { + /// the image dimensions are supported by this device's CUDA texture engine. Ok, + /// the input image size exceeds the dimensions of the CUDA Texture used for loading. ImageExceedsLinearTextureLimit, + /// the scaled input image exceeds the dimensions of the CUDA Surface used for the image pyramid. ImageExceedsLayeredSurfaceLimit }; @@ -109,27 +146,46 @@ class PopSift PopSift() = delete; PopSift(const PopSift&) = delete; - /* We support more than 1 streams, but we support only one sigma and one + /** + * @brief We support more than 1 streams, but we support only one sigma and one * level parameters. */ explicit PopSift( ImageMode imode = ByteImages ); - explicit PopSift( const popsift::Config& config, - popsift::Config::ProcessingMode mode = popsift::Config::ExtractingMode, - ImageMode imode = ByteImages ); + + /** + * @brief + * @param config + * @param mode + * @param imode + */ + explicit PopSift(const popsift::Config& config, + popsift::Config::ProcessingMode mode = popsift::Config::ExtractingMode, + ImageMode imode = ByteImages); + + /** + * @brief Release all the resources. 
+ */ ~PopSift(); public: - /** Provide the configuration if you used the PopSift default - * constructor */ + /** + * @brief Provide the configuration if you used the PopSift default + * constructor + */ bool configure( const popsift::Config& config, bool force = false ); + /** + * @brief Release the resources. + */ void uninit( ); - /** Check whether the current CUDA device can support the image + /** + * @brief Check whether the current CUDA device can support the image * resolution (width,height) with the current configuration * based on the card's texture engine. * The function does not check if there is sufficient available * memory. + * * The first part of the test depends on the parameters width and * height. It checks whether the image size is supported by CUDA * 2D linear textures on this card. This is used to load the image @@ -142,8 +198,9 @@ class PopSift * "levels", because it determines the number of levels in each * octave. The CUDA 2D layered texture must support enough depth * for each level. - * @param width The width of the input image - * @param height The height of the input image + * + * @param[in] width The width of the input image + * @param[in] height The height of the input image * @return AllocTest::Ok if the image dimensions are supported by this device's * CUDA texture engine, * AllocTest::ImageExceedsLinearTextureLimit if the input image size @@ -152,41 +209,60 @@ class PopSift * AllocTest::ImageExceedsLayeredSurfaceLimit if the scaled input * image exceeds the dimensions of the CUDA Surface used for the * image pyramid. The scaling factor must be changes to fit in. - * @remark { If you want to call configure() before extracting features, - * you should call configure() before textTextureFit(). } - * @remark { The current CUDA device is determined by a call to - * cudaGetDevice(), card properties are only read once. 
} + * @remark * If you want to call configure() before extracting features, + * you should call configure() before testTextureFit(). + * @remark * The current CUDA device is determined by a call to + * cudaGetDevice(), card properties are only read once. + * @see AllocTest */ AllocTest testTextureFit( int width, int height ); - /** Create a warning string for an AllocTest error code. */ + /** + * @brief Create a warning string for an AllocTest error code. + */ std::string testTextureFitErrorString( AllocTest err, int w, int h ); - /** Enqueue a byte image, value range 0..255 */ + /** + * @brief Enqueue a byte image, value range [0,255]. + * @param[in] w the width of the image. + * @param[in] h the height of the image. + * @param[in] imageData the image buffer. + * @return the associated job + * @see SiftJob + */ SiftJob* enqueue( int w, int h, const unsigned char* imageData ); - /** Enqueue a float image, value range 0..1 */ + /** + * @brief Enqueue a float image, value range [0,1]. + * @param[in] w the width of the image. + * @param[in] h the height of the image. + * @param[in] imageData the image buffer. + * @return the associated job + * @see SiftJob + */ SiftJob* enqueue( int w, int h, const float* imageData ); /** * @deprecated - * */ + */ inline void uninit( int /*pipe*/ ) { uninit(); } /** * @deprecated - **/ + */ inline bool init( int /*pipe*/, int w, int h ) { _last_init_w = w; _last_init_h = h; return true; } - /** deprecated */ + /** + * @deprecated + */ inline popsift::FeaturesBase* execute( int /*pipe*/, const unsigned char* imageData ) { SiftJob* j = enqueue( _last_init_w, _last_init_h, imageData ); @@ -201,7 +277,7 @@ class PopSift void private_apply_scale_factor( int& w, int& h ); void uploadImages( ); - /* The following method are alternative worker functions for Jobs submitted by + /* The following methods are alternative worker functions for Jobs submitted by * a calling application. 
The choice of method is made by the mode parameter * in the PopSift constructor. */ diff --git a/src/popsift/sift_conf.h b/src/popsift/sift_conf.h index ecb5ea2f..583a958c 100644 --- a/src/popsift/sift_conf.h +++ b/src/popsift/sift_conf.h @@ -21,14 +21,22 @@ #else #endif -namespace popsift -{ +namespace popsift { +/** + * @brief Struct containing the parameters that control the extraction algorithm + */ struct Config { - Config( ); + Config(); - enum GaussMode { + /** + * @brief The way the gaussian mode is computed. + * + * Each setting allows to mimic and reproduce the behaviour of other Sift implementations. + */ + enum GaussMode + { VLFeat_Compute, VLFeat_Relative, VLFeat_Relative_All, @@ -37,68 +45,145 @@ struct Config Fixed15 }; - enum SiftMode { + /** + * @brief General setting to reproduce the results of other Sift implementations. + */ + enum SiftMode + { + /// Popsift implementation PopSift, + /// OpenCV implementation OpenCV, + /// VLFeat implementation VLFeat, + /// Default implementation is PopSift Default = PopSift }; - enum LogMode { + /** + * @brief The logging mode. + */ + enum LogMode + { None, All }; - enum ScalingMode { + /** + * @brief The scaling mode. + */ + enum ScalingMode + { ScaleDirect, - ScaleDefault // Indirect - only working method + /// Indirect - only working method + ScaleDefault }; - /* Modes for descriptor extraction: */ - enum DescMode { - Loop, // scan horizontal, extract valid points - ILoop, // scan horizontal, extract valid points, interpolate with tex engine - Grid, // scan in rotated mode, round pixel address - IGrid, // scan in rotated mode, interpolate with tex engine - NoTile // variant of IGrid, no duplicate gradiant fetching + /** + * @brief Modes for descriptor extraction. 
+ */ + enum DescMode + { + /// scan horizontal, extract valid points + Loop, + /// scan horizontal, extract valid points, interpolate with tex engine + ILoop, + /// scan in rotated mode, round pixel address + Grid, + /// scan in rotated mode, interpolate with tex engine + IGrid, + /// variant of IGrid, no duplicate gradient fetching + NoTile }; - enum NormMode { - RootSift, // The L1-inspired norm, gives better matching results - Classic // The L2-inspired norm, all descriptors on a hypersphere + /** + * @brief Type of norm to use for matching. + */ + enum NormMode + { + /// The L1-inspired norm, gives better matching results ("RootSift") + RootSift, + /// The L2-inspired norm, all descriptors on a hypersphere ("classic") + Classic }; - /* To reduce time used in descriptor extraction, some extrema can be filtered + /** + * @brief Filtering strategy. + * + * To reduce time used in descriptor extraction, some extrema can be filtered * immediately after finding them. It is possible to keep those with the largest * scale (LargestScaleFirst), smallest scale (SmallestScaleFirst), or a random * selection. Note that largest and smallest give a stable result, random does not. */ enum GridFilterMode { + /// keep a random selection RandomScale, + /// keep those with the largest scale LargestScaleFirst, + /// keep those with the smallest scale SmallestScaleFirst }; - /* A parameter for the PopSift constructor. Determines which data is kept in - * the Job data structure after processing, which is downloaded to the host, - * which is invalidated. + /** + * @brief Processing mode. + * + * Determines which data is kept in the Job data structure after processing, which one is downloaded to the host, + * which one is invalidated. */ enum ProcessingMode { ExtractingMode, MatchingMode }; + /** + * @brief Set the Gaussian mode from string. 
+ * @param[in] m The string version of the GaussMode + * @see GaussMode + */ void setGaussMode( const std::string& m ); + /** + * @brief Set the Gaussian mode. + * @param[in] m The Gaussian mode to use. + */ void setGaussMode( GaussMode m ); + + /** + * @brief Set the Sift mode. + * @param[in] m The Sift mode + * @see SiftMode + */ void setMode( SiftMode m ); + + /** + * @brief Set the log mode. + * @param mode The log mode. + * @see LogMode + */ void setLogMode( LogMode mode = All ); void setScalingMode( ScalingMode mode = ScaleDefault ); + + /** + * @brief Enable/disable verbose mode. + * @param[in] on Whether to display additional information. + */ void setVerbose( bool on = true ); + + /** + * @brief Set the descriptor mode by string. + * @param[in] byname The string containing the descriptor mode. + * @see DescMode + */ void setDescMode( const std::string& byname ); + + /** + * @brief Set the descriptor mode. + * @param[in] mode The descriptor mode. + * @see DescMode + */ void setDescMode( DescMode mode = Loop ); - void setGaussGroup( int groupsize ); - int getGaussGroup( ) const; +// void setGaussGroup( int groupsize ); +// int getGaussGroup( ) const; void setDownsampling( float v ); void setOctaves( int v ); @@ -107,9 +192,9 @@ struct Config void setEdgeLimit( float v ); void setThreshold( float v ); void setInitialBlur( float blur ); - void setMaxExtreme( int m ); +// void setMaxExtreme( int m ); void setPrintGaussTables( ); - void setDPOrientation( bool on ); +// void setDPOrientation( bool on ); void setFilterMaxExtrema( int extrema ); void setFilterGridSize( int sz ); void setFilterSorting( const std::string& direction ); @@ -118,63 +203,80 @@ struct Config bool hasInitialBlur( ) const; float getInitialBlur( ) const; - // computes the actual peak threshold depending on the threshold - // parameter and the non-augmented number of levels + /// computes the actual peak threshold depending on the threshold + /// parameter and the non-augmented number of
levels float getPeakThreshold() const; - // print Gauss spans and tables? + /// print Gauss spans and tables? bool ifPrintGaussTables() const; - // What Gauss filter scan is desired? + /// What Gauss filter scan is desired? GaussMode getGaussMode( ) const; - // Call this from the constructor. + /// Call this from the constructor. static GaussMode getGaussModeDefault( ); + // Helper functions for the main program's usage string. + /** + * @brief Get a message with the strings to use for setting the values of \p GaussMode + * @return A message with the list of strings + */ static const char* getGaussModeUsage( ); - // get the SIFT mode for more detailed sub-modes + /** + * @brief Get the SIFT mode for more detailed sub-modes + * @return The SiftMode + * @see SiftMode + */ SiftMode getSiftMode() const; - // find out if we should print logging info or not + /// find out if we should print logging info or not LogMode getLogMode() const; - // The number of octaves is chosen freely. If not specified, - // it is: log_2( min(x,y) ) - 3 - start_sampling + /// The number of octaves is chosen freely. If not specified, + /// it is: log_2( min(x,y) ) - 3 - start_sampling int octaves; - // The number of levels per octave. This is actually the - // number of inner DoG levels where we can search for - // feature points. The number of ... - // - // This is the non-augmented number of levels, meaning - // the this is not the number of gauss-filtered picture - // layers (which is levels+3), but the number of DoG - // layers in which we can search for extrema. + /// The number of levels per octave. This is actually the + /// number of inner DoG levels where we can search for + /// feature points. The number of ... + /// + /// This is the non-augmented number of levels, meaning + /// the this is not the number of gauss-filtered picture + /// layers (which is levels+3), but the number of DoG + /// layers in which we can search for extrema. 
int levels; float sigma; - // default edge_limit 16.0f from Celebrandil - // default edge_limit 10.0f from Bemap + /// default edge_limit 16.0f from Celebrandil + /// default edge_limit 10.0f from Bemap float _edge_limit; /** Functions related to descriptor normalization: L2-like or RootSift */ void setNormMode( NormMode m ); void setNormMode( const std::string& m ); + /** + * @brief Set the normalization mode. + * @param[in] on Use RootSift (\p true) or the L2-norm (\p false). + * @deprecated + * @see NormMode + */ DEPRECATED(void setUseRootSift( bool on )); bool getUseRootSift( ) const; NormMode getNormMode( NormMode m ) const; static NormMode getNormModeDefault( ); // Call this from the constructor. static const char* getNormModeUsage( ); // Helper functions for the main program's usage string. - /** Functions related to descriptor normalization: multiply with a power of 2 + /** + * @brief Functions related to descriptor normalization: multiply with a power of 2 */ int getNormalizationMultiplier( ) const; void setNormalizationMultiplier( int mul ); - /* The input image is stretched by 2^upscale_factor + /** + * @brief The input image is stretched by 2^upscale_factor * before processing. The factor 1 is default. */ inline float getUpscaleFactor( ) const { @@ -185,125 +287,124 @@ struct Config return _max_extrema; } - /* Have we enabled filtering? This is a compile time decision. + /** + * Have we enabled filtering? This is a compile time decision. * The reason is that we use Thrust, which increases compile * considerably and can be deactivated at the CMake level when * you work on something else. */ bool getCanFilterExtrema() const; - /* Set the approximate number of extrema whose orientation and descriptor + /** + * Set the approximate number of extrema whose orientation and descriptor * should be computed. Default is -1, which sets the hard limit defined * by "number of octaves * getMaxExtrema()". 
*/ - int getFilterMaxExtrema( ) const { - return _filter_max_extrema; - } + int getFilterMaxExtrema() const { return _filter_max_extrema; } - /* To avoid that grid filtering happens only in a tiny piece of an image, + /** + * @brief Get the grid size for filtering. + * + * To avoid that grid filtering happens only in a tiny piece of an image, * the image is split into getFilterGridSize() X getFilterGridSize() tiles * and we allow getFilterMaxExtrema() / getFilterGridSize() extrema in * each tile. */ - int getFilterGridSize( ) const { - return _filter_grid_size; - } + int getFilterGridSize() const { return _filter_grid_size; } - /* See enum GridFilterMode */ - GridFilterMode getFilterSorting() const { - return _grid_filter_mode; - } + /** + * @brief Get the filtering mode. + * @return the filtering mode. + * @see GridFilterMode + */ + GridFilterMode getFilterSorting() const { return _grid_filter_mode; } - // check if we use direct downscaling from input image - // for all octaves - inline ScalingMode getScalingMode() const { - return _scaling_mode; - } + /** + * @brief Get the scaling mode. + * @return the scaling mode. + * @see ScalingMode + */ + inline ScalingMode getScalingMode() const { return _scaling_mode; } - inline DescMode getDescMode() const { - return _desc_mode; - } + /** + * @brief Get the descriptor extraction mode + * @return the descriptor extraction mode + * @see DescMode + */ + inline DescMode getDescMode() const { return _desc_mode; } bool equal( const Config& other ) const; private: - // default threshold 0.0 default of vlFeat - // default threshold 5.0 / 256.0 - // default threshold 15.0 / 256.0 - it seems our DoG is really small ???
- // default threshold 5.0 from Celebrandil, not happening in our data - // default threshold 0.04 / (_levels-3.0) / 2.0f * 255 - // from Bemap -> 1.69 (makes no sense) + /// default threshold 0.0 default of vlFeat + /// default threshold 5.0 / 256.0 + /// default threshold 15.0 / 256.0 - it seems our DoG is really small ??? + /// default threshold 5.0 from Celebrandil, not happening in our data + /// default threshold 0.04 / (_levels-3.0) / 2.0f * 255 + /// from Bemap -> 1.69 (makes no sense) float _threshold; - // determine the image format of the first octave - // relative to the input image's size (x,y) as follows: - // (x / 2^start_sampling, y / 2^start_sampling ) + /// determine the image format of the first octave + /// relative to the input image's size (x,y) as follows: + /// (x / 2^start_sampling, y / 2^start_sampling ) float _upscale_factor; - // default LogMode::None + /// default LogMode::None LogMode _log_mode; - // default: ScalingMode::DownscaledOctaves + /// default: ScalingMode::DownscaledOctaves ScalingMode _scaling_mode; - // default: DescMode::Loop + /// default: DescMode::Loop DescMode _desc_mode; - // default: RandomScale + /// default: RandomScale GridFilterMode _grid_filter_mode; public: bool verbose; private: - /* The number of initial extrema that can be discovered in an octave. - * This parameter changes memory requirements. - */ + /// The number of initial extrema that can be discovered in an octave. + /// This parameter changes memory requirements. int _max_extrema; - /* The maximum number of extrema that are returned. There may be - * several descriptors for each extremum. - */ + /// The maximum number of extrema that are returned. There may be + /// several descriptors for each extremum. int _filter_max_extrema; - // Used to achieve an approximation of _max_entrema - // Subdivide the image in this number of vertical and horizontal tiles, - // i.e. the grid is actually _grid_size X _grid_size tiles. 
- // default: 1 + /// Used to achieve an approximation of _max_entrema + /// Subdivide the image in this number of vertical and horizontal tiles, + /// i.e. the grid is actually _grid_size X _grid_size tiles. + /// default: 1 int _filter_grid_size; - /* Modes are computation according to VLFeat or OpenCV, - * or fixed size. Default is VLFeat mode. - */ + /// Modes are computation according to VLFeat or OpenCV, + /// or fixed size. Default is VLFeat mode. GaussMode _gauss_mode; - /* Modes are PopSift, OpenCV and VLFeat. - * Default is currently identical to PopSift. - */ + /// Modes are PopSift, OpenCV and VLFeat. + /// Default is currently identical to PopSift. SiftMode _sift_mode; - /* VLFeat code assumes that an initial input image is partially blurred. - * This changes the blur computation for the very first level of the first - * octave, turning it into a special case. - */ + /// VLFeat code assumes that an initial input image is partially blurred. + /// This changes the blur computation for the very first level of the first + /// octave, turning it into a special case. bool _assume_initial_blur; float _initial_blur; - /* OpenMVG requires a normalization named rootSift, the - * classical L2-inspired mode is also supported. - */ + /// OpenMVG requires a normalization named rootSift, the + /// classical L2-inspired mode is also supported. NormMode _normalization_mode; - /* SIFT descriptors are normalized in a final step. - * The values of the descriptor can also be multiplied - * by a power of 2 if required. - * Specify the exponent. - */ + /// SIFT descriptors are normalized in a final step. + /// The values of the descriptor can also be multiplied + /// by a power of 2 if required. + /// Specify the exponent. int _normalization_multiplier; - /* Call the debug functions in gauss_filter.cu to print Gauss - * filter width and Gauss tables in use. - */ + /// Call the debug functions in gauss_filter.cu to print Gauss + /// filter width and Gauss tables in use. 
bool _print_gauss_tables; }; diff --git a/src/popsift/sift_extremum.h b/src/popsift/sift_extremum.h index c3e41954..0363a02e 100755 --- a/src/popsift/sift_extremum.h +++ b/src/popsift/sift_extremum.h @@ -14,7 +14,8 @@ namespace popsift { -/* This is an internal data structure. +/** + * @brief This is an internal data structure. * Separated from the final Extremum data structure to implement * grid filtering in a space-efficient manner. In grid filtering, * extrema are first found, after that some may be discarded in @@ -25,14 +26,20 @@ struct InitialExtremum { float xpos; float ypos; - int lpos; // extremum refined into this level - float sigma; // scale; - int cell; // index into the grid for grid-based extrema filtering - bool ignore; // true if this extremum has been filtered - int write_index; // if any initial extrema are ignored, new index for Extremum + /// extremum refined into this level + int lpos; + /// scale + float sigma; + /// index into the grid for grid-based extrema filtering + int cell; + /// true if this extremum has been filtered + bool ignore; + /// if any initial extrema are ignored, new index for Extremum + int write_index; }; -/* This is an internal data structure. +/** + * @brief This is an internal data structure. * For performance reasons, it would be appropriate to split * the first 4 values from the rest of this structure. Right * now, descriptor computation is a bigger concern. 
@@ -41,16 +48,22 @@ struct Extremum { float xpos; float ypos; - int lpos; // extremum refined into this level - float sigma; // scale; + /// extremum refined into this level + int lpos; + /// scale + float sigma; - int octave; // belonging to this octave - int num_ori; // number of this extremum's orientations - int idx_ori; // exclusive prefix sum of the layer's orientations + /// belonging to this octave + int octave; + /// number of this extremum's orientations + int num_ori; + /// exclusive prefix sum of the layer's orientations + int idx_ori; float orientation[ORIENTATION_MAX_COUNT]; }; -/* This is a data structure that is returned to a calling program. +/** + * @brief This is a data structure that is returned to a calling program. * This is the SIFT descriptor itself. */ struct Descriptor diff --git a/src/popsift/sift_octave.h b/src/popsift/sift_octave.h index 927b1cb1..fc2ad13b 100755 --- a/src/popsift/sift_octave.h +++ b/src/popsift/sift_octave.h @@ -131,10 +131,14 @@ class Octave return _dog_3d_tex_linear; } - /** - * alloc() - allocates all GPU memories for one octave - * @param width in floats, not bytes!!! - */ + /** + * @brief Allocates all GPU memories for one octave. 
+ * @param conf + * @param width in floats + * @param height + * @param levels + * @param gauss_group + */ void alloc( const Config& conf, int width, int height, From e24d12a8401c617e6f22f791ed1b48961d4c38e6 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Mon, 29 Jun 2020 13:42:48 +0200 Subject: [PATCH 229/285] [doc] add .readthedocs.yml --- .readthedocs.yml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 .readthedocs.yml diff --git a/.readthedocs.yml b/.readthedocs.yml new file mode 100644 index 00000000..79484e63 --- /dev/null +++ b/.readthedocs.yml @@ -0,0 +1,19 @@ +# .readthedocs.yml +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 2 + +# Build documentation in the docs/ directory with Sphinx +sphinx: + configuration: doc/sphinx/source/conf.py + +# Optionally build your docs in additional formats such as PDF and ePub +formats: all + +# Optionally set the version of Python and requirements required to build your docs +python: + version: 3.7 + install: + - requirements: doc/sphinx/requirements.txt From 2ce996021395e7f9bbc2a3607db9e6200fb77040 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Thu, 2 Jul 2020 09:59:04 +0200 Subject: [PATCH 230/285] [cmake] by default do not build the doc --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6b1ed33a..6e62ae7b 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,7 +8,7 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/${CMAKE_SYSTEM_NAME}-$ set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/${CMAKE_SYSTEM_NAME}-${CMAKE_SYSTEM_PROCESSOR}") option(PopSift_BUILD_EXAMPLES "Build PopSift applications." ON) -option(PopSift_BUILD_DOCS "Build PopSift documentation." ON) +option(PopSift_BUILD_DOCS "Build PopSift documentation." OFF) option(PopSift_USE_NVTX_PROFILING "Use CUDA NVTX for profiling." 
OFF) option(PopSift_ERRCHK_AFTER_KERNEL "Synchronize and check CUDA error after every kernel." OFF) option(PopSift_USE_POSITION_INDEPENDENT_CODE "Generate position independent code." ON) From 387face77392f020abd8de73bdbd81a176cbed60 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Thu, 2 Jul 2020 10:01:03 +0200 Subject: [PATCH 231/285] [ci] do not build the doc --- .travis.yml | 8 ++++---- appveyor.yml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.travis.yml b/.travis.yml index 3e9d2db1..9a5dd795 100644 --- a/.travis.yml +++ b/.travis.yml @@ -79,14 +79,14 @@ before_script: - mkdir -p ${POPSIFT_BUILD_RELEASE} - cd ${POPSIFT_BUILD_RELEASE} - > - cmake . ${POPSIFT_SOURCE} -DCMAKE_INSTALL_PREFIX=${POPSIFT_INSTALL_RELEASE} -DCMAKE_BUILD_TYPE=Release + cmake . ${POPSIFT_SOURCE} -DCMAKE_INSTALL_PREFIX=${POPSIFT_INSTALL_RELEASE} -DCMAKE_BUILD_TYPE=Release -DPopSift_BUILD_DOCS:BOOL=OFF # Classic debug build # Create build folder - mkdir -p ${POPSIFT_BUILD_DEBUG} - cd ${POPSIFT_BUILD_DEBUG} - > - cmake . ${POPSIFT_SOURCE} -DCMAKE_INSTALL_PREFIX=${POPSIFT_INSTALL_DEBUG} -DCMAKE_BUILD_TYPE=Debug + cmake . ${POPSIFT_SOURCE} -DCMAKE_INSTALL_PREFIX=${POPSIFT_INSTALL_DEBUG} -DCMAKE_BUILD_TYPE=Debug -DPopSift_BUILD_DOCS:BOOL=OFF script: - cd ${POPSIFT_BUILD_RELEASE} @@ -98,7 +98,7 @@ script: - cd ${POPSIFT_APP_SRC} - mkdir -p ${POPSIFT_APP_BUILD_RELEASE} - cd ${POPSIFT_APP_BUILD_RELEASE} - - cmake .. -DPopSift_DIR=${POPSIFT_INSTALL_RELEASE}/lib/cmake/PopSift/ -DCMAKE_INSTALL_PREFIX=${POPSIFT_APP_INSTALL_RELEASE} -DCMAKE_BUILD_TYPE=Release + - cmake .. -DPopSift_DIR=${POPSIFT_INSTALL_RELEASE}/lib/cmake/PopSift/ -DCMAKE_INSTALL_PREFIX=${POPSIFT_APP_INSTALL_RELEASE} -DCMAKE_BUILD_TYPE=Release -DPopSift_BUILD_DOCS:BOOL=OFF - make install -j 2 VERBOSE=1 # same for debug @@ -111,7 +111,7 @@ script: - cd ${POPSIFT_APP_SRC} - mkdir -p ${POPSIFT_APP_BUILD_DEBUG} - cd ${POPSIFT_APP_BUILD_DEBUG} - - cmake .. 
-DPopSift_DIR=${POPSIFT_INSTALL_DEBUG}/lib/cmake/PopSift/ -DCMAKE_INSTALL_PREFIX=${POPSIFT_APP_INSTALL_DEBUG} -DCMAKE_BUILD_TYPE=Debug + - cmake .. -DPopSift_DIR=${POPSIFT_INSTALL_DEBUG}/lib/cmake/PopSift/ -DCMAKE_INSTALL_PREFIX=${POPSIFT_APP_INSTALL_DEBUG} -DCMAKE_BUILD_TYPE=Debug -DPopSift_BUILD_DOCS:BOOL=OFF - make install -j 2 VERBOSE=1 cache: diff --git a/appveyor.yml b/appveyor.yml index 3cdf7298..dc822f2b 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -26,7 +26,7 @@ install: before_build: - md build - cd build - - cmake -G "Visual Studio 14 2015" -A x64 -T v140,host=x64 -DBUILD_SHARED_LIBS=%DBUILD_SHARED_LIBS% -DPopSift_USE_POSITION_INDEPENDENT_CODE:BOOL=%DBUILD_SHARED_LIBS% -DPopSift_BUILD_EXAMPLES:BOOL=ON -DCMAKE_BUILD_TYPE=%configuration% -DCMAKE_TOOLCHAIN_FILE=c:/tools/vcpkg/scripts/buildsystems/vcpkg.cmake .. + - cmake -G "Visual Studio 14 2015" -A x64 -T v140,host=x64 -DBUILD_SHARED_LIBS=%DBUILD_SHARED_LIBS% -DPopSift_BUILD_DOCS:BOOL=OFF -DPopSift_USE_POSITION_INDEPENDENT_CODE:BOOL=%DBUILD_SHARED_LIBS% -DPopSift_BUILD_EXAMPLES:BOOL=ON -DCMAKE_BUILD_TYPE=%configuration% -DCMAKE_TOOLCHAIN_FILE=c:/tools/vcpkg/scripts/buildsystems/vcpkg.cmake .. 
- ls -l build: From 568a4c93426d4549c4896f3b46914d768356cab9 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Fri, 24 Jul 2020 11:21:26 +0200 Subject: [PATCH 232/285] [cmake] add support CC for cuda 11 fix #102 --- cmake/ChooseCudaCC.cmake | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/cmake/ChooseCudaCC.cmake b/cmake/ChooseCudaCC.cmake index fd75478e..abd0086f 100755 --- a/cmake/ChooseCudaCC.cmake +++ b/cmake/ChooseCudaCC.cmake @@ -65,7 +65,7 @@ function(chooseCudaCC SUPPORTED_CC SUPPORTED_GENCODE_FLAGS) set(CC_LIST_BY_SYSTEM_PROCESSOR "") if(CMAKE_SYSTEM_PROCESSOR IN_LIST OTHER_SUPPORTED_PROCESSORS) - list(APPEND CC_LIST_BY_SYSTEM_PROCESSOR "20;21;30;35;50;52;60;61;70;75") + list(APPEND CC_LIST_BY_SYSTEM_PROCESSOR "20;21;30;35;50;52;60;61;70;75;80") endif() if(CMAKE_SYSTEM_PROCESSOR IN_LIST TEGRA_SUPPORTED_PROCESSORS) list(APPEND CC_LIST_BY_SYSTEM_PROCESSOR "32;53;62;72") @@ -79,9 +79,12 @@ function(chooseCudaCC SUPPORTED_CC SUPPORTED_GENCODE_FLAGS) # Shortening the lists saves a lot of compile time. 
# set(CUDA_MIN_CC 20) - set(CUDA_MAX_CC 75) - if(CUDA_VERSION_MAJOR GREATER_EQUAL 10) + set(CUDA_MAX_CC 80) + if(CUDA_VERSION_MAJOR GREATER_EQUAL 11) + set(CUDA_MIN_CC 35) + elseif(CUDA_VERSION_MAJOR GREATER_EQUAL 10) set(CUDA_MIN_CC 30) + set(CUDA_MAX_CC 75) elseif(CUDA_VERSION_MAJOR GREATER_EQUAL 9) set(CUDA_MIN_CC 30) set(CUDA_MAX_CC 72) From 5cebd391830f5d4f440f406cac152737a1f36adb Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Fri, 24 Jul 2020 11:21:42 +0200 Subject: [PATCH 233/285] [ci] add cuda 11 --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index 3e9d2db1..8d66fbf8 100644 --- a/.travis.yml +++ b/.travis.yml @@ -16,6 +16,7 @@ env: - CUDA_VERSION_MAJOR="8" CUDA_VERSION_MINOR="0" CUDA_PKG_LONGVERSION="${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}.61-1" CUDA_PKG_VERSION="${CUDA_VERSION_MAJOR}-${CUDA_VERSION_MINOR}" - CUDA_VERSION_MAJOR="9" CUDA_VERSION_MINOR="2" CUDA_PKG_LONGVERSION="${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}.148-1" CUDA_PKG_VERSION="${CUDA_VERSION_MAJOR}-${CUDA_VERSION_MINOR}" - CUDA_VERSION_MAJOR="10" CUDA_VERSION_MINOR="2" CUDA_PKG_LONGVERSION="${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}.89-1" CUDA_PKG_VERSION="${CUDA_VERSION_MAJOR}-${CUDA_VERSION_MINOR}" + - CUDA_VERSION_MAJOR="11" CUDA_VERSION_MINOR="0" CUDA_PKG_LONGVERSION="${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}.2-1" CUDA_PKG_VERSION="${CUDA_VERSION_MAJOR}-${CUDA_VERSION_MINOR}" global: From 2d9ad34d7127faf9df95f78250df3fd93416206a Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Fri, 24 Jul 2020 11:56:18 +0200 Subject: [PATCH 234/285] [ci] test cuda 11 --- .travis.yml | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/.travis.yml b/.travis.yml index 8d66fbf8..4e71f353 100644 --- a/.travis.yml +++ b/.travis.yml @@ -58,21 +58,33 @@ before_install: install: - UBUNTU_VERSION=ubuntu1604 - - CUDA_REPO_PKG=cuda-repo-${UBUNTU_VERSION}_${CUDA_PKG_LONGVERSION}_amd64.deb - - wget 
http://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/x86_64/$CUDA_REPO_PKG - - travis_retry sudo apt-key adv --fetch-keys http://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/x86_64/7fa2af80.pub - - sudo dpkg -i $CUDA_REPO_PKG - - rm ${CUDA_REPO_PKG} - - travis_retry sudo apt-get -y update - # cuda > 10.0 changed cublas naming - > - if [ ${CUDA_VERSION_MAJOR} -lt 10 ]; then - CUBLAS_PKG=cuda-cublas-dev-$CUDA_PKG_VERSION + if [ ${CUDA_VERSION_MAJOR} -lt 11 ]; then + CUDA_REPO_PKG=cuda-repo-${UBUNTU_VERSION}_${CUDA_PKG_LONGVERSION}_amd64.deb + wget http://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/x86_64/$CUDA_REPO_PKG + travis_retry sudo apt-key adv --fetch-keys http://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/x86_64/7fa2af80.pub + sudo dpkg -i $CUDA_REPO_PKG + rm ${CUDA_REPO_PKG} + travis_retry sudo apt-get -y update + # cuda > 10.0 changed cublas naming + if [ ${CUDA_VERSION_MAJOR} -lt 10 ]; then + CUBLAS_PKG=cuda-cublas-dev-$CUDA_PKG_VERSION + else + CUBLAS_PKG=libcublas-dev + fi + travis_retry sudo apt-get install -y --no-install-recommends --allow-unauthenticated cuda-core-$CUDA_PKG_VERSION cuda-cudart-dev-$CUDA_PKG_VERSION ${CUBLAS_PKG} cuda-curand-dev-$CUDA_PKG_VERSION + sudo ln -s /usr/local/cuda-${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR} /usr/local/cuda else - CUBLAS_PKG=libcublas-dev + wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/cuda-ubuntu1604.pin + travis_retry sudo mv cuda-ubuntu1604.pin /etc/apt/preferences.d/cuda-repository-pin-600 + travis_retry sudo apt-key adv --fetch-keys http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/7fa2af80.pub + travis_retry sudo add-apt-repository "deb http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/ /" + sudo apt-get update && sudo apt-get -y install cuda fi - - travis_retry sudo apt-get install -y --no-install-recommends --allow-unauthenticated 
cuda-core-$CUDA_PKG_VERSION cuda-cudart-dev-$CUDA_PKG_VERSION ${CUBLAS_PKG} cuda-curand-dev-$CUDA_PKG_VERSION - - sudo ln -s /usr/local/cuda-${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR} /usr/local/cuda +# - CUDA_REPO_PKG=cuda-repo-${UBUNTU_VERSION}_${CUDA_PKG_LONGVERSION}_amd64.deb +# - wget http://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/x86_64/$CUDA_REPO_PKG + + before_script: # Classic release build From d5c63cb700e69366711b4f6d73843cf135492436 Mon Sep 17 00:00:00 2001 From: Carsten Griwodz Date: Thu, 30 Jul 2020 12:04:58 +0200 Subject: [PATCH 235/285] [cmake] CUDA SDK 11 requires C++14 --- CMakeLists.txt | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8f66847e..396308b0 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -44,16 +44,6 @@ if(MSVC AND NOT BUILD_SHARED_LIBS) endforeach() endif() -# for some reason this line is necessary to propagate the standard to nvcc -# On MSVC this is not necessary / nvcc does not recognize the flag for MSVC -if(NOT MSVC) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") -endif() -set(CMAKE_CXX_STANDARD 11) -set(CMAKE_CXX_STANDARD_REQUIRED ON) -set(CMAKE_CUDA_STANDARD 11) -set(CMAKE_CUDA_STANDARD_REQUIRED ON) - # ============================================================================== # GNUInstallDirs CMake module # - Define GNU standard installation directories @@ -164,6 +154,28 @@ if(CUDA_VERSION VERSION_GREATER_EQUAL "7.5") endif() endif() +if(CUDA_VERSION_MAJOR LESS 11) + # for some reason this line is necessary to propagate the standard to nvcc + # On MSVC this is not necessary / nvcc does not recognize the flag for MSVC + if(NOT MSVC) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") + endif() + set(CMAKE_CXX_STANDARD 11) + set(CMAKE_CXX_STANDARD_REQUIRED ON) + set(CMAKE_CUDA_STANDARD 11) + set(CMAKE_CUDA_STANDARD_REQUIRED ON) +else() + # CUDA SDK 11 has a version of CUB that requires C++14 + 
if(NOT MSVC) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14") + endif() + set(CMAKE_CXX_STANDARD 14) + set(CMAKE_CXX_STANDARD_REQUIRED ON) + set(CMAKE_CUDA_STANDARD 14) + set(CMAKE_CUDA_STANDARD_REQUIRED ON) +endif() + + if(PopSift_USE_NORMF AND CUDA_VERSION VERSION_GREATER_EQUAL "7.5") set(PopSift_HAVE_NORMF 1) else() From f763b0fd259bc265d92d7527291f24fafaf76c2a Mon Sep 17 00:00:00 2001 From: Carsten Griwodz Date: Thu, 30 Jul 2020 13:36:05 +0200 Subject: [PATCH 236/285] [cmake] Add explicit nvcc flags -std=c++14 for SDK 11+ --- CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 396308b0..fec71a95 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -168,6 +168,7 @@ else() # CUDA SDK 11 has a version of CUB that requires C++14 if(NOT MSVC) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14") + list(APPEND CUDA_NVCC_FLAGS "-std=c++14") endif() set(CMAKE_CXX_STANDARD 14) set(CMAKE_CXX_STANDARD_REQUIRED ON) From 35242023346dcbdb0d34391d174528a1cc6e2fe9 Mon Sep 17 00:00:00 2001 From: Carsten Griwodz Date: Fri, 31 Jul 2020 08:00:35 +0200 Subject: [PATCH 237/285] [cmake] shorter C++ Std 11/14 distinction --- CMakeLists.txt | 30 +++++++++++------------------- 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index fec71a95..7194d28f 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -154,27 +154,19 @@ if(CUDA_VERSION VERSION_GREATER_EQUAL "7.5") endif() endif() +set(PopSift_CXX_STANDARD 14) # Thrust/CUB requires C++14 starting with CUDA SDK 11 if(CUDA_VERSION_MAJOR LESS 11) - # for some reason this line is necessary to propagate the standard to nvcc - # On MSVC this is not necessary / nvcc does not recognize the flag for MSVC - if(NOT MSVC) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") - endif() - set(CMAKE_CXX_STANDARD 11) - set(CMAKE_CXX_STANDARD_REQUIRED ON) - set(CMAKE_CUDA_STANDARD 11) - set(CMAKE_CUDA_STANDARD_REQUIRED ON) -else() - # CUDA SDK 11 has a 
version of CUB that requires C++14 - if(NOT MSVC) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14") - list(APPEND CUDA_NVCC_FLAGS "-std=c++14") - endif() - set(CMAKE_CXX_STANDARD 14) - set(CMAKE_CXX_STANDARD_REQUIRED ON) - set(CMAKE_CUDA_STANDARD 14) - set(CMAKE_CUDA_STANDARD_REQUIRED ON) + set(PopSift_CXX_STANDARD 11) +endif() + +if(NOT MSVC) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++${PopSift_CXX_STANDARD}") + list(APPEND CUDA_NVCC_FLAGS "-std=c++${PopSift_CXX_STANDARD}") endif() +set(CMAKE_CXX_STANDARD ${PopSift_CXX_STANDARD}) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CUDA_STANDARD ${PopSift_CXX_STANDARD}) +set(CMAKE_CUDA_STANDARD_REQUIRED ON) if(PopSift_USE_NORMF AND CUDA_VERSION VERSION_GREATER_EQUAL "7.5") From 29ea65292b360d346b9c601b6cbcb0b97ee04134 Mon Sep 17 00:00:00 2001 From: Carsten Griwodz Date: Fri, 31 Jul 2020 08:09:54 +0200 Subject: [PATCH 238/285] [cuda] C++14 is now required --- CMakeLists.txt | 6 +++--- README.md | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7194d28f..4a4d7d96 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -155,9 +155,9 @@ if(CUDA_VERSION VERSION_GREATER_EQUAL "7.5") endif() set(PopSift_CXX_STANDARD 14) # Thrust/CUB requires C++14 starting with CUDA SDK 11 -if(CUDA_VERSION_MAJOR LESS 11) - set(PopSift_CXX_STANDARD 11) -endif() +# if(CUDA_VERSION_MAJOR LESS 11) +# set(PopSift_CXX_STANDARD 11) +# endif() if(NOT MSVC) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++${PopSift_CXX_STANDARD}") diff --git a/README.md b/README.md index 5a6afd74..cd418814 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,8 @@ PopSift compiles and works with NVidia cards of compute capability >= 3.0 (inclu PopSift depends on: +* C++14 + * CUDA >= 7.0 Optionally, for the provided applications: From 29363b2a0f5549a380b376c86c4865e085206446 Mon Sep 17 00:00:00 2001 From: Carsten Griwodz Date: Fri, 31 Jul 2020 08:23:11 +0200 Subject: [PATCH 239/285] [cuda] SDK 8 does not 
support C++14 --- CMakeLists.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4a4d7d96..cea6434d 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -155,9 +155,9 @@ if(CUDA_VERSION VERSION_GREATER_EQUAL "7.5") endif() set(PopSift_CXX_STANDARD 14) # Thrust/CUB requires C++14 starting with CUDA SDK 11 -# if(CUDA_VERSION_MAJOR LESS 11) -# set(PopSift_CXX_STANDARD 11) -# endif() +if(CUDA_VERSION_MAJOR LESS_EQUAL 8) + set(PopSift_CXX_STANDARD 11) +endif() if(NOT MSVC) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++${PopSift_CXX_STANDARD}") From 998db9d868c55073aee1eb895761febd75f24dcc Mon Sep 17 00:00:00 2001 From: Carsten Griwodz Date: Thu, 30 Jul 2020 12:11:24 +0200 Subject: [PATCH 240/285] [cmake] testfile location in CMake cache --- CMakeLists.txt | 14 +++++++++++--- testScripts/CMakeLists.txt | 2 +- testScripts/TEST.sh.in | 10 ++++------ 3 files changed, 16 insertions(+), 10 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index cea6434d..6f4b21a9 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -13,11 +13,10 @@ option(PopSift_ERRCHK_AFTER_KERNEL "Synchronize and check CUDA error after e option(PopSift_USE_POSITION_INDEPENDENT_CODE "Generate position independent code." ON) option(PopSift_USE_GRID_FILTER "Switch off grid filtering to massively reduce compile time while debugging other things." ON) option(PopSift_USE_NORMF "The __normf function computes Euclidean distance on large arrays. Fast but stability is uncertain." 
OFF) -option(PopSift_USE_TEST_CMD "Add testing step for functional verification" OFF) option(PopSift_NVCC_WARNINGS "Switch on several additional warning for CUDA nvcc" OFF) +option(PopSift_USE_TEST_CMD "Add testing step for functional verification" OFF) option(BUILD_SHARED_LIBS "Build shared libraries" ON) - if(PopSift_USE_POSITION_INDEPENDENT_CODE AND NOT MSVC) set(CMAKE_POSITION_INDEPENDENT_CODE ON) endif() @@ -201,7 +200,13 @@ endif() add_subdirectory(src) +set(PopSift_TESTFILE_PATH "../../GIT/popsift-samples/datasets/sample/big_set/" CACHE STRING "Base directory where your test files are stored") if(PopSift_USE_TEST_CMD) + if(NOT IS_ABSOLUTE("${PopSift_TESTFILE_PATH}")) + get_filename_component(PopSift_TESTFILES "${PopSift_TESTFILE_PATH}" ABSOLUTE) + set(PopSift_TESTFILE_PATH "${PopSift_TESTFILES}") + endif() + add_subdirectory(testScripts) endif() @@ -228,9 +233,12 @@ message(STATUS "Generate position independent code: " ${CMAKE_POSITION_INDEPENDE message(STATUS "Use CUDA NVTX for profiling: " ${PopSift_USE_NVTX_PROFILING}) message(STATUS "Synchronize and check CUDA error after every kernel: " ${PopSift_ERRCHK_AFTER_KERNEL}) message(STATUS "Grid filtering: " ${PopSift_USE_GRID_FILTER}) -message(STATUS "Testing step: " ${PopSift_USE_TEST_CMD}) message(STATUS "Additional warning for CUDA nvcc: " ${PopSift_NVCC_WARNINGS}) message(STATUS "Compiling for CUDA CCs: ${PopSift_CUDA_CC_LIST}") message(STATUS "Install path: " ${CMAKE_INSTALL_PREFIX}) +message(STATUS "Testing step: " ${PopSift_USE_TEST_CMD}) +if(PopSift_USE_TEST_CMD) + message(STATUS "Path for test input: " ${PopSift_TESTFILE_PATH}) +endif() message("\n******************************************") message("\n") diff --git a/testScripts/CMakeLists.txt b/testScripts/CMakeLists.txt index 77fb7ede..411a8e30 100755 --- a/testScripts/CMakeLists.txt +++ b/testScripts/CMakeLists.txt @@ -5,7 +5,7 @@ configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/testOxfordDataset.sh.in ${CMAKE_CURRENT_BINARY_DIR}/testOxfordDataset.sh 
) configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/TEST.sh.in - ${CMAKE_CURRENT_BINARY_DIR}/TEST.sh ) + ${CMAKE_CURRENT_BINARY_DIR}/TEST.sh ) add_custom_target( prepare-test diff --git a/testScripts/TEST.sh.in b/testScripts/TEST.sh.in index 89dd094e..28bc2433 100755 --- a/testScripts/TEST.sh.in +++ b/testScripts/TEST.sh.in @@ -1,10 +1,8 @@ #!/bin/bash -# IMAGE=../../popsift-samples/sample/big_set/boat/img2.ppm -IMAGE=../../popsift-samples/sample/big_set/boat/img3.ppm -# IMAGE=./test-17x17.pgm +IMAGE=@PopSift_TESTFILE_PATH@/boat/img3.ppm -POPSIFT_DEMO_BIN=@EXECUTABLE_OUTPUT_PATH@/popsift-demo +POPSIFT_DEMO_BIN=@CMAKE_BINARY_DIR@/@CMAKE_SYSTEM_NAME@-@CMAKE_SYSTEM_PROCESSOR@/popsift-demo LOG=--log # LOG= @@ -18,8 +16,8 @@ FILTER="--filter-max-extrema=2000 --filter-grid=2 --filter-sort=down" PARAMS="$LOG $GAUSS_MODE $SCALING $FILTER --popsift-mode --octaves=8 --threshold=0.04 --edge-threshold=10.0 --initial-blur=0.5" -for mode in loop ; do -# for mode in loop grid igrid notile ; do +# for mode in loop ; do +for mode in loop grid igrid notile ; do # for mode in igrid notile ; do echo "MODE: $mode" echo "$POPSIFT_DEMO_BIN $PARAMS --desc-mode=$mode --write-as-uchar --norm-multi=9 -i $IMAGE" From e7e3141e2039ce2a66fabfae2a5cb279c0b10f7c Mon Sep 17 00:00:00 2001 From: Carsten Griwodz Date: Thu, 30 Jul 2020 14:36:23 +0200 Subject: [PATCH 241/285] [doc] clarify that SKD 11 does not support CC 3.0 any more --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index cd418814..79566e33 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,8 @@ PopSift tries to stick as closely as possible to David Lowe's famous paper [1], PopSift compiles and works with NVidia cards of compute capability >= 3.0 (including the GT 650M), but the code is developed with the compute capability 5.2 card GTX 980 Ti in mind. +CUDA SDK 11 does no longer support compute capability 3.0. 3.5 is still supported with deprecation warning. 
+ ## Dependencies PopSift depends on: From b84581dd0d7efd409e6a6d1a45ea809ff3467f42 Mon Sep 17 00:00:00 2001 From: Carsten Griwodz Date: Thu, 30 Jul 2020 14:42:07 +0200 Subject: [PATCH 242/285] [test] slightly better feature comparison file --- testScripts/testOxfordDataset.sh.in | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/testScripts/testOxfordDataset.sh.in b/testScripts/testOxfordDataset.sh.in index 1beab726..e04fd3d5 100644 --- a/testScripts/testOxfordDataset.sh.in +++ b/testScripts/testOxfordDataset.sh.in @@ -30,12 +30,12 @@ do cd $dataset for img in img1 img2 img3 img4 img5 img6 do - if [ -f @CMAKE_SOURCE_DIR@/oxford/$dataset/$img.pgm ] + if [ -f @PopSift_TESTFILE_PATH@/$dataset/$img.pgm ] then - imgfile=@CMAKE_SOURCE_DIR@/oxford/$dataset/$img.pgm - elif [ -f @CMAKE_SOURCE_DIR@/oxford/$dataset/$img.ppm ] + imgfile=@PopSift_TESTFILE_PATH@/$dataset/$img.pgm + elif [ -f @PopSift_TESTFILE_PATH@/$dataset/$img.ppm ] then - imgfile=@CMAKE_SOURCE_DIR@/oxford/$dataset/$img.ppm + imgfile=@PopSift_TESTFILE_PATH@/$dataset/$img.ppm else continue fi @@ -45,7 +45,7 @@ do echo "Directory output-$img exists. Skipping." continue fi - @EXECUTABLE_OUTPUT_PATH@/popsift-demo --log --gauss-mode vlfeat --desc-mode loop --popsift-mode --root-sift --downsampling -1 -i $imgfile + @CMAKE_BINARY_DIR@/@CMAKE_SYSTEM_NAME@-@CMAKE_SYSTEM_PROCESSOR@/popsift-demo --log --gauss-mode vlfeat --desc-mode loop --popsift-mode --root-sift --downsampling -1 -i $imgfile if [ $? != 0 ] then echo "Running popsift on $imgfile failed." 
From 4de4db68175d4754618df21ba6acf3693165e501 Mon Sep 17 00:00:00 2001 From: Carsten Griwodz Date: Tue, 4 Aug 2020 09:44:37 +0200 Subject: [PATCH 243/285] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index cd418814..881ea544 100644 --- a/README.md +++ b/README.md @@ -15,9 +15,9 @@ PopSift compiles and works with NVidia cards of compute capability >= 3.0 (inclu PopSift depends on: -* C++14 +* Host compiler that supports C++14 for CUDA SDK >= 9.0 and C++11 for CUDA SDK 8 -* CUDA >= 7.0 +* CUDA >= 8.0 Optionally, for the provided applications: From 4f50f4cca07f5d35a9c22aa23fa7e08b276ada60 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Mon, 10 Aug 2020 09:38:06 +0200 Subject: [PATCH 244/285] [doc] updated changelog --- CHANGES.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGES.md b/CHANGES.md index 0f53088b..aaaad630 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -20,6 +20,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - Improved checks for CUDA textures [PR](https://github.com/alicevision/popsift/pull/89) - CMake: Improved support for all Cuda CC [PR](https://github.com/alicevision/popsift/pull/75) +- CMake: support for cuda 11 [PR](https://github.com/alicevision/popsift/pull/103) - Support for Cuda CC 7 cards (RTX 2080) [PR](https://github.com/alicevision/popsift/pull/67) - Support for Boost 1.70 [PR](https://github.com/alicevision/popsift/pull/65) From 8f818c34bfaa42e6612a063f30c156ef354410cf Mon Sep 17 00:00:00 2001 From: Carsten Griwodz Date: Thu, 10 Sep 2020 07:14:35 +0200 Subject: [PATCH 245/285] [bug] pair malloc with free, not delete --- src/popsift/popsift.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/popsift/popsift.cpp b/src/popsift/popsift.cpp index ec17149a..68e83808 100755 --- a/src/popsift/popsift.cpp +++ b/src/popsift/popsift.cpp @@ -380,7 +380,7 @@ SiftJob::SiftJob( int w, int h, const 
float* imageData ) SiftJob::~SiftJob( ) { - delete [] _imageData; + free( _imageData ); } void SiftJob::setImg( popsift::ImageBase* img ) From 800b7b16b70db32b34205f6081870bfdb0151806 Mon Sep 17 00:00:00 2001 From: Carsten Griwodz Date: Fri, 11 Sep 2020 09:12:56 +0200 Subject: [PATCH 246/285] [bugfix] fumbling in the dark --- src/popsift/s_orientation.cu | 25 +++++++++++-------------- src/popsift/sift_pyramid.cu | 2 +- 2 files changed, 12 insertions(+), 15 deletions(-) diff --git a/src/popsift/s_orientation.cu b/src/popsift/s_orientation.cu index 95ef911d..db1c60a9 100644 --- a/src/popsift/s_orientation.cu +++ b/src/popsift/s_orientation.cu @@ -66,13 +66,15 @@ void ori_par( const int octave, { const int extremum_index = blockIdx.x * blockDim.y; - if( extremum_index >= dct.ext_ct[octave] ) return; // a few trailing warps + if( popsift::all( extremum_index >= dct.ext_ct[octave] ) ) return; // a few trailing warps const int iext_off = dobuf.i_ext_off[octave][extremum_index]; const InitialExtremum* iext = &dobuf.i_ext_dat[octave][iext_off]; __shared__ float hist [ORI_NBINS]; __shared__ float sm_hist[ORI_NBINS]; + __shared__ float refined_angle[64]; + __shared__ float yval [64]; for( int i = threadIdx.x; i < ORI_NBINS; i += blockDim.x ) hist[i] = 0.0f; __syncthreads(); @@ -84,11 +86,11 @@ void ori_par( const int octave, const float sig = iext->sigma; /* orientation histogram radius */ - float sigw = ORI_WINFACTOR * sig; - int32_t rad = (int)roundf((3.0f * sigw)); + const float sigw = ORI_WINFACTOR * sig; + const int32_t rad = (int)roundf((3.0f * sigw)); - float factor = __fdividef( -0.5f, (sigw * sigw) ); - int sq_thres = rad * rad; + const float factor = __fdividef( -0.5f, (sigw * sigw) ); + const int sq_thres = rad * rad; // int xmin = max(1, (int)floor(x - rad)); // int xmax = min(w - 2, (int)floor(x + rad)); @@ -131,14 +133,17 @@ void ori_par( const int octave, if( bidx > ORI_NBINS ) { printf("Crashing: bin %d theta %f :-)\n", bidx, theta); } + if( bidx < 0 ) { + 
printf("Crashing: bin %d theta %f :-)\n", bidx, theta); + } bidx = (bidx == ORI_NBINS) ? 0 : bidx; atomicAdd( &hist[bidx], weight ); } } - __syncthreads(); } + __syncthreads(); #ifdef WITH_VLFEAT_SMOOTHING for( int i=0; i<3; i++ ) { @@ -178,8 +183,6 @@ void ori_par( const int octave, // sub-cell refinement of the histogram cell index, yielding the angle // not necessary to initialize, every cell is computed - __shared__ float refined_angle[64]; - __shared__ float yval [64]; for( int bin = threadIdx.x; popsift::any( bin < ORI_NBINS ); bin += blockDim.x ) { const int prev = bin == 0 ? ORI_NBINS-1 : bin-1; @@ -349,11 +352,8 @@ void ori_prefix_sum( const int total_ext_ct, const int num_octaves ) __host__ void Pyramid::orientation( const Config& conf ) { - nvtxRangePushA( "reading extrema count" ); readDescCountersFromDevice( ); - nvtxRangePop( ); - nvtxRangePushA( "filtering grid" ); int ext_total = 0; for(int o : hct.ext_ct) { @@ -369,11 +369,8 @@ void Pyramid::orientation( const Config& conf ) { ext_total = extrema_filter_grid( conf, ext_total ); } - nvtxRangePop( ); - nvtxRangePushA( "reallocating extrema arrays" ); reallocExtrema( ext_total ); - nvtxRangePop( ); int ext_ct_prefix_sum = 0; for( int octave=0; octave<_num_octaves; octave++ ) { diff --git a/src/popsift/sift_pyramid.cu b/src/popsift/sift_pyramid.cu index a983bc84..f5cb3e2f 100755 --- a/src/popsift/sift_pyramid.cu +++ b/src/popsift/sift_pyramid.cu @@ -290,7 +290,7 @@ FeaturesHost* Pyramid::get_descriptors( const Config& conf ) nvtxRangePushA( "download descriptors" ); FeaturesHost* features = new FeaturesHost( hct.ext_total, hct.ori_total ); - if( hct.ext_total == 0 ) + if( hct.ext_total == 0 || hct.ori_total == 0 ) { nvtxRangePop(); return features; From b28eaf4398da9c086d56c0e60d2b37cf3ff79b95 Mon Sep 17 00:00:00 2001 From: Carsten Griwodz Date: Fri, 11 Sep 2020 10:23:42 +0200 Subject: [PATCH 247/285] [bugfix] use helper function for histogram smoothing --- src/popsift/s_orientation.cu | 50 
++++++++++++++++++++++-------------- 1 file changed, 31 insertions(+), 19 deletions(-) diff --git a/src/popsift/s_orientation.cu b/src/popsift/s_orientation.cu index db1c60a9..81eed100 100644 --- a/src/popsift/s_orientation.cu +++ b/src/popsift/s_orientation.cu @@ -52,6 +52,21 @@ inline float compute_angle( int bin, float hc, float hn, float hp ) return th; } +/* + * Histogram smoothing helper + */ +template +__device__ +inline static float smoothe( const float* const src, const int bin ) +{ + const int prev = (bin == 0) ? ORI_NBINS-1 : bin-1; + const int next = (bin == ORI_NBINS-1) ? 0 : bin+1; + + const float f = ( src[prev] + src[bin] + src[next] ) / 3.0f; + + return f; +} + /* * Compute the keypoint orientations for each extremum * using 16 threads for each of them. @@ -71,13 +86,13 @@ void ori_par( const int octave, const int iext_off = dobuf.i_ext_off[octave][extremum_index]; const InitialExtremum* iext = &dobuf.i_ext_dat[octave][iext_off]; - __shared__ float hist [ORI_NBINS]; - __shared__ float sm_hist[ORI_NBINS]; + __shared__ float hist [64]; + __shared__ float sm_hist [64]; __shared__ float refined_angle[64]; __shared__ float yval [64]; - for( int i = threadIdx.x; i < ORI_NBINS; i += blockDim.x ) hist[i] = 0.0f; - __syncthreads(); + hist[threadIdx.x+ 0] = 0.0f; + hist[threadIdx.x+32] = 0.0f; /* keypoint fractional geometry */ const float x = iext->xpos; @@ -105,6 +120,7 @@ void ori_par( const int octave, int hy = ymax - ymin + 1; int loops = wx * hy; + __syncthreads(); for( int i = threadIdx.x; popsift::any(i < loops); i += blockDim.x ) { if( i < loops ) { @@ -124,7 +140,8 @@ void ori_par( const int octave, float dy = yy - y; int sq_dist = dx * dx + dy * dy; - if (sq_dist <= sq_thres) { + if (sq_dist <= sq_thres) + { float weight = grad * expf(sq_dist * factor); // int bidx = (int)rintf( __fdividef( ORI_NBINS * (theta + M_PI), M_PI2 ) ); @@ -146,23 +163,18 @@ void ori_par( const int octave, __syncthreads(); #ifdef WITH_VLFEAT_SMOOTHING - for( int i=0; i<3; 
i++ ) { - for( int bin = threadIdx.x; bin < ORI_NBINS; bin += blockDim.x ) { - int prev = bin == 0 ? ORI_NBINS-1 : bin-1; - int next = bin == ORI_NBINS-1 ? 0 : bin+1; - sm_hist[bin] = ( hist[prev] + hist[bin] + hist[next] ) / 3.0f; - } + for( int i=0; i<3 ; i++ ) + { + sm_hist[threadIdx.x+ 0] = smoothe<0>( hist, threadIdx.x+ 0 ); + sm_hist[threadIdx.x+32] = smoothe<1>( hist, threadIdx.x+32 ); __syncthreads(); - for( int bin = threadIdx.x; bin < ORI_NBINS; bin += blockDim.x ) { - int prev = bin == 0 ? ORI_NBINS-1 : bin-1; - int next = bin == ORI_NBINS-1 ? 0 : bin+1; - hist[bin] = ( sm_hist[prev] + sm_hist[bin] + sm_hist[next] ) / 3.0f; - } + hist[threadIdx.x+ 0] = smoothe<2>( sm_hist, threadIdx.x+ 0 ); + hist[threadIdx.x+32] = smoothe<3>( sm_hist, threadIdx.x+32 ); __syncthreads(); } - for( int bin = threadIdx.x; bin < ORI_NBINS; bin += blockDim.x ) { - sm_hist[bin] = hist[bin]; - } + + sm_hist[threadIdx.x+ 0] = hist[threadIdx.x+ 0]; + sm_hist[threadIdx.x+32] = hist[threadIdx.x+32]; __syncthreads(); #else // not WITH_VLFEAT_SMOOTHING for( int bin = threadIdx.x; bin < ORI_NBINS; bin += blockDim.x ) { From 913b358e10a48710b55e17c955e56a6156be5a7f Mon Sep 17 00:00:00 2001 From: Carsten Griwodz Date: Fri, 11 Sep 2020 10:24:12 +0200 Subject: [PATCH 248/285] [bugfix] origin of the bug: using CUDA shared mem but claiming 0 bytes --- src/popsift/s_orientation.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/popsift/s_orientation.cu b/src/popsift/s_orientation.cu index 81eed100..f6b36fcd 100644 --- a/src/popsift/s_orientation.cu +++ b/src/popsift/s_orientation.cu @@ -411,7 +411,7 @@ void Pyramid::orientation( const Config& conf ) grid.x = num; ori_par - <<>> + <<>> ( octave, hct.ext_ps[octave], oct_obj.getDataTexPoint( ), From 614b942e30de7627d74f429d11a5ca38d54ee8af Mon Sep 17 00:00:00 2001 From: Carsten Griwodz Date: Fri, 2 Oct 2020 14:13:13 +0200 Subject: [PATCH 249/285] Update testScripts/testOxfordDataset.sh.in Co-authored-by: Simone Gasparini 
--- testScripts/testOxfordDataset.sh.in | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/testScripts/testOxfordDataset.sh.in b/testScripts/testOxfordDataset.sh.in index e04fd3d5..eec048db 100644 --- a/testScripts/testOxfordDataset.sh.in +++ b/testScripts/testOxfordDataset.sh.in @@ -45,7 +45,7 @@ do echo "Directory output-$img exists. Skipping." continue fi - @CMAKE_BINARY_DIR@/@CMAKE_SYSTEM_NAME@-@CMAKE_SYSTEM_PROCESSOR@/popsift-demo --log --gauss-mode vlfeat --desc-mode loop --popsift-mode --root-sift --downsampling -1 -i $imgfile + @CMAKE_RUNTIME_OUTPUT_DIRECTORY@/popsift-demo --log --gauss-mode vlfeat --desc-mode loop --popsift-mode --root-sift --downsampling -1 -i $imgfile if [ $? != 0 ] then echo "Running popsift on $imgfile failed." @@ -157,4 +157,3 @@ do fi done done - From f04e17a785497dac45d741a00e9a2afe4269cbf0 Mon Sep 17 00:00:00 2001 From: Carsten Griwodz Date: Fri, 2 Oct 2020 14:13:32 +0200 Subject: [PATCH 250/285] Update testScripts/TEST.sh.in Co-authored-by: Simone Gasparini --- testScripts/TEST.sh.in | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/testScripts/TEST.sh.in b/testScripts/TEST.sh.in index 28bc2433..943f45eb 100755 --- a/testScripts/TEST.sh.in +++ b/testScripts/TEST.sh.in @@ -2,7 +2,7 @@ IMAGE=@PopSift_TESTFILE_PATH@/boat/img3.ppm -POPSIFT_DEMO_BIN=@CMAKE_BINARY_DIR@/@CMAKE_SYSTEM_NAME@-@CMAKE_SYSTEM_PROCESSOR@/popsift-demo +POPSIFT_DEMO_BIN=@CMAKE_RUNTIME_OUTPUT_DIRECTORY@/popsift-demo LOG=--log # LOG= @@ -71,4 +71,3 @@ echo -n "grid vs notile: " echo -n "igrid vs notile: " ~/GIT/github/popsift-samples/playground/build/compare-descfiles \ -q output-features-igrid.txt output-features-notile.txt - From 309e1a3c1d82336a6a56faf8c05624e3bd2ae8f0 Mon Sep 17 00:00:00 2001 From: Carsten Griwodz Date: Mon, 5 Oct 2020 12:39:26 +0200 Subject: [PATCH 251/285] [testing] remove too long path prefix Co-authored-by: Fabien Castan --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/CMakeLists.txt b/CMakeLists.txt index 6f4b21a9..13d49614 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -200,7 +200,7 @@ endif() add_subdirectory(src) -set(PopSift_TESTFILE_PATH "../../GIT/popsift-samples/datasets/sample/big_set/" CACHE STRING "Base directory where your test files are stored") +set(PopSift_TESTFILE_PATH "popsift-samples/datasets/sample/big_set/" CACHE STRING "Base directory where your test files are stored") if(PopSift_USE_TEST_CMD) if(NOT IS_ABSOLUTE("${PopSift_TESTFILE_PATH}")) get_filename_component(PopSift_TESTFILES "${PopSift_TESTFILE_PATH}" ABSOLUTE) From 13cf9427df109779219f9ede928d698f0a11f4f2 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Tue, 6 Oct 2020 17:03:56 +0200 Subject: [PATCH 252/285] [cmake] added support for SM86 as per here http://arnon.dk/tag/nvcc-flags/ --- cmake/ChooseCudaCC.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/ChooseCudaCC.cmake b/cmake/ChooseCudaCC.cmake index abd0086f..d9bc6c22 100755 --- a/cmake/ChooseCudaCC.cmake +++ b/cmake/ChooseCudaCC.cmake @@ -65,7 +65,7 @@ function(chooseCudaCC SUPPORTED_CC SUPPORTED_GENCODE_FLAGS) set(CC_LIST_BY_SYSTEM_PROCESSOR "") if(CMAKE_SYSTEM_PROCESSOR IN_LIST OTHER_SUPPORTED_PROCESSORS) - list(APPEND CC_LIST_BY_SYSTEM_PROCESSOR "20;21;30;35;50;52;60;61;70;75;80") + list(APPEND CC_LIST_BY_SYSTEM_PROCESSOR "20;21;30;35;50;52;60;61;70;75;80;86") endif() if(CMAKE_SYSTEM_PROCESSOR IN_LIST TEGRA_SUPPORTED_PROCESSORS) list(APPEND CC_LIST_BY_SYSTEM_PROCESSOR "32;53;62;72") @@ -79,7 +79,7 @@ function(chooseCudaCC SUPPORTED_CC SUPPORTED_GENCODE_FLAGS) # Shortening the lists saves a lot of compile time. 
# set(CUDA_MIN_CC 20) - set(CUDA_MAX_CC 80) + set(CUDA_MAX_CC 86) if(CUDA_VERSION_MAJOR GREATER_EQUAL 11) set(CUDA_MIN_CC 35) elseif(CUDA_VERSION_MAJOR GREATER_EQUAL 10) From d6cb7708b8ebe3b6cef935e9329d4dc91c20e193 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Wed, 28 Oct 2020 18:03:52 +0100 Subject: [PATCH 253/285] [cmake] fix sm 86 is only for cuda >= 11.1 --- cmake/ChooseCudaCC.cmake | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cmake/ChooseCudaCC.cmake b/cmake/ChooseCudaCC.cmake index d9bc6c22..425e8bd5 100755 --- a/cmake/ChooseCudaCC.cmake +++ b/cmake/ChooseCudaCC.cmake @@ -80,8 +80,11 @@ function(chooseCudaCC SUPPORTED_CC SUPPORTED_GENCODE_FLAGS) # set(CUDA_MIN_CC 20) set(CUDA_MAX_CC 86) - if(CUDA_VERSION_MAJOR GREATER_EQUAL 11) + if(CUDA_VERSION VERSION_GREATER_EQUAL 11.1) set(CUDA_MIN_CC 35) + elseif(CUDA_VERSION_MAJOR GREATER_EQUAL 11) + set(CUDA_MIN_CC 35) + set(CUDA_MAX_CC 80) elseif(CUDA_VERSION_MAJOR GREATER_EQUAL 10) set(CUDA_MIN_CC 30) set(CUDA_MAX_CC 75) From 290e14277062631753a28158afcef659439e8e4a Mon Sep 17 00:00:00 2001 From: mitjap Date: Fri, 20 Nov 2020 13:46:33 +0100 Subject: [PATCH 254/285] add support for device selection and multiple GPUs --- src/popsift/gauss_filter.cu | 2 +- src/popsift/gauss_filter.h | 2 +- src/popsift/popsift.cpp | 49 ++++++++++++++++++++++++++++------- src/popsift/popsift.h | 8 ++++-- src/popsift/sift_constants.cu | 2 +- src/popsift/sift_constants.h | 2 +- src/popsift/sift_pyramid.cu | 18 ++++++------- src/popsift/sift_pyramid.h | 14 +++++----- 8 files changed, 64 insertions(+), 33 deletions(-) mode change 100755 => 100644 src/popsift/sift_pyramid.cu diff --git a/src/popsift/gauss_filter.cu b/src/popsift/gauss_filter.cu index af02bbbf..537c843e 100755 --- a/src/popsift/gauss_filter.cu +++ b/src/popsift/gauss_filter.cu @@ -18,7 +18,7 @@ namespace popsift { __device__ __constant__ GaussInfo d_gauss; -__align__(128) GaussInfo h_gauss; +__align__(128) thread_local GaussInfo h_gauss; 
__global__ diff --git a/src/popsift/gauss_filter.h b/src/popsift/gauss_filter.h index 174d2c3e..db1a8c25 100755 --- a/src/popsift/gauss_filter.h +++ b/src/popsift/gauss_filter.h @@ -105,7 +105,7 @@ struct GaussInfo }; extern __device__ __constant__ GaussInfo d_gauss; -extern GaussInfo h_gauss; +extern thread_local GaussInfo h_gauss; /* init_filter must be called early to initialize the Gauss tables. */ diff --git a/src/popsift/popsift.cpp b/src/popsift/popsift.cpp index 68e83808..c3d912ca 100755 --- a/src/popsift/popsift.cpp +++ b/src/popsift/popsift.cpp @@ -19,9 +19,13 @@ using namespace std; -PopSift::PopSift( const popsift::Config& config, popsift::Config::ProcessingMode mode, ImageMode imode ) +PopSift::PopSift( const popsift::Config& config, popsift::Config::ProcessingMode mode, ImageMode imode, int device ) : _image_mode( imode ) + , _device(device) { + cudaSetDevice(_device); + configure(config); + if( imode == ByteImages ) { _pipe._unused.push( new popsift::Image); @@ -33,8 +37,6 @@ PopSift::PopSift( const popsift::Config& config, popsift::Config::ProcessingMode _pipe._unused.push( new popsift::ImageFloat ); } - configure( config, true ); - _pipe._thread_stage1.reset( new std::thread( &PopSift::uploadImages, this )); if( mode == popsift::Config::ExtractingMode ) _pipe._thread_stage2.reset( new std::thread( &PopSift::extractDownloadLoop, this )); @@ -42,9 +44,12 @@ PopSift::PopSift( const popsift::Config& config, popsift::Config::ProcessingMode _pipe._thread_stage2.reset( new std::thread( &PopSift::matchPrepareLoop, this )); } -PopSift::PopSift( ImageMode imode ) +PopSift::PopSift( ImageMode imode, int device ) : _image_mode( imode ) + , _device(device) { + cudaSetDevice(_device); + if( imode == ByteImages ) { _pipe._unused.push( new popsift::Image); @@ -68,16 +73,20 @@ PopSift::~PopSift() } } -bool PopSift::configure( const popsift::Config& config, bool force ) +bool PopSift::configure( const popsift::Config& config, bool /*force*/ ) { if( _pipe._pyramid != 
nullptr ) { return false; } _config = config; - _config.levels = max( 2, config.levels ); + return true; +} + +bool PopSift::applyConfiguration(bool force) +{ if( force || ( _config != _shadow_config ) ) { popsift::init_filter( _config, @@ -131,6 +140,16 @@ bool PopSift::private_init( int w, int h ) return true; } +bool PopSift::private_unit() +{ + Pipe& p = _pipe; + + delete p._pyramid; + p._pyramid = nullptr; + + return true; +} + void PopSift::uninit( ) { if(!_isInit) @@ -273,6 +292,8 @@ SiftJob* PopSift::enqueue( int w, void PopSift::uploadImages( ) { + cudaSetDevice(_device); + SiftJob* job; while( ( job = _pipe._queue_stage1.pull() ) != nullptr ) { popsift::ImageBase* img = _pipe._unused.pull(); @@ -284,10 +305,15 @@ void PopSift::uploadImages( ) void PopSift::extractDownloadLoop( ) { + cudaSetDevice(_device); + applyConfiguration(true); + Pipe& p = _pipe; SiftJob* job; while( ( job = p._queue_stage2.pull() ) != nullptr ) { + applyConfiguration(); + popsift::ImageBase* img = job->getImg(); private_init( img->getWidth(), img->getHeight() ); @@ -313,14 +339,21 @@ void PopSift::extractDownloadLoop( ) job->setFeatures( features ); } + + private_unit(); } void PopSift::matchPrepareLoop( ) { + cudaSetDevice(_device); + applyConfiguration(true); + Pipe& p = _pipe; SiftJob* job; while( ( job = p._queue_stage2.pull() ) != nullptr ) { + applyConfiguration(); + popsift::ImageBase* img = job->getImg(); private_init( img->getWidth(), img->getHeight() ); @@ -445,8 +478,4 @@ void PopSift::Pipe::uninit() popsift::ImageBase* img = _unused.pull(); delete img; } - - delete _pyramid; - _pyramid = nullptr; - } diff --git a/src/popsift/popsift.h b/src/popsift/popsift.h index 446e103a..36648f8f 100755 --- a/src/popsift/popsift.h +++ b/src/popsift/popsift.h @@ -150,7 +150,7 @@ class PopSift * @brief We support more than 1 streams, but we support only one sigma and one * level parameters. 
*/ - explicit PopSift( ImageMode imode = ByteImages ); + explicit PopSift( ImageMode imode = ByteImages, int device = 0 ); /** * @brief @@ -160,7 +160,7 @@ class PopSift */ explicit PopSift(const popsift::Config& config, popsift::Config::ProcessingMode mode = popsift::Config::ExtractingMode, - ImageMode imode = ByteImages); + ImageMode imode = ByteImages, int device = 0); /** * @brief Release all the resources. @@ -273,7 +273,10 @@ class PopSift } private: + bool applyConfiguration( bool force = false ); + bool private_init( int w, int h ); + bool private_unit( ); void private_apply_scale_factor( int& w, int& h ); void uploadImages( ); @@ -299,6 +302,7 @@ class PopSift int _last_init_w{}; /* to support deprecated interface */ int _last_init_h{}; /* to support deprecated interface */ ImageMode _image_mode; + int _device; /// whether the object is initialized bool _isInit{true}; diff --git a/src/popsift/sift_constants.cu b/src/popsift/sift_constants.cu index 7c8da529..e8e4f356 100755 --- a/src/popsift/sift_constants.cu +++ b/src/popsift/sift_constants.cu @@ -16,7 +16,7 @@ using namespace std; namespace popsift { -ConstInfo h_consts; +thread_local ConstInfo h_consts; __device__ __constant__ ConstInfo d_consts; void init_constants( float sigma0, int levels, float threshold, float edge_limit, int max_extrema, int normalization_multiplier ) diff --git a/src/popsift/sift_constants.h b/src/popsift/sift_constants.h index 58d5575d..20b3012d 100755 --- a/src/popsift/sift_constants.h +++ b/src/popsift/sift_constants.h @@ -68,7 +68,7 @@ struct ConstInfo float desc_tile[16]; }; -extern ConstInfo h_consts; +extern thread_local ConstInfo h_consts; extern __device__ __constant__ ConstInfo d_consts; diff --git a/src/popsift/sift_pyramid.cu b/src/popsift/sift_pyramid.cu old mode 100755 new mode 100644 index f5cb3e2f..06060052 --- a/src/popsift/sift_pyramid.cu +++ b/src/popsift/sift_pyramid.cu @@ -38,18 +38,15 @@ using namespace std; namespace popsift { -__device__ -ExtremaCounters 
dct; -ExtremaCounters hct; +__device__ ExtremaCounters dct; +thread_local ExtremaCounters hct; -__device__ -ExtremaBuffers dbuf; -ExtremaBuffers dbuf_shadow; // just for managing memories -ExtremaBuffers hbuf; +__device__ ExtremaBuffers dbuf; +thread_local ExtremaBuffers dbuf_shadow; // just for managing memories +thread_local ExtremaBuffers hbuf; -__device__ -DevBuffers dobuf; -DevBuffers dobuf_shadow; // just for managing memories +__device__ DevBuffers dobuf; +thread_local DevBuffers dobuf_shadow; // just for managing memories __global__ void py_print_corner_float(float* img, uint32_t pitch, uint32_t height, uint32_t level) @@ -215,6 +212,7 @@ Pyramid::~Pyramid() { cudaStreamDestroy( _download_stream ); + cudaFree( _d_extrema_num_blocks ); cudaFree( dobuf_shadow.i_ext_dat[0] ); cudaFree( dobuf_shadow.i_ext_off[0] ); cudaFree( dobuf_shadow.features ); diff --git a/src/popsift/sift_pyramid.h b/src/popsift/sift_pyramid.h index cfc6eaae..837fc3b1 100755 --- a/src/popsift/sift_pyramid.h +++ b/src/popsift/sift_pyramid.h @@ -50,13 +50,13 @@ struct DevBuffers Feature* features; }; -extern ExtremaCounters hct; -extern __device__ ExtremaCounters dct; -extern ExtremaBuffers hbuf; -extern __device__ ExtremaBuffers dbuf; -extern ExtremaBuffers dbuf_shadow; // just for managing memories -extern __device__ DevBuffers dobuf; -extern DevBuffers dobuf_shadow; // just for managing memories +extern thread_local ExtremaCounters hct; +extern __device__ ExtremaCounters dct; +extern thread_local ExtremaBuffers hbuf; +extern __device__ ExtremaBuffers dbuf; +extern thread_local ExtremaBuffers dbuf_shadow; // just for managing memories +extern __device__ DevBuffers dobuf; +extern thread_local DevBuffers dobuf_shadow; // just for managing memories class Pyramid { From e9cd9adf2ee511ad5ad1b5b36f1ef2699839b3f9 Mon Sep 17 00:00:00 2001 From: mitjap Date: Mon, 4 Jan 2021 20:53:40 +0100 Subject: [PATCH 255/285] minor improvements --- CHANGES.md | 1 + src/popsift/popsift.cpp | 6 ++++-- 
src/popsift/popsift.h | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index aaaad630..9608b25d 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -23,6 +23,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - CMake: support for cuda 11 [PR](https://github.com/alicevision/popsift/pull/103) - Support for Cuda CC 7 cards (RTX 2080) [PR](https://github.com/alicevision/popsift/pull/67) - Support for Boost 1.70 [PR](https://github.com/alicevision/popsift/pull/65) +- Support for device selection and multiple GPUs [PR](https://github.com/alicevision/popsift/pull/121) ### Fixed - CMake: fixes to allow building on Windows using vcpkg [PR](https://github.com/alicevision/popsift/pull/92) diff --git a/src/popsift/popsift.cpp b/src/popsift/popsift.cpp index c3d912ca..253af961 100755 --- a/src/popsift/popsift.cpp +++ b/src/popsift/popsift.cpp @@ -140,7 +140,7 @@ bool PopSift::private_init( int w, int h ) return true; } -bool PopSift::private_unit() +bool PopSift::private_uninit() { Pipe& p = _pipe; @@ -340,7 +340,7 @@ void PopSift::extractDownloadLoop( ) job->setFeatures( features ); } - private_unit(); + private_uninit(); } void PopSift::matchPrepareLoop( ) @@ -369,6 +369,8 @@ void PopSift::matchPrepareLoop( ) job->setFeatures( features ); } + + private_uninit(); } SiftJob::SiftJob( int w, int h, const unsigned char* imageData ) diff --git a/src/popsift/popsift.h b/src/popsift/popsift.h index 36648f8f..e8e83872 100755 --- a/src/popsift/popsift.h +++ b/src/popsift/popsift.h @@ -276,7 +276,7 @@ class PopSift bool applyConfiguration( bool force = false ); bool private_init( int w, int h ); - bool private_unit( ); + bool private_uninit( ); void private_apply_scale_factor( int& w, int& h ); void uploadImages( ); From c6bda37bb225921eb7dc89cd8c7faac7ce870fca Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Tue, 2 Feb 2021 23:06:18 +0100 Subject: [PATCH 256/285] [cmake] suppress deprecated cuda sm warnings 
--- CMakeLists.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9735bc2d..f39f2fec 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,6 +16,7 @@ option(PopSift_USE_GRID_FILTER "Switch off grid filtering to massively reduce co option(PopSift_USE_NORMF "The __normf function computes Euclidean distance on large arrays. Fast but stability is uncertain." OFF) option(PopSift_NVCC_WARNINGS "Switch on several additional warning for CUDA nvcc" OFF) option(PopSift_USE_TEST_CMD "Add testing step for functional verification" OFF) +option(PopSift_NO_DEPRECATED_CUDA_SM_WARNINGS "Suppress warnings about soon to be deprecated cuda SM" ON) option(BUILD_SHARED_LIBS "Build shared libraries" ON) if(PopSift_USE_POSITION_INDEPENDENT_CODE AND NOT MSVC) @@ -105,6 +106,10 @@ endif() set(CUDA_SEPARABLE_COMPILATION ON) +if(PopSift_NO_DEPRECATED_CUDA_SM_WARNINGS) + list(APPEND CUDA_NVCC_FLAGS "-Wno-deprecated-gpu-targets") +endif() + if(UNIX AND NOT APPLE) list(APPEND CUDA_NVCC_FLAGS "-Xcompiler;-rdynamic") # set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-Xptxas;-v") From c37087afe4b832373a8f0380151740819ee2ad35 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Thu, 11 Mar 2021 22:56:01 +0100 Subject: [PATCH 257/285] [doc] vcpkg now has the popsift package --- doc/sphinx/source/install/install.rst | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/doc/sphinx/source/install/install.rst b/doc/sphinx/source/install/install.rst index fb6eb014..da6d1dde 100644 --- a/doc/sphinx/source/install/install.rst +++ b/doc/sphinx/source/install/install.rst @@ -50,9 +50,20 @@ vcpkg `vcpkg `_ is a cross-platform (Windows, Linux and MacOS), open-source package manager created by Microsoft. -We are planning to release a port of the library so that it can be easily built using the package manager on all supported platforms. -Stay tuned! +Starting from v0.9, PopSift package can be installed on each platform via vcpkg. 
+To install the library: +.. code:: shell + + vcpkg install popsift --triplet <arch> + +where :code:`<arch>` is the architecture to build for (e.g. :code:`x64-windows`, :code:`x64-linux-dynamic` etc.) + +If you want to install the demo applications that come with the library you can add the option :code:`apps`: + +.. code:: shell + + vcpkg install popsift[apps] --triplet <arch> ------------ From 108564eb436b655ff6e79256bca0ba65f9d1d5ad Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Thu, 11 Mar 2021 22:56:23 +0100 Subject: [PATCH 258/285] [doc] refer to the rtd in the readme --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index cd664bab..dfa3b30e 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,8 @@ PopSift is an open-source implementation of the SIFT algorithm in CUDA. PopSift tries to stick as closely as possible to David Lowe's famous paper [1], while extracting features from an image in real-time at least on an NVidia GTX 980 Ti GPU. +Check out the [documentation](https://popsift.readthedocs.io/) for more info. + ## HW requirements From 026a41ff6e06943de8fbaabbe7ac71bbe067238b Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Thu, 11 Mar 2021 22:56:45 +0100 Subject: [PATCH 259/285] [doc] move CI in a different location in readme --- README.md | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index dfa3b30e..2f257845 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,6 @@ Check out the [documentation](https://popsift.readthedocs.io/) for more info. ## HW requirements - PopSift compiles and works with NVidia cards of compute capability >= 3.0 (including the GT 650M), but the code is developed with the compute capability 5.2 card GTX 980 Ti in mind. CUDA SDK 11 does no longer support compute capability 3.0. 3.5 is still supported with deprecation warning.
@@ -46,15 +45,6 @@ Some build options are available: * `BUILD_SHARED_LIBS` (default: `ON`) controls the type of library to build (`ON` for shared libraries, `OFF` for static) - -### Continuous integration: -- [![Build Status](https://travis-ci.org/alicevision/popsift.svg?branch=master)](https://travis-ci.org/alicevision/popsift) master branch. -- [![Build Status](https://travis-ci.org/alicevision/popsift.svg?branch=develop)](https://travis-ci.org/alicevision/popsift) develop branch. -- [![Build status](https://ci.appveyor.com/api/projects/status/rsm5269hs288c2ji/branch/develop?svg=true)](https://ci.appveyor.com/project/AliceVision/popsift/branch/develop) - develop branch. - - - ## Usage The main artifact created is `libpopsift`. @@ -86,8 +76,6 @@ Then, in order to build just pass the location of `PopSiftConfig.cmake` from the cmake .. -DPopSift_DIR=/lib/cmake/PopSift/ ``` - - ### Calling the API The caller must create a `popart::Config` struct (documented in `src/sift/sift_conf.h`) to control the behaviour of the PopSift, and instantiate an object of class `PopSift` (found in `src/sift/popsift.h`). @@ -111,6 +99,11 @@ PopSift can be configured at runtime to use constants that affect it behaviours. In particular, users can choose to generate results very similar to VLFeat or results that are closer (but not as close) to the SIFT implementation of the OpenCV extras. We acknowledge that there is at least one SIFT implementation that is vastly faster, but it makes considerable sacrifices in terms of accuracy and compatibility. +## Continuous integration: +- [![Build Status](https://travis-ci.org/alicevision/popsift.svg?branch=master)](https://travis-ci.org/alicevision/popsift) master branch. +- [![Build Status](https://travis-ci.org/alicevision/popsift.svg?branch=develop)](https://travis-ci.org/alicevision/popsift) develop branch. 
+- [![Build status](https://ci.appveyor.com/api/projects/status/rsm5269hs288c2ji/branch/develop?svg=true)](https://ci.appveyor.com/project/AliceVision/popsift/branch/develop) + develop branch. ## License From a5f6d60c903dd201a10d7b88decd234648073547 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Thu, 24 Jun 2021 17:51:04 +0200 Subject: [PATCH 260/285] [doc] fix biblio build --- doc/sphinx/source/conf.py | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/sphinx/source/conf.py b/doc/sphinx/source/conf.py index b85cd593..514bafe2 100644 --- a/doc/sphinx/source/conf.py +++ b/doc/sphinx/source/conf.py @@ -53,6 +53,7 @@ def configure_doxyfile(input_dir, output_dir): # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = ['breathe', 'sphinxcontrib.bibtex'] +bibtex_bibfiles = ['biblio.bib'] # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] From 273adb1c375b12f285694488280e04efd251a76a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Sch=C3=B6ps?= Date: Sat, 28 Aug 2021 16:52:25 +0200 Subject: [PATCH 261/285] Add missing thrust include --- src/popsift/s_filtergrid.cu | 1 + 1 file changed, 1 insertion(+) diff --git a/src/popsift/s_filtergrid.cu b/src/popsift/s_filtergrid.cu index 078eb114..301c6a96 100644 --- a/src/popsift/s_filtergrid.cu +++ b/src/popsift/s_filtergrid.cu @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include From f872278995aa24fc48af2cad79e9f12b51179676 Mon Sep 17 00:00:00 2001 From: Azhng Date: Fri, 29 Jan 2021 17:44:15 +0000 Subject: [PATCH 262/285] Replace exit() call with thrown exceptions Address #139 Previously, the exit() function was called when popsift encountered errors (e.g. out of memory). This made graceful error handling within applications using popsift difficult. This commit replaces all exit() calls with thrown runtime errors instead.
--- src/popsift/common/debug_macros.cu | 98 ++++++++---------------------- src/popsift/common/debug_macros.h | 27 +++++--- src/popsift/common/plane_2d.cu | 15 +++-- src/popsift/common/plane_2d.h | 16 +++-- src/popsift/features.cu | 25 ++++---- src/popsift/gauss_filter.cu | 23 ++++--- src/popsift/popsift.cpp | 71 +++++++++++++++------- src/popsift/popsift.h | 5 ++ src/popsift/s_desc_notile.cu | 7 +-- 9 files changed, 138 insertions(+), 149 deletions(-) diff --git a/src/popsift/common/debug_macros.cu b/src/popsift/common/debug_macros.cu index cf4cd735..c9155248 100755 --- a/src/popsift/common/debug_macros.cu +++ b/src/popsift/common/debug_macros.cu @@ -20,12 +20,7 @@ void pop_sync_check_last_error( const char* file, size_t line ) void pop_check_last_error( const char* file, size_t line ) { cudaError_t err = cudaGetLastError( ); - if( err != cudaSuccess ) { - std::cerr << __FILE__ << ":" << __LINE__ << std::endl - << " called from " << file << ":" << line << std::endl - << " cudaGetLastError failed: " << cudaGetErrorString(err) << std::endl; - exit( -__LINE__ ); - } + POP_CUDA_FATAL_TEST(err, "cudaGetLastError failed: "); } namespace popsift { namespace cuda { @@ -34,11 +29,7 @@ void malloc_dev( void** ptr, int sz, { cudaError_t err; err = cudaMalloc( ptr, sz ); - if( err != cudaSuccess ) { - std::cerr << file << ":" << line << std::endl - << " cudaMalloc failed: " << cudaGetErrorString(err) << std::endl; - exit( -__LINE__ ); - } + POP_CUDA_FATAL_TEST(err, "cudaMalloc failed: "); #ifdef DEBUG_INIT_DEVICE_ALLOCATIONS popsift::cuda::memset_sync( *ptr, 0, sz, file, line ); #endif // NDEBUG @@ -51,11 +42,7 @@ void malloc_hst( void** ptr, int sz, { cudaError_t err; err = cudaMallocHost( ptr, sz ); - if( err != cudaSuccess ) { - std::cerr << file << ":" << line << std::endl - << " cudaMallocHost failed: " << cudaGetErrorString(err) << std::endl; - exit( -__LINE__ ); - } + POP_CUDA_FATAL_TEST(err, "cudaMallocHost failed: "); #ifdef DEBUG_INIT_DEVICE_ALLOCATIONS memset( *ptr, 
0, sz ); #endif // NDEBUG @@ -74,16 +61,13 @@ void memcpy_async( void* dst, const void* src, size_t sz, cudaError_t err; err = cudaMemcpyAsync( dst, src, sz, type, stream ); if( err != cudaSuccess ) { - cerr << file << ":" << line << endl - << " " << "Failed to copy " - << (type==cudaMemcpyHostToDevice?"host-to-device":"device-to-host") - << ": "; - cerr << cudaGetErrorString(err) << endl; - cerr << " src ptr=" << hex << (size_t)src << dec << endl - << " dst ptr=" << hex << (size_t)dst << dec << endl; - exit( -__LINE__ ); + std::stringstream ss; + ss << "Failed to copy " << (type == cudaMemcpyHostToDevice ? "host-to-device" : "device-to-host") << ": "; + ss << cudaGetErrorString(err) << endl; + ss << " src ptr=" << hex << (size_t)src << dec << endl + << " dst ptr=" << hex << (size_t)dst << dec << endl; + POP_FATAL(ss.str()); } - POP_CUDA_FATAL_TEST( err, "Failed to copy host-to-device: " ); } void memcpy_sync( void* dst, const void* src, size_t sz, cudaMemcpyKind type, const char* file, size_t line ) @@ -95,37 +79,27 @@ void memcpy_sync( void* dst, const void* src, size_t sz, cudaMemcpyKind type, co cudaError_t err; err = cudaMemcpy( dst, src, sz, type ); if( err != cudaSuccess ) { - cerr << " " << "Failed to copy " - << (type==cudaMemcpyHostToDevice?"host-to-device":"device-to-host") - << ": "; - cerr << cudaGetErrorString(err) << endl; - cerr << " src ptr=" << hex << (size_t)src << dec << endl - << " dst ptr=" << hex << (size_t)dst << dec << endl; - exit( -__LINE__ ); + std::stringstream ss; + ss << "Failed to copy " << (type == cudaMemcpyHostToDevice ? 
"host-to-device" : "device-to-host") << ": "; + ss << cudaGetErrorString(err) << endl; + ss << " src ptr=" << hex << (size_t)src << dec << endl + << " dst ptr=" << hex << (size_t)dst << dec << endl; + POP_FATAL(ss.str()) } - POP_CUDA_FATAL_TEST( err, "Failed to copy host-to-device: " ); } void memset_async( void* ptr, int value, size_t bytes, cudaStream_t stream, const char* file, size_t line ) { cudaError_t err; err = cudaMemsetAsync( ptr, value, bytes, stream ); - if( err != cudaSuccess ) { - std::cerr << file << ":" << line << std::endl - << " cudaMemsetAsync failed: " << cudaGetErrorString(err) << std::endl; - exit( -__LINE__ ); - } + POP_CUDA_FATAL_TEST(err, "cudaMemsetAsync failed: "); } void memset_sync( void* ptr, int value, size_t bytes, const char* file, size_t line ) { cudaError_t err; err = cudaMemset( ptr, value, bytes ); - if( err != cudaSuccess ) { - std::cerr << file << ":" << line << std::endl - << " cudaMemset failed: " << cudaGetErrorString(err) << std::endl; - exit( -__LINE__ ); - } + POP_CUDA_FATAL_TEST(err, "cudaMemset failed: "); } } } @@ -135,68 +109,44 @@ cudaStream_t stream_create( const char* file, size_t line ) cudaStream_t stream; cudaError_t err; err = cudaStreamCreate( &stream ); - if( err != cudaSuccess ) { - std::cerr << file << ":" << line << std::endl - << " cudaStreamCreate failed: " << cudaGetErrorString(err) << std::endl; - exit( -__LINE__ ); - } + POP_CUDA_FATAL_TEST(err, "cudaStreamCreate failed: "); return stream; } void stream_destroy( cudaStream_t s, const char* file, size_t line ) { cudaError_t err; err = cudaStreamDestroy( s ); - if( err != cudaSuccess ) { - std::cerr << file << ":" << line << std::endl - << " cudaStreamDestroy failed: " << cudaGetErrorString(err) << std::endl; - exit( -__LINE__ ); - } + POP_CUDA_FATAL_TEST(err, "cudaStreamDestroy failed: "); } cudaEvent_t event_create( const char* file, size_t line ) { cudaEvent_t ev; cudaError_t err; err = cudaEventCreate( &ev ); - if( err != cudaSuccess ) { - 
std::cerr << file << ":" << line << std::endl - << " cudaEventCreate failed: " << cudaGetErrorString(err) << std::endl; - exit( -__LINE__ ); - } + POP_CUDA_FATAL_TEST(err, "cudaEventCreate failed: "); return ev; } void event_destroy( cudaEvent_t ev, const char* file, size_t line ) { cudaError_t err; err = cudaEventDestroy( ev ); - if( err != cudaSuccess ) { - std::cerr << file << ":" << line << std::endl - << " cudaEventDestroy failed: " << cudaGetErrorString(err) << std::endl; - exit( -__LINE__ ); - } + POP_CUDA_FATAL_TEST(err, "cudaEventDestroy failed: "); } void event_record( cudaEvent_t ev, cudaStream_t s, const char* file, size_t line ) { cudaError_t err; err = cudaEventRecord( ev, s ); - if( err != cudaSuccess ) { - std::cerr << file << ":" << line << std::endl - << " cudaEventRecord failed: " << cudaGetErrorString(err) << std::endl; - exit( -__LINE__ ); - } + POP_CUDA_FATAL_TEST(err, "cudaEventRecord failed: "); } void event_wait( cudaEvent_t ev, cudaStream_t s, const char* file, size_t line ) { cudaError_t err; err = cudaStreamWaitEvent( s, ev, 0 ); - if( err != cudaSuccess ) { - std::cerr << file << ":" << line << std::endl - << " cudaStreamWaitEvent failed: " << cudaGetErrorString(err) << std::endl; - exit( -__LINE__ ); - } + POP_CUDA_FATAL_TEST(err, "cudaStreamWaitEvent failed: "); } float event_diff( cudaEvent_t from, cudaEvent_t to ) -{ +{ float ms; cudaEventElapsedTime( &ms, from, to ); return ms; diff --git a/src/popsift/common/debug_macros.h b/src/popsift/common/debug_macros.h index dada7d3b..c8f31651 100755 --- a/src/popsift/common/debug_macros.h +++ b/src/popsift/common/debug_macros.h @@ -13,6 +13,7 @@ #include #include #include +#include #include // synchronize device and check for an error @@ -117,14 +118,18 @@ class BriefDuration }; }; -#define POP_FATAL(s) { \ - std::cerr << __FILE__ << ":" << __LINE__ << std::endl << " " << s << std::endl; \ - exit( -__LINE__ ); \ +#define POP_FATAL(s) \ + { \ + std::stringstream ss; \ + ss << __FILE__ << ":" 
<< __LINE__ << std::endl << " " << s; \ + throw std::runtime_error{ss.str()}; \ } -#define POP_FATAL_FL(s,file,line) { \ - std::cerr << file << ":" << line << std::endl << " " << s << std::endl; \ - exit( -__LINE__ ); \ +#define POP_FATAL_FL(s, file, line) \ + { \ + std::stringstream ss; \ + ss << file << ":" << line << std::endl << " " << s << std::endl; \ + throw std::runtime_error{ss.str()}; \ } #define POP_CHECK_NON_NULL(ptr,s) if( ptr == 0 ) { POP_FATAL_FL(s,__FILE__,__LINE__); } @@ -147,10 +152,12 @@ class BriefDuration std::cerr << __FILE__ << ":" << __LINE__ << std::endl; \ std::cerr << " WARNING: " << s << cudaGetErrorString(err) << std::endl; \ } -#define POP_CUDA_FATAL(err,s) { \ - std::cerr << __FILE__ << ":" << __LINE__ << std::endl; \ - std::cerr << " " << s << cudaGetErrorString(err) << std::endl; \ - exit( -__LINE__ ); \ +#define POP_CUDA_FATAL(err,s) \ + { \ + std::stringstream ss; \ + ss << __FILE__ << ":" << __LINE__ << std::endl; \ + ss << " " << s << cudaGetErrorString(err) << std::endl; \ + throw std::runtime_error{ss.str()}; \ } #define POP_CUDA_FATAL_TEST(err,s) if( err != cudaSuccess ) { POP_CUDA_FATAL(err,s); } diff --git a/src/popsift/common/plane_2d.cu b/src/popsift/common/plane_2d.cu index 773dac24..89ba7d34 100644 --- a/src/popsift/common/plane_2d.cu +++ b/src/popsift/common/plane_2d.cu @@ -15,6 +15,7 @@ #include #include #include +#include #ifndef _WIN32 #include #else @@ -65,11 +66,11 @@ void* PlaneBase::allocHost2D( int w, int h, int elemSize, PlaneMapMode m ) #else const char *buf = strerror(errno); #endif - cerr << __FILE__ << ":" << __LINE__ << endl - << " Failed to allocate " << sz << " bytes of unaligned host memory." << endl - << " Cause: " << buf << endl; - exit( -1 ); - } else if( m == PageAligned ) { + stringstream ss; + ss << "Failed to allocate " << sz << " bytes of unaligned host memory." 
<< endl + << "Cause: " << buf; + POP_FATAL(ss.str()); + } else if(m == PageAligned) { void* ptr = memalign(getPageSize(), sz); if(ptr) return ptr; @@ -93,9 +94,7 @@ void* PlaneBase::allocHost2D( int w, int h, int elemSize, PlaneMapMode m ) POP_CUDA_FATAL_TEST( err, "Failed to allocate aligned and pinned host memory: " ); return ptr; } else { - cerr << __FILE__ << ":" << __LINE__ << endl - << " Alignment not correctly specified in host plane allocation" << endl; - exit( -1 ); + POP_FATAL("Alignment not correctly specified in host plane allocation"); } } diff --git a/src/popsift/common/plane_2d.h b/src/popsift/common/plane_2d.h index 86d26f5c..2dad48cd 100644 --- a/src/popsift/common/plane_2d.h +++ b/src/popsift/common/plane_2d.h @@ -16,6 +16,8 @@ #include #include +#include "debug_macros.h" + #define PLANE2D_CUDA_OP_DEBUG #ifndef NDEBUG @@ -407,14 +409,16 @@ __host__ inline void Plane2D::memcpyToDevice( Plane2D& devPlane, cudaStream_t stream ) { if( devPlane._cols != this->_cols ) { - std::cerr << __FILE__ << ":" << __LINE__ << std::endl - << " Error: source columns (" << this->_cols << ") and dest columns (" << devPlane._cols << ") must be identical" << std::endl; - exit( -1 ); + std::stringstream ss; + ss << "Error: source columns (" << this->_cols << ") and dest columns (" << devPlane._cols + << ") must be identical"; + POP_FATAL(ss.str()); } if( devPlane._rows != this->_rows ) { - std::cerr << __FILE__ << ":" << __LINE__ << std::endl - << " Error: source rows (" << this->_rows << ") and dest rows (" << devPlane._rows << ") must be identical" << std::endl; - exit( -1 ); + std::stringstream ss; + ss << "Error: source rows (" << this->_rows << ") and dest rows (" << devPlane._rows + << ") must be identical"; + POP_FATAL(ss.str()); } PitchPlane2D::memcpyToDevice( devPlane, this->_cols, this->_rows, stream ); } diff --git a/src/popsift/features.cu b/src/popsift/features.cu index 023279ff..5aa706a1 100755 --- a/src/popsift/features.cu +++ b/src/popsift/features.cu @@ 
-16,6 +16,7 @@ #include #include #include +#include using namespace std; @@ -61,19 +62,21 @@ void FeaturesHost::reset( int num_ext, int num_ori ) _ext = (Feature*)memalign( getPageSize(), num_ext * sizeof(Feature) ); if( _ext == nullptr ) { - cerr << __FILE__ << ":" << __LINE__ << " Runtime error:" << endl - << " Failed to (re)allocate memory for downloading " << num_ext << " features" << endl; - if( errno == EINVAL ) cerr << " Alignment is not a power of two." << endl; - if( errno == ENOMEM ) cerr << " Not enough memory." << endl; - exit( -1 ); + std::stringstream ss; + ss << "Runtime error:" << endl + << " Failed to (re)allocate memory for downloading " << num_ext << " features" << endl; + if(errno == EINVAL) ss << " Alignment is not a power of two."; + if(errno == ENOMEM) ss << " Not enough memory."; + POP_FATAL(ss.str()); } _ori = (Descriptor*)memalign( getPageSize(), num_ori * sizeof(Descriptor) ); - if( _ori == nullptr ) { - cerr << __FILE__ << ":" << __LINE__ << " Runtime error:" << endl - << " Failed to (re)allocate memory for downloading " << num_ori << " descriptors" << endl; - if( errno == EINVAL ) cerr << " Alignment is not a power of two." << endl; - if( errno == ENOMEM ) cerr << " Not enough memory." << endl; - exit( -1 ); + if(_ori == nullptr) { + std::stringstream ss; + ss << "Runtime error:" << endl + << " Failed to (re)allocate memory for downloading " << num_ori << " descriptors" << endl; + if(errno == EINVAL) ss << " Alignment is not a power of two."; + if(errno == ENOMEM) ss << " Not enough memory."; + POP_FATAL(ss.str()); } setFeatureCount( num_ext ); diff --git a/src/popsift/gauss_filter.cu b/src/popsift/gauss_filter.cu index 537c843e..7c425f7f 100755 --- a/src/popsift/gauss_filter.cu +++ b/src/popsift/gauss_filter.cu @@ -130,17 +130,17 @@ void init_filter( const Config& conf, { if( sigma0 > 2.0 ) { - cerr << __FILE__ << ":" << __LINE__ << ", ERROR: " - << " Sigma > 2.0 is not supported. Re-size __constant__ array and recompile." 
- << endl; - exit( -__LINE__ ); + stringstream ss; + ss << "ERROR: " + << " Sigma > 2.0 is not supported. Re-size __constant__ array and recompile."; + POP_FATAL(ss.str()); } if( levels > GAUSS_LEVELS ) { - cerr << __FILE__ << ":" << __LINE__ << ", ERROR: " - << " More than " << GAUSS_LEVELS << " levels not supported. Re-size __constant__ array and recompile." - << endl; - exit( -__LINE__ ); + stringstream ss; + ss << "ERROR: " + << " More than " << GAUSS_LEVELS << " levels not supported. Re-size __constant__ array and recompile."; + POP_FATAL(ss.str()); } if( conf.ifPrintGaussTables() ) { @@ -291,10 +291,9 @@ int GaussInfo::getSpan( float sigma ) const case Config::Fixed15 : return 8; default : - cerr << __FILE__ << ":" << __LINE__ << ", ERROR: " - << " The mode for computing Gauss filter scan is invalid" - << endl; - exit( -__LINE__ ); + stringstream ss; + ss << "ERROR: The mode for computing Gauss filter scan is invalid"; + POP_FATAL(ss.str()); } } diff --git a/src/popsift/popsift.cpp b/src/popsift/popsift.cpp index 253af961..f7e983aa 100755 --- a/src/popsift/popsift.cpp +++ b/src/popsift/popsift.cpp @@ -12,10 +12,13 @@ #include "gauss_filter.h" #include "sift_config.h" #include "sift_pyramid.h" +#include "common/debug_macros.h" #include #include #include +#include +#include using namespace std; @@ -154,7 +157,7 @@ void PopSift::uninit( ) { if(!_isInit) { - std::cout << "[warning] Attempt to release resources from an uninitialized instance" << std::endl; + std::cerr << "[warning] Attempt to release resources from an uninitialized instance" << std::endl; return; } _pipe.uninit(); @@ -248,9 +251,10 @@ SiftJob* PopSift::enqueue( int w, { if( _image_mode != ByteImages ) { - cerr << __FILE__ << ":" << __LINE__ << " Image mode error" << endl - << "E Cannot load byte images into a PopSift pipeline configured for float images" << endl; - exit( -1 ); + stringstream ss; + ss << "Image mode error" << endl + << "E Cannot load byte images into a PopSift pipeline configured 
for float images"; + POP_FATAL(ss.str()); } AllocTest a = testTextureFit( w, h ); @@ -272,9 +276,10 @@ SiftJob* PopSift::enqueue( int w, { if( _image_mode != FloatImages ) { - cerr << __FILE__ << ":" << __LINE__ << " Image mode error" << endl - << "E Cannot load float images into a PopSift pipeline configured for byte images" << endl; - exit( -1 ); + stringstream ss; + ss << "Image mode error" << endl + << "E Cannot load float images into a PopSift pipeline configured for byte images"; + POP_FATAL(ss.str()); } AllocTest a = testTextureFit( w, h ); @@ -352,20 +357,29 @@ void PopSift::matchPrepareLoop( ) SiftJob* job; while( ( job = p._queue_stage2.pull() ) != nullptr ) { - applyConfiguration(); - - popsift::ImageBase* img = job->getImg(); + popsift::FeaturesDev* features; + try + { + applyConfiguration(); - private_init( img->getWidth(), img->getHeight() ); + popsift::ImageBase* img = job->getImg(); - p._pyramid->step1( _config, img ); - p._unused.push( img ); // uploaded input image no longer needed, release for reuse + private_init(img->getWidth(), img->getHeight()); - p._pyramid->step2( _config ); + p._pyramid->step1(_config, img); + p._unused.push(img); // uploaded input image no longer needed, release for reuse - popsift::FeaturesDev* features = p._pyramid->clone_device_descriptors( _config ); + p._pyramid->step2(_config); - cudaDeviceSynchronize(); + features = p._pyramid->clone_device_descriptors(_config); + cudaDeviceSynchronize(); + } + catch(const std::exception& e) + { + job->setError(std::current_exception()); + job->setFeatures(nullptr); + break; + } job->setFeatures( features ); } @@ -387,9 +401,10 @@ SiftJob::SiftJob( int w, int h, const unsigned char* imageData ) } else { - cerr << __FILE__ << ":" << __LINE__ << " Memory limitation" << endl - << "E Failed to allocate memory for SiftJob" << endl; - exit( -1 ); + stringstream ss; + ss << "Memory limitation" << endl + << "E Failed to allocate memory for SiftJob"; + POP_FATAL(ss.str()); } } @@ -407,9 
+422,10 @@ SiftJob::SiftJob( int w, int h, const float* imageData ) } else { - cerr << __FILE__ << ":" << __LINE__ << " Memory limitation" << endl - << "E Failed to allocate memory for SiftJob" << endl; - exit( -1 ); + stringstream ss; + ss << "Memory limitation" << endl + << "E Failed to allocate memory for SiftJob"; + POP_FATAL(ss.str()); } } @@ -458,7 +474,16 @@ popsift::FeaturesHost* SiftJob::getHost() popsift::FeaturesDev* SiftJob::getDev() { - return dynamic_cast( _f.get() ); + popsift::FeaturesBase* features = _f.get(); + if(this->_err != nullptr) { + std::rethrow_exception(this->_err); + } + return dynamic_cast(features); +} + +void SiftJob::setError(std::exception_ptr ptr) +{ + this->_err = ptr; } void PopSift::Pipe::uninit() diff --git a/src/popsift/popsift.h b/src/popsift/popsift.h index e8e83872..3b5f72b8 100755 --- a/src/popsift/popsift.h +++ b/src/popsift/popsift.h @@ -15,9 +15,11 @@ #include +#include #include #include #include +#include #include #include @@ -47,6 +49,7 @@ class SiftJob int _h; unsigned char* _imageData; popsift::ImageBase* _img; + std::exception_ptr _err; #if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX) nvtxRangeId_t _nvtx_id; #endif @@ -92,6 +95,8 @@ class SiftJob /** fulfill the promise */ void setFeatures( popsift::FeaturesBase* f ); + + void setError(std::exception_ptr ptr); }; /** diff --git a/src/popsift/s_desc_notile.cu b/src/popsift/s_desc_notile.cu index a336898b..9ba8a927 100644 --- a/src/popsift/s_desc_notile.cu +++ b/src/popsift/s_desc_notile.cu @@ -13,6 +13,7 @@ #include #include +#include // 1 -> 19.6 on 980 Ti // 2 -> 19.5 on 980 Ti @@ -151,11 +152,7 @@ bool start_ext_desc_notile( int octave, Octave& oct_obj ) oct_obj.getDataTexLinear( ).tex ); cudaDeviceSynchronize(); cudaError_t err = cudaGetLastError( ); - if( err != cudaSuccess ) { - std::cerr << __FILE__ << ":" << __LINE__ << std::endl - << " cudaGetLastError failed: " << cudaGetErrorString(err) << std::endl; - exit( -__LINE__ ); - } + POP_CUDA_FATAL_TEST(err, 
"cudaGetLastError failed: "); POP_SYNC_CHK; From 7b664e27ca2865f3e06101d50415f2943d2de58c Mon Sep 17 00:00:00 2001 From: Azhng Date: Tue, 21 Feb 2023 19:24:57 +0000 Subject: [PATCH 263/285] add required thrust include for s_filtergrid.cu --- src/popsift/s_filtergrid.cu | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/popsift/s_filtergrid.cu b/src/popsift/s_filtergrid.cu index 301c6a96..a766c2de 100644 --- a/src/popsift/s_filtergrid.cu +++ b/src/popsift/s_filtergrid.cu @@ -19,9 +19,11 @@ #if ! POPSIFT_IS_DEFINED(POPSIFT_DISABLE_GRID_FILTER) #include +#include #include #include #include +#include #include #include #include From fb3c1073254b892f073a3a80dbc9d51cbb2171ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gr=C3=A9goire=20De=20Lillo?= Date: Mon, 6 Mar 2023 11:15:40 +0000 Subject: [PATCH 264/285] [popsift] Use width in elements instead of bytes in checkLimit_2DsurfLayered --- src/popsift/common/device_prop.cu | 2 +- src/popsift/common/device_prop.h | 3 +-- src/popsift/popsift.cpp | 11 +++-------- 3 files changed, 5 insertions(+), 11 deletions(-) diff --git a/src/popsift/common/device_prop.cu b/src/popsift/common/device_prop.cu index 8436a69b..a55821cd 100644 --- a/src/popsift/common/device_prop.cu +++ b/src/popsift/common/device_prop.cu @@ -274,7 +274,7 @@ bool device_prop_t::checkLimit_2DsurfLayered( int& width, int& height, int& laye std::cerr << __FILE__ << ":" << __LINE__ << ": CUDA device " << currentDevice << std::endl << " does not support layered 2D surfaces " << width - << " bytes wide." << endl; + << " pixels wide." << endl; } width = ptr->maxSurface2DLayered[0]; returnSuccess = false; diff --git a/src/popsift/common/device_prop.h b/src/popsift/common/device_prop.h index ed5db2b2..7a0b142d 100644 --- a/src/popsift/common/device_prop.h +++ b/src/popsift/common/device_prop.h @@ -91,8 +91,7 @@ class device_prop_t /** * @brief Check if a request exceeds the current CUDA device's limit in * surface2DLayered dimensions. 
surface2DLayered is the writable equivalent - * to texture2DLayered, but the width must be given in bytes, not elements. - * Since we use float, images cannot be as wide as expected. + * to texture2DLayered. * @param[in,out] width Desired width of the texture. * @param[in,out] height Desired height of the texture. * @param[in,out] layers Desired depth of the texture. diff --git a/src/popsift/popsift.cpp b/src/popsift/popsift.cpp index 253af961..6224eb93 100755 --- a/src/popsift/popsift.cpp +++ b/src/popsift/popsift.cpp @@ -184,12 +184,7 @@ PopSift::AllocTest PopSift::testTextureFit( int width, int height ) */ int depth = _config.levels + 3; - /* Surfaces have a limited width in bytes, not in elements. - * Our DOG pyramid stores 4/byte floats, so me must check for - * that width. - */ - int byteWidth = width * sizeof(float); - retval = _device_properties.checkLimit_2DsurfLayered( byteWidth, + retval = _device_properties.checkLimit_2DsurfLayered( width, height, depth, warn ); @@ -216,13 +211,13 @@ std::string PopSift::testTextureFitErrorString( AllocTest err, int width, int he { const float upscaleFactor = _config.getUpscaleFactor(); const float scaleFactor = 1.0f / powf( 2.0f, -upscaleFactor ); - int w = ceilf( width * scaleFactor ) * sizeof(float); + int w = ceilf( width * scaleFactor ); int h = ceilf( height * scaleFactor ); int d = _config.levels + 3; _device_properties.checkLimit_2DsurfLayered( w, h, d, false ); - w = w / scaleFactor / sizeof(float); + w = w / scaleFactor; h = h / scaleFactor; ostr << "E Cannot use" << (upscaleFactor==1 ? " default " : " ") From dc7302e5807134ced27e8f451592ed90b4f8bf38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gr=C3=A9goire=20De=20Lillo?= Date: Mon, 6 Mar 2023 11:19:23 +0000 Subject: [PATCH 265/285] [popsift] plane_2d: Use size_t type for memory size in bytes Use size_t type to avoid short type overflow with large size images. 
--- src/popsift/common/plane_2d.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/popsift/common/plane_2d.h b/src/popsift/common/plane_2d.h index 86d26f5c..e262714f 100644 --- a/src/popsift/common/plane_2d.h +++ b/src/popsift/common/plane_2d.h @@ -168,10 +168,10 @@ template struct PitchPlane2D : public PlaneT PlaneBase::freeHost2D( this->data, mode ); } __host__ __device__ - inline short getPitchInBytes( ) const { return _pitchInBytes; } + inline size_t getPitchInBytes( ) const { return _pitchInBytes; } protected: - int _pitchInBytes; // pitch width in bytes + size_t _pitchInBytes; // pitch width in bytes }; /************************************************************* @@ -338,7 +338,7 @@ template class Plane2D : public PitchPlane2D __host__ __device__ inline short getHeight( ) const { return _rows; } __host__ __device__ - inline short getByteSize( ) const { return this->_pitchInBytes*_rows; } + inline size_t getByteSize( ) const { return this->_pitchInBytes * _rows; } __host__ inline void allocDev( int w, int h ) { _cols = w; From 88acf53c35bc763af297ff8de213b51c7b8c71f5 Mon Sep 17 00:00:00 2001 From: Billy Robert O'Neal III Date: Thu, 16 Mar 2023 15:28:38 -0700 Subject: [PATCH 266/285] Add support for CUDA versions through 12.1. 
--- cmake/ChooseCudaCC.cmake | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/cmake/ChooseCudaCC.cmake b/cmake/ChooseCudaCC.cmake index 425e8bd5..36c73178 100755 --- a/cmake/ChooseCudaCC.cmake +++ b/cmake/ChooseCudaCC.cmake @@ -65,7 +65,7 @@ function(chooseCudaCC SUPPORTED_CC SUPPORTED_GENCODE_FLAGS) set(CC_LIST_BY_SYSTEM_PROCESSOR "") if(CMAKE_SYSTEM_PROCESSOR IN_LIST OTHER_SUPPORTED_PROCESSORS) - list(APPEND CC_LIST_BY_SYSTEM_PROCESSOR "20;21;30;35;50;52;60;61;70;75;80;86") + list(APPEND CC_LIST_BY_SYSTEM_PROCESSOR "20;21;30;35;50;52;60;61;70;75;80;86;87;89;90") endif() if(CMAKE_SYSTEM_PROCESSOR IN_LIST TEGRA_SUPPORTED_PROCESSORS) list(APPEND CC_LIST_BY_SYSTEM_PROCESSOR "32;53;62;72") @@ -78,10 +78,20 @@ function(chooseCudaCC SUPPORTED_CC SUPPORTED_GENCODE_FLAGS) # Default setting of the CUDA CC versions to compile. # Shortening the lists saves a lot of compile time. # - set(CUDA_MIN_CC 20) - set(CUDA_MAX_CC 86) - if(CUDA_VERSION VERSION_GREATER_EQUAL 11.1) + + # The current version last time this list was updated was CUDA 12.1. 
+ if(CUDA_VERSION VERSION_GREATER_EQUAL 12) + set(CUDA_MIN_CC 50) + set(CUDA_MAX_CC 90) + elseif(CUDA_VERSION VERSION_GREATER_EQUAL 11.8) + set(CUDA_MIN_CC 35) + set(CUDA_MAX_CC 90) + elseif(CUDA_VERSION VERSION_GREATER_EQUAL 11.6) + set(CUDA_MIN_CC 35) + set(CUDA_MAX_CC 87) + elseif(CUDA_VERSION VERSION_GREATER_EQUAL 11.1) set(CUDA_MIN_CC 35) + set(CUDA_MAX_CC 86) elseif(CUDA_VERSION_MAJOR GREATER_EQUAL 11) set(CUDA_MIN_CC 35) set(CUDA_MAX_CC 80) @@ -92,8 +102,10 @@ function(chooseCudaCC SUPPORTED_CC SUPPORTED_GENCODE_FLAGS) set(CUDA_MIN_CC 30) set(CUDA_MAX_CC 72) elseif(CUDA_VERSION_MAJOR GREATER_EQUAL 8) + set(CUDA_MIN_CC 20) set(CUDA_MAX_CC 62) elseif(CUDA_VERSION_MAJOR GREATER_EQUAL 7) + set(CUDA_MIN_CC 20) set(CUDA_MAX_CC 53) else() message(FATAL_ERROR "We do not support a CUDA SDK below version 7.0") From 9e5a50d86311f76a3591a2e9ab4eaa87a845a30f Mon Sep 17 00:00:00 2001 From: Billy Robert O'Neal III Date: Tue, 21 Mar 2023 12:13:34 -0700 Subject: [PATCH 267/285] Remove checking for hardware 87. 
--- cmake/ChooseCudaCC.cmake | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/cmake/ChooseCudaCC.cmake b/cmake/ChooseCudaCC.cmake index 36c73178..aba4eb91 100755 --- a/cmake/ChooseCudaCC.cmake +++ b/cmake/ChooseCudaCC.cmake @@ -65,7 +65,7 @@ function(chooseCudaCC SUPPORTED_CC SUPPORTED_GENCODE_FLAGS) set(CC_LIST_BY_SYSTEM_PROCESSOR "") if(CMAKE_SYSTEM_PROCESSOR IN_LIST OTHER_SUPPORTED_PROCESSORS) - list(APPEND CC_LIST_BY_SYSTEM_PROCESSOR "20;21;30;35;50;52;60;61;70;75;80;86;87;89;90") + list(APPEND CC_LIST_BY_SYSTEM_PROCESSOR "20;21;30;35;50;52;60;61;70;75;80;86;89;90") endif() if(CMAKE_SYSTEM_PROCESSOR IN_LIST TEGRA_SUPPORTED_PROCESSORS) list(APPEND CC_LIST_BY_SYSTEM_PROCESSOR "32;53;62;72") @@ -86,9 +86,6 @@ function(chooseCudaCC SUPPORTED_CC SUPPORTED_GENCODE_FLAGS) elseif(CUDA_VERSION VERSION_GREATER_EQUAL 11.8) set(CUDA_MIN_CC 35) set(CUDA_MAX_CC 90) - elseif(CUDA_VERSION VERSION_GREATER_EQUAL 11.6) - set(CUDA_MIN_CC 35) - set(CUDA_MAX_CC 87) elseif(CUDA_VERSION VERSION_GREATER_EQUAL 11.1) set(CUDA_MIN_CC 35) set(CUDA_MAX_CC 86) From d49f2ed68c493be6406dbaf77e4148b9b4b6faea Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Fri, 24 Mar 2023 21:40:18 +0100 Subject: [PATCH 268/285] [ci] added github actions for CI --- .github/workflows/continuous-integration.yml | 64 ++++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 .github/workflows/continuous-integration.yml diff --git a/.github/workflows/continuous-integration.yml b/.github/workflows/continuous-integration.yml new file mode 100644 index 00000000..e4e42a57 --- /dev/null +++ b/.github/workflows/continuous-integration.yml @@ -0,0 +1,64 @@ +name: Continuous Integration + +on: + push: + branches: + - master + - develop + # Skip jobs when only documentation files are changed + paths-ignore: + - '**.md' + - '**.rst' + - 'docs/**' + pull_request: + paths-ignore: + - '**.md' + - '**.rst' + - 'docs/**' + +jobs: + build: + runs-on: ubuntu-latest + strategy: + matrix: + 
container: ["alicevision/popsift-deps:cuda10.2-ubuntu18.04", "alicevision/popsift-deps:cuda11.8.0-ubuntu20.04", "alicevision/popsift-deps:cuda12.1.0-ubuntu22.04"] + build_tpe: ["Release", "Debug"] + + container: + image: ${{ matrix.container }} + + env: + DEPS_INSTALL_DIR: /opt/ + BUILD_TYPE: Release + CTEST_OUTPUT_ON_FAILURE: 1 + steps: + - uses: actions/checkout@v2 + + - name: Prepare File Tree + run: | + mkdir ./build + mkdir ./build_as_3rdparty + mkdir ../popsift_install + + - name: Configure CMake + working-directory: ./build + run: | + cmake .. \ + -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ + -DBUILD_SHARED_LIBS:BOOL=ON \ + -DCMAKE_PREFIX_PATH="${DEPS_INSTALL_DIR}" \ + -DPopSift_BUILD_DOCS:BOOL=OFF \ + -DCMAKE_INSTALL_PREFIX:PATH=$PWD/../../popsift_install + + - name: Build + working-directory: ./build + run: | + make -j$(nproc) install + + - name: Build As Third Party + working-directory: ./build_as_3rdparty + run: | + cmake ../src/application \ + -DBUILD_SHARED_LIBS:BOOL=ON \ + -DCMAKE_PREFIX_PATH:PATH="$PWD/../../popsift_install;${DEPS_INSTALL_DIR}" + make -j$(nproc) From 819f70d820cb77cf2fe7f332ea3528cd510e213f Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Fri, 24 Mar 2023 22:30:41 +0100 Subject: [PATCH 269/285] [docker] updated docker for dependencies --- Dockerfile_deps | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Dockerfile_deps b/Dockerfile_deps index bed8cc59..d5560d4e 100644 --- a/Dockerfile_deps +++ b/Dockerfile_deps @@ -7,7 +7,7 @@ LABEL maintainer="AliceVision Team alicevision@googlegroups.com" # see https://hub.docker.com/r/nvidia/cuda/ # # For example, to create a ubuntu 16.04 with cuda 8.0 for development, use -# docker build --build-arg CUDA_TAG=8.0 --tag alicevision/popsift-deps:cuda${CUDA_TAG}-ubuntu${OS_TAG} . +# docker build --build-arg CUDA_TAG=8.0 --tag alicevision/popsift-deps:cuda${CUDA_TAG}-ubuntu${OS_TAG} -f Dockerfile_deps . 
# # then execute with nvidia docker (https://github.com/nvidia/nvidia-docker/wiki/Installation-(version-2.0)) # docker run -it --runtime=nvidia popsift_deps @@ -32,12 +32,12 @@ RUN apt-get clean && apt-get update && apt-get install -y --no-install-recommend libboost-thread-dev \ && rm -rf /var/lib/apt/lists/* - # Manually install cmake +# Manually install cmake WORKDIR /tmp/cmake -ENV CMAKE_VERSION=3.17 +ENV CMAKE_VERSION=3.24 ENV CMAKE_VERSION_FULL=${CMAKE_VERSION}.2 -RUN wget https://cmake.org/files/v3.17/cmake-${CMAKE_VERSION_FULL}.tar.gz && \ - tar zxvf cmake-${CMAKE_VERSION_FULL}.tar.gz && \ +RUN wget https://cmake.org/files/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION_FULL}.tar.gz && \ + tar zxf cmake-${CMAKE_VERSION_FULL}.tar.gz && \ cd cmake-${CMAKE_VERSION_FULL} && \ ./bootstrap --prefix=/usr/local -- -DCMAKE_BUILD_TYPE:STRING=Release -DCMAKE_USE_OPENSSL:BOOL=ON && \ make -j$(nproc) install && \ From cfe0c6ec9d3e965aaa694dcca1a7471c8769b138 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Fri, 24 Mar 2023 22:31:06 +0100 Subject: [PATCH 270/285] [doc] fix codacy badge in readme --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 2f257845..99bd3d95 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,8 @@ # PopSift -[![CII Best Practices](https://bestpractices.coreinfrastructure.org/projects/3728/badge)](https://bestpractices.coreinfrastructure.org/projects/3728) [![Codacy Badge](https://api.codacy.com/project/badge/Grade/8b0f7a68bc0d4df2ac89c6e732917caa)](https://app.codacy.com/manual/alicevision/popsift?utm_source=github.com&utm_medium=referral&utm_content=alicevision/popsift&utm_campaign=Badge_Grade_Settings) +[![CII Best Practices](https://bestpractices.coreinfrastructure.org/projects/3728/badge)](https://bestpractices.coreinfrastructure.org/projects/3728) [! 
+[![Codacy Badge](https://app.codacy.com/project/badge/Grade/64f9192b53df46b483e7cf5be7e2dddd)](https://app.codacy.com/gh/alicevision/popsift/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade) PopSift is an open-source implementation of the SIFT algorithm in CUDA. PopSift tries to stick as closely as possible to David Lowe's famous paper [1], while extracting features from an image in real-time at least on an NVidia GTX 980 Ti GPU. From 0ebc70d2bc4df92d71bf98c12b9f1126f60266b7 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Fri, 24 Mar 2023 22:32:10 +0100 Subject: [PATCH 271/285] [doc] update github action badge in readme fix #148 --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 99bd3d95..149bce79 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # PopSift -[![CII Best Practices](https://bestpractices.coreinfrastructure.org/projects/3728/badge)](https://bestpractices.coreinfrastructure.org/projects/3728) [! +[![CII Best Practices](https://bestpractices.coreinfrastructure.org/projects/3728/badge)](https://bestpractices.coreinfrastructure.org/projects/3728) [![Codacy Badge](https://app.codacy.com/project/badge/Grade/64f9192b53df46b483e7cf5be7e2dddd)](https://app.codacy.com/gh/alicevision/popsift/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade) PopSift is an open-source implementation of the SIFT algorithm in CUDA. @@ -101,10 +101,10 @@ In particular, users can choose to generate results very similar to VLFeat or re We acknowledge that there is at least one SIFT implementation that is vastly faster, but it makes considerable sacrifices in terms of accuracy and compatibility. ## Continuous integration: -- [![Build Status](https://travis-ci.org/alicevision/popsift.svg?branch=master)](https://travis-ci.org/alicevision/popsift) master branch. 
-- [![Build Status](https://travis-ci.org/alicevision/popsift.svg?branch=develop)](https://travis-ci.org/alicevision/popsift) develop branch. +- ![Continuous Integration](https://github.com/alicevision/popsift/workflows/Continuous%20Integration/badge.svg?branch=master) master branch on Linux. +- ![Continuous Integration](https://github.com/alicevision/popsift/workflows/Continuous%20Integration/badge.svg?branch=develop) develop branch on Linux. - [![Build status](https://ci.appveyor.com/api/projects/status/rsm5269hs288c2ji/branch/develop?svg=true)](https://ci.appveyor.com/project/AliceVision/popsift/branch/develop) - develop branch. + develop branch on Windows. ## License From 0e99e6988a3f137262a58f1b88601b79e623989c Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Sat, 25 Mar 2023 15:49:28 +0100 Subject: [PATCH 272/285] [doc]codacy fix --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index 149bce79..2284d1a3 100644 --- a/README.md +++ b/README.md @@ -101,8 +101,11 @@ In particular, users can choose to generate results very similar to VLFeat or re We acknowledge that there is at least one SIFT implementation that is vastly faster, but it makes considerable sacrifices in terms of accuracy and compatibility. ## Continuous integration: + - ![Continuous Integration](https://github.com/alicevision/popsift/workflows/Continuous%20Integration/badge.svg?branch=master) master branch on Linux. + - ![Continuous Integration](https://github.com/alicevision/popsift/workflows/Continuous%20Integration/badge.svg?branch=develop) develop branch on Linux. + - [![Build status](https://ci.appveyor.com/api/projects/status/rsm5269hs288c2ji/branch/develop?svg=true)](https://ci.appveyor.com/project/AliceVision/popsift/branch/develop) develop branch on Windows. 
From 7469b407d450d052b43c8a7893d0801298040feb Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Sat, 25 Mar 2023 15:50:04 +0100 Subject: [PATCH 273/285] [ci] take into account build type --- .github/workflows/continuous-integration.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/continuous-integration.yml b/.github/workflows/continuous-integration.yml index e4e42a57..2bea3b99 100644 --- a/.github/workflows/continuous-integration.yml +++ b/.github/workflows/continuous-integration.yml @@ -29,7 +29,7 @@ jobs: env: DEPS_INSTALL_DIR: /opt/ - BUILD_TYPE: Release + BUILD_TYPE: ${{ matrix.build_tpe }} CTEST_OUTPUT_ON_FAILURE: 1 steps: - uses: actions/checkout@v2 @@ -60,5 +60,6 @@ jobs: run: | cmake ../src/application \ -DBUILD_SHARED_LIBS:BOOL=ON \ + -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ -DCMAKE_PREFIX_PATH:PATH="$PWD/../../popsift_install;${DEPS_INSTALL_DIR}" make -j$(nproc) From 7801d02e3506bee30e151e3d6f0e17eca825d3cc Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Sat, 25 Mar 2023 16:18:21 +0100 Subject: [PATCH 274/285] [doc] codacy requires * instead of - --- README.md | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 2284d1a3..418d1278 100644 --- a/README.md +++ b/README.md @@ -102,12 +102,11 @@ We acknowledge that there is at least one SIFT implementation that is vastly fas ## Continuous integration: -- ![Continuous Integration](https://github.com/alicevision/popsift/workflows/Continuous%20Integration/badge.svg?branch=master) master branch on Linux. +* ![Continuous Integration](https://github.com/alicevision/popsift/workflows/Continuous%20Integration/badge.svg?branch=master) master branch on Linux. -- ![Continuous Integration](https://github.com/alicevision/popsift/workflows/Continuous%20Integration/badge.svg?branch=develop) develop branch on Linux. 
+* ![Continuous Integration](https://github.com/alicevision/popsift/workflows/Continuous%20Integration/badge.svg?branch=develop) develop branch on Linux. -- [![Build status](https://ci.appveyor.com/api/projects/status/rsm5269hs288c2ji/branch/develop?svg=true)](https://ci.appveyor.com/project/AliceVision/popsift/branch/develop) - develop branch on Windows. +* [![Build status](https://ci.appveyor.com/api/projects/status/rsm5269hs288c2ji/branch/develop?svg=true)](https://ci.appveyor.com/project/AliceVision/popsift/branch/develop) develop branch on Windows. ## License From 84280d5970ebe604192d8d1d520effed3742eb53 Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Sat, 25 Mar 2023 16:18:50 +0100 Subject: [PATCH 275/285] [ci] exclude Debug on latest cuda as there is a segmentation error --- .github/workflows/continuous-integration.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/continuous-integration.yml b/.github/workflows/continuous-integration.yml index 2bea3b99..f165597e 100644 --- a/.github/workflows/continuous-integration.yml +++ b/.github/workflows/continuous-integration.yml @@ -23,6 +23,10 @@ jobs: matrix: container: ["alicevision/popsift-deps:cuda10.2-ubuntu18.04", "alicevision/popsift-deps:cuda11.8.0-ubuntu20.04", "alicevision/popsift-deps:cuda12.1.0-ubuntu22.04"] build_tpe: ["Release", "Debug"] + exclude: + # excludes debug on this one as it has a segmentation fault during the compilation (!) 
+ - container: "alicevision/popsift-deps:cuda12.1.0-ubuntu22.04" + build_tpe: "Debug" container: image: ${{ matrix.container }} From e8bb2052888bde5ef7c3226a044b2cb139b6d64d Mon Sep 17 00:00:00 2001 From: Simone Gasparini Date: Sat, 10 Jun 2023 17:32:10 +0200 Subject: [PATCH 276/285] missing include --- src/popsift/common/debug_macros.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/popsift/common/debug_macros.h b/src/popsift/common/debug_macros.h index c8f31651..a497750c 100755 --- a/src/popsift/common/debug_macros.h +++ b/src/popsift/common/debug_macros.h @@ -15,6 +15,7 @@ #include #include #include +#include // synchronize device and check for an error void pop_sync_check_last_error( const char* file, size_t line ); From acb7be5154b3ac9fe18011f7883fc9f8182eec86 Mon Sep 17 00:00:00 2001 From: Carsten Griwodz Date: Fri, 26 Jul 2024 09:55:29 +0200 Subject: [PATCH 277/285] [cmake] CUDA as first order language The supported CCs are specified differently now, "native" and "all" simplify choices. libcudadevrt is now a private dependency of libpopsift. Applications using popsift with other CUDA functions may have to link it explicitly. 
--- CHANGES.md | 4 + CMakeLists.txt | 110 ++++++------------ cmake/ChooseCudaCC.cmake | 184 ------------------------------ cmake/sift_config.h.in | 12 +- src/CMakeLists.txt | 23 ++-- src/application/main.cpp | 2 +- src/application/match.cpp | 2 +- src/popsift/common/device_prop.cu | 10 ++ src/popsift/common/device_prop.h | 9 ++ 9 files changed, 78 insertions(+), 278 deletions(-) delete mode 100755 cmake/ChooseCudaCC.cmake diff --git a/CHANGES.md b/CHANGES.md index 9608b25d..1cfc0b51 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -17,6 +17,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [1.0.0] - YYYY-MM-DD +## 2024 + +- CMake: CUDA as first-order language, different CC selection + ### Added - Improved checks for CUDA textures [PR](https://github.com/alicevision/popsift/pull/89) - CMake: Improved support for all Cuda CC [PR](https://github.com/alicevision/popsift/pull/75) diff --git a/CMakeLists.txt b/CMakeLists.txt index f39f2fec..e4dfd920 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,16 @@ # CMake below 3.4 does not work with CUDA separable compilation at all -cmake_minimum_required(VERSION 3.12) +cmake_minimum_required(VERSION 3.27) -project(PopSift VERSION 1.0.0 LANGUAGES CXX) +project(PopSift VERSION 1.0.0 LANGUAGES CXX CUDA) + +# Policy to support CUDA as a first-order language for CMake. +# Since CMake 3.18. See https://cmake.org/cmake/help/latest/policy/CMP0104.html +cmake_policy(SET CMP0104 NEW) + +set(CMAKE_CUDA_ARCHITECTURES "all-major" + CACHE + STRING "Which CUDA CCs to support: native, all, all-major or an explicit list delimited by semicolons" + FORCE) # Set build path as a folder named as the platform (linux, windows, darwin...) 
plus the processor type set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/${CMAKE_SYSTEM_NAME}-${CMAKE_SYSTEM_PROCESSOR}") @@ -16,7 +25,6 @@ option(PopSift_USE_GRID_FILTER "Switch off grid filtering to massively reduce co option(PopSift_USE_NORMF "The __normf function computes Euclidean distance on large arrays. Fast but stability is uncertain." OFF) option(PopSift_NVCC_WARNINGS "Switch on several additional warning for CUDA nvcc" OFF) option(PopSift_USE_TEST_CMD "Add testing step for functional verification" OFF) -option(PopSift_NO_DEPRECATED_CUDA_SM_WARNINGS "Suppress warnings about soon to be deprecated cuda SM" ON) option(BUILD_SHARED_LIBS "Build shared libraries" ON) if(PopSift_USE_POSITION_INDEPENDENT_CODE AND NOT MSVC) @@ -55,17 +63,15 @@ include(GNUInstallDirs) if(BUILD_SHARED_LIBS) message(STATUS "BUILD_SHARED_LIBS ON") - # Need to declare CUDA_USE_STATIC_CUDA_RUNTIME as an option to ensure that it is not overwritten in FindCUDA. - option(CUDA_USE_STATIC_CUDA_RUNTIME "Use the static version of the CUDA runtime library if available" OFF) - set(CUDA_USE_STATIC_CUDA_RUNTIME OFF) - # Workaround to force deactivation of cuda static runtime for cmake < 3.10 - set(CUDA_cudart_static_LIBRARY 0) + # Auto-build dll exports on Windows set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) + + set(CMAKE_CUDA_RUNTIME_LIBRARY Shared) else() message(STATUS "BUILD_SHARED_LIBS OFF") - option(CUDA_USE_STATIC_CUDA_RUNTIME "Use the static version of the CUDA runtime library if available" ON) - set(CUDA_USE_STATIC_CUDA_RUNTIME ON) + + set(CMAKE_CUDA_RUNTIME_LIBRARY Static) endif() # Require threads because of std::thread. 
@@ -74,26 +80,15 @@ find_package(Threads REQUIRED) ################### # CUDA ################### -find_package(CUDA 7.0 REQUIRED) +include(CheckLanguage) +check_language(CUDA) -if(NOT CUDA_FOUND) - message(FATAL_ERROR "Could not find CUDA >= 7.0") -endif() +# Use this if necessary: "cmake -DCUDAToolkit_ROOT=/some/path" +# target_link_libraries(binary_linking_to_cudart PRIVATE CUDA::cudart) +find_package(CUDAToolkit) -message(STATUS "CUDA Version is ${CUDA_VERSION}") - -include(ChooseCudaCC) -if(NOT DEFINED PopSift_CUDA_CC_LIST) - chooseCudaCC(PopSift_CUDA_CC_LIST_BASIC - PopSift_CUDA_GENCODE_FLAGS - MIN_CC 30 - MIN_CUDA_VERSION 7.0) - set(PopSift_CUDA_CC_LIST ${PopSift_CUDA_CC_LIST_BASIC} CACHE STRING "CUDA CC versions to compile") -else() - getFlagsForCudaCCList(PopSift_CUDA_CC_LIST - PopSift_CUDA_GENCODE_FLAGS) -endif() -list(APPEND CUDA_NVCC_FLAGS "${PopSift_CUDA_GENCODE_FLAGS}") +message(STATUS "CUDA Version is ${CUDAToolkit_VERSION}") +set(CUDA_VERSION ${CUDAToolkit_VERSION}) if(PopSift_USE_NVTX_PROFILING) message(STATUS "PROFILING CPU CODE: NVTX is in use") @@ -104,46 +99,8 @@ if(PopSift_ERRCHK_AFTER_KERNEL) list(APPEND CUDA_NVCC_FLAGS "-DERRCHK_AFTER_KERNEL") endif() -set(CUDA_SEPARABLE_COMPILATION ON) - -if(PopSift_NO_DEPRECATED_CUDA_SM_WARNINGS) - list(APPEND CUDA_NVCC_FLAGS "-Wno-deprecated-gpu-targets") -endif() - -if(UNIX AND NOT APPLE) - list(APPEND CUDA_NVCC_FLAGS "-Xcompiler;-rdynamic") - # set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-Xptxas;-v") - # set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-Xptxas;-warn-double-usage") - list(APPEND CUDA_NVCC_FLAGS_DEBUG "--keep") - list(APPEND CUDA_NVCC_FLAGS_DEBUG "--source-in-ptx") -endif() - -# The following if should not be necessary, but apparently there is a bug in FindCUDA.cmake that -# generate an empty string in the nvcc command line causing the compilation to fail. 
-# see https://gitlab.kitware.com/cmake/cmake/issues/16411 -if(CMAKE_BUILD_TYPE STREQUAL "Debug") - message(STATUS "Building in debug mode") - list(APPEND CUDA_NVCC_FLAGS_DEBUG "-G") -endif() -list(APPEND CUDA_NVCC_FLAGS_RELEASE "-O3") - -if(PopSift_USE_POSITION_INDEPENDENT_CODE AND NOT MSVC) - list(APPEND CUDA_NVCC_FLAGS "-Xcompiler;-fPIC") -endif() - -# this is to ensure that on MSVC the flags for the linker are properly propagate even to the intermediate -# linking step. This seems not the case e.g. on vcpkg using ninja build. -if(MSVC) - if(BUILD_SHARED_LIBS) - set(PopSift_MVSC_LINKER "/MD") - else() - set(PopSift_MVSC_LINKER "/MT") - endif() - if(CMAKE_BUILD_TYPE STREQUAL "Debug") - set(PopSift_MVSC_LINKER "${PopSift_MVSC_LINKER}d") - endif() - list(APPEND CUDA_NVCC_FLAGS -Xcompiler ${PopSift_MVSC_LINKER}) -endif() +# This may not be required any more. +set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) # default stream per-thread implies that each host thread has one non-synchronizing 0-stream # currently, the code requires legacy mode @@ -159,7 +116,7 @@ if(CUDA_VERSION VERSION_GREATER_EQUAL "7.5") endif() endif() -set(PopSift_CXX_STANDARD 14) # Thrust/CUB requires C++14 starting with CUDA SDK 11 +set(PopSift_CXX_STANDARD 17) # Thrust/CUB requires C++14 starting with CUDA SDK 11 if(CUDA_VERSION_MAJOR LESS_EQUAL 8) set(PopSift_CXX_STANDARD 11) endif() @@ -181,9 +138,9 @@ else() endif() if(CUDA_VERSION VERSION_GREATER_EQUAL "9.0") - set(HAVE_SHFL_DOWN_SYNC 1) + set(PopSift_HAVE_SHFL_DOWN_SYNC 1) else() - set(HAVE_SHFL_DOWN_SYNC 0) + set(PopSift_HAVE_SHFL_DOWN_SYNC 0) endif() if(NOT PopSift_USE_GRID_FILTER) @@ -193,12 +150,8 @@ else() set(DISABLE_GRID_FILTER 0) endif() -# library required for CUDA dynamic parallelism, forgotten by CMake 3.4 -cuda_find_library_local_first(CUDA_CUDADEVRT_LIBRARY cudadevrt "\"cudadevrt\" library") - if(PopSift_USE_NVTX_PROFILING) # library required for NVTX profiling of the CPU - cuda_find_library_local_first(CUDA_NVTX_LIBRARY nvToolsExt 
"NVTX library") set(PopSift_USE_NVTX 1) else() set(PopSift_USE_NVTX 0) @@ -245,9 +198,14 @@ message(STATUS "Use CUDA NVTX for profiling: " ${PopSift_USE_NVTX_PROFILING}) message(STATUS "Synchronize and check CUDA error after every kernel: " ${PopSift_ERRCHK_AFTER_KERNEL}) message(STATUS "Grid filtering: " ${PopSift_USE_GRID_FILTER}) message(STATUS "Additional warning for CUDA nvcc: " ${PopSift_NVCC_WARNINGS}) -message(STATUS "Compiling for CUDA CCs: ${PopSift_CUDA_CC_LIST}") message(STATUS "Install path: " ${CMAKE_INSTALL_PREFIX}) message(STATUS "Testing step: " ${PopSift_USE_TEST_CMD}) + +message(STATUS "CMAKE_CUDA_COMPILER = ${CMAKE_CUDA_COMPILER}") +message(STATUS "CMAKE_CUDA_COMPILER_ID = ${CMAKE_CUDA_COMPILER_ID}") +message(STATUS "CMAKE_CUDA_COMPILER_VERSION = ${CMAKE_CUDA_COMPILER_VERSION}") +message(STATUS "CMAKE_CUDA_ARCHITECTURES = ${CMAKE_CUDA_ARCHITECTURES}") + if(PopSift_USE_TEST_CMD) message(STATUS "Path for test input: " ${PopSift_TESTFILE_PATH}) endif() diff --git a/cmake/ChooseCudaCC.cmake b/cmake/ChooseCudaCC.cmake deleted file mode 100755 index aba4eb91..00000000 --- a/cmake/ChooseCudaCC.cmake +++ /dev/null @@ -1,184 +0,0 @@ -# -# This file contains two functions: -# chooseCudaCC -# getFlagsForCudaCCList -# -# Motivation: -# CUDA hardware and SDKs are developing over time, different SDK support different -# hardware, and supported hardware differs depending on platform even for the same -# SDK version. This file attempts to provide a function that returns a valid selection -# of hardware for the current SDK and platform. It will require updates as CUDA develops, -# and it is currently not complete in terms of existing platforms that support CUDA. -# - -# -# Return the minimal set of supported Cuda CC -# -# Usage: -# chooseCudaCC(SUPPORTED_CC SUPPORTED_GENCODE_FLAGS -# [MIN_CUDA_VERSION X.Y] -# [MIN_CC XX ]) -# -# SUPPORTED_CC out variable. Stores the list of supported CC. -# SUPPORTED_GENCODE_FLAGS out variable. 
List of gencode flags to append to, e.g., CUDA_NVCC_FLAGS -# MIN_CUDA_VERSION the minimal supported version of cuda (e.g. 7.5, default 7.0). -# MIN_CC minimal supported Cuda CC by the project (e.g. 35, default 20) -# -# This function does not edit cache entries or variables in the parent scope -# except for the variables whose names are supplied for SUPPORTED_CC and -# SUPPORTED_GENCODE_FLAGS -# -# You may want to cache SUPPORTED_CC and append SUPPORTED_GENCODE_FLAGS to -# CUDA_NVCC_FLAGS. -# Like this: -# set(MYCC ${MYCC} CACHE STRING "CUDA CC versions to compile") -# end -# set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};${MY_GENCODE_FLAGS}") -# -function(chooseCudaCC SUPPORTED_CC SUPPORTED_GENCODE_FLAGS) - set(options "") - set(oneValueArgs MIN_CUDA_VERSION MIN_CC) - set(multipleValueArgs "") - cmake_parse_arguments(CHOOSE_CUDA "${options}" "${oneValueArgs}" "${multipleValueArgs}" ${ARGN}) - - if(NOT DEFINED CHOOSE_CUDA_MIN_CC) - set(CHOOSE_CUDA_MIN_CC 20) - endif() - if(NOT DEFINED CHOOSE_CUDA_MIN_CUDA_VERSION) - set(CHOOSE_CUDA_MIN_CUDA_VERSION 7.0) - endif() - - find_package(CUDA ${CHOOSE_CUDA_MIN_CUDA_VERSION} REQUIRED) - - if(NOT CUDA_FOUND) - message(FATAL_ERROR "Could not find CUDA >= ${CHOOSE_CUDA_MIN_CUDA_VERSION}") - endif() - - # - # Create a list of possible CCs for each host processor. - # This may require tuning: CUDA cards exist in AIX machines with POWER CPUs, - # it is possible that non-Tegra ARM systems exist as well. - # For now, this is my best guess. 
- # - set(TEGRA_SUPPORTED_PROCESSORS "armv71;arm;aarch64") - set(OTHER_SUPPORTED_PROCESSORS "i686;x86_64;AMD64") - - set(CC_LIST_BY_SYSTEM_PROCESSOR "") - if(CMAKE_SYSTEM_PROCESSOR IN_LIST OTHER_SUPPORTED_PROCESSORS) - list(APPEND CC_LIST_BY_SYSTEM_PROCESSOR "20;21;30;35;50;52;60;61;70;75;80;86;89;90") - endif() - if(CMAKE_SYSTEM_PROCESSOR IN_LIST TEGRA_SUPPORTED_PROCESSORS) - list(APPEND CC_LIST_BY_SYSTEM_PROCESSOR "32;53;62;72") - endif() - if(NOT CC_LIST_BY_SYSTEM_PROCESSOR) - message(FATAL_ERROR "Unknown how to build for ${CMAKE_SYSTEM_PROCESSOR}") - endif() - - # - # Default setting of the CUDA CC versions to compile. - # Shortening the lists saves a lot of compile time. - # - - # The current version last time this list was updated was CUDA 12.1. - if(CUDA_VERSION VERSION_GREATER_EQUAL 12) - set(CUDA_MIN_CC 50) - set(CUDA_MAX_CC 90) - elseif(CUDA_VERSION VERSION_GREATER_EQUAL 11.8) - set(CUDA_MIN_CC 35) - set(CUDA_MAX_CC 90) - elseif(CUDA_VERSION VERSION_GREATER_EQUAL 11.1) - set(CUDA_MIN_CC 35) - set(CUDA_MAX_CC 86) - elseif(CUDA_VERSION_MAJOR GREATER_EQUAL 11) - set(CUDA_MIN_CC 35) - set(CUDA_MAX_CC 80) - elseif(CUDA_VERSION_MAJOR GREATER_EQUAL 10) - set(CUDA_MIN_CC 30) - set(CUDA_MAX_CC 75) - elseif(CUDA_VERSION_MAJOR GREATER_EQUAL 9) - set(CUDA_MIN_CC 30) - set(CUDA_MAX_CC 72) - elseif(CUDA_VERSION_MAJOR GREATER_EQUAL 8) - set(CUDA_MIN_CC 20) - set(CUDA_MAX_CC 62) - elseif(CUDA_VERSION_MAJOR GREATER_EQUAL 7) - set(CUDA_MIN_CC 20) - set(CUDA_MAX_CC 53) - else() - message(FATAL_ERROR "We do not support a CUDA SDK below version 7.0") - endif() - if(${CHOOSE_CUDA_MIN_CC} GREATER ${CUDA_MIN_CC}) - set(CUDA_MIN_CC ${CHOOSE_CUDA_MIN_CC}) - endif() - - set(CC_LIST "") - foreach(CC ${CC_LIST_BY_SYSTEM_PROCESSOR}) - if( (${CC} GREATER_EQUAL ${CUDA_MIN_CC}) AND - (${CC} LESS_EQUAL ${CUDA_MAX_CC}) ) - list(APPEND CC_LIST ${CC}) - endif() - endforeach() - - # - # Add all requested CUDA CCs to the command line for offline compilation - # - set(GENCODE_FLAGS "") - 
list(SORT CC_LIST) - foreach(CC_VERSION ${CC_LIST}) - list(APPEND GENCODE_FLAGS "-gencode;arch=compute_${CC_VERSION},code=sm_${CC_VERSION}") - endforeach() - - # - # Use the highest request CUDA CC for CUDA JIT compilation - # - list(LENGTH CC_LIST CC_LIST_LEN) - MATH(EXPR CC_LIST_LEN "${CC_LIST_LEN}-1") - list(GET CC_LIST ${CC_LIST_LEN} CC_LIST_LAST) - list(APPEND GENCODE_FLAGS "-gencode;arch=compute_${CC_LIST_LAST},code=compute_${CC_LIST_LAST}") - - # - # Two variables are exported to the parent scope. One is passed through the - # environment (CUDA_NVCC_FLAGS), the other is passed by name (SUPPORTED_CC) - # - set(${SUPPORTED_GENCODE_FLAGS} "${GENCODE_FLAGS}" PARENT_SCOPE) - set(${SUPPORTED_CC} "${CC_LIST}" PARENT_SCOPE) -endfunction() - -# -# Return the gencode parameters for a given list of CCs. -# -# Usage: -# getFlagsForCudaCCList(INPUT_CC_LIST SUPPORTED_GENCODE_FLAGS) -# -# INPUT_CC_LIST in variable. Contains a list of supported CCs. -# SUPPORTED_GENCODE_FLAGS out variable. List of gencode flags to append to, e.g., CUDA_NVCC_FLAGS -# -function(getFlagsForCudaCCList INPUT_CC_LIST SUPPORTED_GENCODE_FLAGS) - set(CC_LIST "${${INPUT_CC_LIST}}") - - # - # Add all requested CUDA CCs to the command line for offline compilation - # - set(GENCODE_FLAGS "") - list(SORT CC_LIST) - foreach(CC_VERSION ${CC_LIST}) - list(APPEND GENCODE_FLAGS "-gencode;arch=compute_${CC_VERSION},code=sm_${CC_VERSION}") - endforeach() - - # - # Use the highest request CUDA CC for CUDA JIT compilation - # - list(LENGTH CC_LIST CC_LIST_LEN) - MATH(EXPR CC_LIST_LEN "${CC_LIST_LEN}-1") - list(GET CC_LIST ${CC_LIST_LEN} CC_LIST_LAST) - list(APPEND GENCODE_FLAGS "-gencode;arch=compute_${CC_LIST_LAST},code=compute_${CC_LIST_LAST}") - - message(STATUS "Setting gencode flags: ${GENCODE_FLAGS}") - - # - # Two variables are exported to the parent scope. 
One is passed through the - # environment (CUDA_NVCC_FLAGS), the other is passed by name (SUPPORTED_CC) - # - set(${SUPPORTED_GENCODE_FLAGS} "${GENCODE_FLAGS}" PARENT_SCOPE) -endfunction() - diff --git a/cmake/sift_config.h.in b/cmake/sift_config.h.in index 427cfe42..86095a55 100644 --- a/cmake/sift_config.h.in +++ b/cmake/sift_config.h.in @@ -8,11 +8,11 @@ #pragma once -#define POPSIFT_IS_DEFINED(F) F() == 1 - -#define POPSIFT_HAVE_SHFL_DOWN_SYNC() @HAVE_SHFL_DOWN_SYNC@ -#define POPSIFT_HAVE_NORMF() @PopSift_HAVE_NORMF@ -#define POPSIFT_DISABLE_GRID_FILTER() @DISABLE_GRID_FILTER@ -#define POPSIFT_USE_NVTX() @PopSift_USE_NVTX@ +#define POPSIFT_IS_DEFINED(F) F() == 1 +#define POPSIFT_IS_UNDEFINED(F) F() == 0 +#define POPSIFT_HAVE_SHFL_DOWN_SYNC() @PopSift_HAVE_SHFL_DOWN_SYNC@ +#define POPSIFT_HAVE_NORMF() @PopSift_HAVE_NORMF@ +#define POPSIFT_DISABLE_GRID_FILTER() @DISABLE_GRID_FILTER@ +#define POPSIFT_USE_NVTX() @PopSift_USE_NVTX@ diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 0380dd41..52c1691f 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,5 +1,5 @@ -CUDA_ADD_LIBRARY(popsift +add_library(popsift popsift/popsift.cpp popsift/popsift.h popsift/features.cu popsift/features.h popsift/sift_constants.cu popsift/sift_constants.h @@ -39,6 +39,18 @@ CUDA_ADD_LIBRARY(popsift popsift/common/vec_macros.h popsift/common/clamp.h) +target_link_libraries(popsift + PUBLIC + CUDA::cublas + Threads::Threads) + +set_target_properties(popsift PROPERTIES VERSION ${PROJECT_VERSION}) +set_target_properties(popsift PROPERTIES DEBUG_POSTFIX "d") +set_target_properties(popsift PROPERTIES CUDA_SEPARABLE_COMPILATION ON) + +if(PopSift_USE_NVTX) + target_link_libraries(popsift PUBLIC CUDA::nvtx3) +endif(PopSift_USE_NVTX) # build directory containing the automatically generated files set(popsift_generated_dir "${CMAKE_CURRENT_BINARY_DIR}/generated") @@ -53,15 +65,6 @@ target_include_directories(popsift $ ${CUDA_INCLUDE_DIRS}) - -set_target_properties(popsift 
PROPERTIES VERSION ${PROJECT_VERSION}) -set_target_properties(popsift PROPERTIES DEBUG_POSTFIX "d") - -# cannot use PRIVATE here as there is a bug in FindCUDA and CUDA_ADD_LIBRARY -# https://gitlab.kitware.com/cmake/cmake/issues/16097 -target_link_libraries(popsift ${CUDA_CUDADEVRT_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} Threads::Threads) - - # EXPORTING THE LIBRARY # # place to put the cmake-related files diff --git a/src/application/main.cpp b/src/application/main.cpp index 0eec1c22..a35725f0 100755 --- a/src/application/main.cpp +++ b/src/application/main.cpp @@ -265,7 +265,7 @@ void read_job( SiftJob* job, bool really_write ) int main(int argc, char **argv) { - cudaDeviceReset(); + popsift::cuda::reset(); popsift::Config config; list inputFiles; diff --git a/src/application/match.cpp b/src/application/match.cpp index 852d9b62..73054271 100755 --- a/src/application/match.cpp +++ b/src/application/match.cpp @@ -219,7 +219,7 @@ SiftJob* process_image( const string& inputFile, PopSift& PopSift ) int main(int argc, char **argv) { - cudaDeviceReset(); + popsift::cuda::reset(); popsift::Config config; string lFile{}; diff --git a/src/popsift/common/device_prop.cu b/src/popsift/common/device_prop.cu index a55821cd..44d47c1d 100644 --- a/src/popsift/common/device_prop.cu +++ b/src/popsift/common/device_prop.cu @@ -14,6 +14,16 @@ namespace popsift { namespace cuda { using namespace std; +void reset() +{ + cudaDeviceReset(); +} + +void sync() +{ + cudaDeviceSynchronize(); +} + device_prop_t::device_prop_t( ) { int currentDevice; diff --git a/src/popsift/common/device_prop.h b/src/popsift/common/device_prop.h index 7a0b142d..8910e9c8 100644 --- a/src/popsift/common/device_prop.h +++ b/src/popsift/common/device_prop.h @@ -13,6 +13,15 @@ namespace popsift { namespace cuda { +/** A call to cudaDeviceReset() + */ +void reset(); + +/** A call to cudaDeviceSynchronize() + */ +void sync(); + + /** * @brief A class to recover, query and print the information about the cuda device. 
*/ From ed6602656c81aa39972a544306c2cc1b80c617a6 Mon Sep 17 00:00:00 2001 From: Carsten Griwodz Date: Wed, 31 Jul 2024 07:30:17 +0200 Subject: [PATCH 278/285] [cuda/c++] PI is needed on host as well, C++20 constants not available yet --- src/popsift/sift_constants.h | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/src/popsift/sift_constants.h b/src/popsift/sift_constants.h index 20b3012d..883515a7 100755 --- a/src/popsift/sift_constants.h +++ b/src/popsift/sift_constants.h @@ -15,20 +15,27 @@ #ifndef NINF #define NINF (-INF) #endif -#ifdef M_PI -#undef M_PI -// #define M_PI 3.14159265358979323846f -#endif -__device__ static const -float M_PI = 3.14159265358979323846f; -#ifdef M_PI2 -#undef M_PI2 -// #define M_PI2 (2.0F * M_PI) -#endif -__device__ static const -float M_PI2 = 2.0f * 3.14159265358979323846f; -#define M_4RPI (4.0f / M_PI) +#undef USE_CONSTANT_PI +#ifdef USE_CONSTANT_PI + #ifdef M_PI + #undef M_PI + #endif + __device__ static const float M_PI = 3.14159265358979323846f; + #ifdef M_PI2 + #undef M_PI2 + #endif + __device__ static const float M_PI2 = 2.0f * 3.14159265358979323846f; + #define M_4RPI (4.0f / M_PI) +#else + #ifndef M_PI + #define M_PI 3.14159265358979323846f + #endif + #ifndef M_PI2 + #define M_PI2 (2.0F * M_PI) + #endif + #define M_4RPI (4.0f / M_PI) +#endif #define DESC_MIN_FLOAT 1E-15F From ddbc2e59a759f19f854b967f94b170bf370dbba4 Mon Sep 17 00:00:00 2001 From: Carsten Griwodz Date: Fri, 26 Jul 2024 15:12:30 +0200 Subject: [PATCH 279/285] [ci] update Travis require at least Boost 1.71 for demo applications use Ubuntu 22.04 and CUDA SDK 11/12 use CMake to 3.24 remove cuBlas from linking, not used in PopSift --- .github/workflows/continuous-integration.yml | 6 ++- .travis.yml | 41 ++++++-------------- CMakeLists.txt | 6 +-- src/CMakeLists.txt | 22 +++++++---- src/application/CMakeLists.txt | 6 +-- src/application/main.cpp | 20 ---------- src/application/match.cpp | 12 ------ 
src/popsift/{popsift.cpp => popsift.cu} | 0 8 files changed, 36 insertions(+), 77 deletions(-) rename src/popsift/{popsift.cpp => popsift.cu} (100%) diff --git a/.github/workflows/continuous-integration.yml b/.github/workflows/continuous-integration.yml index f165597e..8b46ec1b 100644 --- a/.github/workflows/continuous-integration.yml +++ b/.github/workflows/continuous-integration.yml @@ -21,12 +21,16 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - container: ["alicevision/popsift-deps:cuda10.2-ubuntu18.04", "alicevision/popsift-deps:cuda11.8.0-ubuntu20.04", "alicevision/popsift-deps:cuda12.1.0-ubuntu22.04"] + container: ["alicevision/popsift-deps:cuda11.8.0-ubuntu20.04", "alicevision/popsift-deps:cuda12.1.0-ubuntu22.04"] build_tpe: ["Release", "Debug"] exclude: # excludes debug on this one as it has a segmentation fault during the compilation (!) - container: "alicevision/popsift-deps:cuda12.1.0-ubuntu22.04" build_tpe: "Debug" + - container: "alicevision/popsift-deps:cuda12.1.0-ubuntu22.04" + build_tpe: "Release" + - container: "alicevision/popsift-deps:cuda11.8.0-ubuntu20.04" + build_tpe: "Debug" container: image: ${{ matrix.container }} diff --git a/.travis.yml b/.travis.yml index dd24a21b..030cb775 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,4 +1,4 @@ -dist: xenial +dist: jammy language: cpp compiler: gcc @@ -13,10 +13,8 @@ addons: env: matrix: - - CUDA_VERSION_MAJOR="8" CUDA_VERSION_MINOR="0" CUDA_PKG_LONGVERSION="${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}.61-1" CUDA_PKG_VERSION="${CUDA_VERSION_MAJOR}-${CUDA_VERSION_MINOR}" - - CUDA_VERSION_MAJOR="9" CUDA_VERSION_MINOR="2" CUDA_PKG_LONGVERSION="${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}.148-1" CUDA_PKG_VERSION="${CUDA_VERSION_MAJOR}-${CUDA_VERSION_MINOR}" - - CUDA_VERSION_MAJOR="10" CUDA_VERSION_MINOR="2" CUDA_PKG_LONGVERSION="${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}.89-1" CUDA_PKG_VERSION="${CUDA_VERSION_MAJOR}-${CUDA_VERSION_MINOR}" - - CUDA_VERSION_MAJOR="11" CUDA_VERSION_MINOR="0" 
CUDA_PKG_LONGVERSION="${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}.2-1" CUDA_PKG_VERSION="${CUDA_VERSION_MAJOR}-${CUDA_VERSION_MINOR}" + - CUDA_VERSION_MAJOR="11" CUDA_VERSION_MINOR="8" CUDA_PKG_LONGVERSION="${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}.0-1" CUDA_PKG_VERSION="${CUDA_VERSION_MAJOR}-${CUDA_VERSION_MINOR}" + - CUDA_VERSION_MAJOR="12" CUDA_VERSION_MINOR="5" CUDA_PKG_LONGVERSION="${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}.1-1" CUDA_PKG_VERSION="${CUDA_VERSION_MAJOR}-${CUDA_VERSION_MINOR}" global: @@ -34,7 +32,7 @@ env: - POPSIFT_APP_INSTALL_RELEASE=${POPSIFT_APP_BUILD_RELEASE}/install - POPSIFT_APP_INSTALL_DEBUG=${POPSIFT_APP_BUILD_DEBUG}/install # CMAKE - - CMAKE_URL="https://cmake.org/files/v3.13/cmake-3.13.5-Linux-x86_64.tar.gz" + - CMAKE_URL="https://github.com/Kitware/CMake/releases/download/v3.30.1/cmake-3.30.1-linux-x86_64.tar.gz" - CMAKE_ROOT=${TRAVIS_BUILD_DIR}/cmake - CMAKE_SOURCE=${CMAKE_ROOT}/source - CMAKE_INSTALL=${CMAKE_ROOT}/install @@ -57,30 +55,12 @@ before_install: fi install: - - UBUNTU_VERSION=ubuntu1604 - - > - if [ ${CUDA_VERSION_MAJOR} -lt 11 ]; then - CUDA_REPO_PKG=cuda-repo-${UBUNTU_VERSION}_${CUDA_PKG_LONGVERSION}_amd64.deb - wget http://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/x86_64/$CUDA_REPO_PKG - travis_retry sudo apt-key adv --fetch-keys http://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/x86_64/7fa2af80.pub - sudo dpkg -i $CUDA_REPO_PKG - rm ${CUDA_REPO_PKG} - travis_retry sudo apt-get -y update - # cuda > 10.0 changed cublas naming - if [ ${CUDA_VERSION_MAJOR} -lt 10 ]; then - CUBLAS_PKG=cuda-cublas-dev-$CUDA_PKG_VERSION - else - CUBLAS_PKG=libcublas-dev - fi - travis_retry sudo apt-get install -y --no-install-recommends --allow-unauthenticated cuda-core-$CUDA_PKG_VERSION cuda-cudart-dev-$CUDA_PKG_VERSION ${CUBLAS_PKG} cuda-curand-dev-$CUDA_PKG_VERSION - sudo ln -s /usr/local/cuda-${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR} /usr/local/cuda - else - wget 
https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/cuda-ubuntu1604.pin - travis_retry sudo mv cuda-ubuntu1604.pin /etc/apt/preferences.d/cuda-repository-pin-600 - travis_retry sudo apt-key adv --fetch-keys http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/7fa2af80.pub - travis_retry sudo add-apt-repository "deb http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/ /" - sudo apt-get update && sudo apt-get -y install cuda - fi + - UBUNTU_VERSION=ubuntu2204 + - wget https://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/x86_64/cuda-${UBUNTU_VERSION}.pin + - travis_retry sudo mv cuda-${UBUNTU_VERSION}.pin /etc/apt/preferences.d/cuda-repository-pin-600 + - travis_retry sudo apt-key adv --fetch-keys http://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/x86_64/3bf863cc.pub + - travis_retry sudo add-apt-repository "deb http://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/x86_64/ /" + - sudo apt-get update && sudo apt-get -y install cuda # - CUDA_REPO_PKG=cuda-repo-${UBUNTU_VERSION}_${CUDA_PKG_LONGVERSION}_amd64.deb # - wget http://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/x86_64/$CUDA_REPO_PKG @@ -131,3 +111,4 @@ cache: apt: true directories: - ${CMAKE_INSTALL} + diff --git a/CMakeLists.txt b/CMakeLists.txt index e4dfd920..06b08e3e 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ -# CMake below 3.4 does not work with CUDA separable compilation at all -cmake_minimum_required(VERSION 3.27) +# First-order language CUDA requires at least CMake 3.18 +cmake_minimum_required(VERSION 3.24) project(PopSift VERSION 1.0.0 LANGUAGES CXX CUDA) @@ -7,7 +7,7 @@ project(PopSift VERSION 1.0.0 LANGUAGES CXX CUDA) # Since CMake 3.18. 
See https://cmake.org/cmake/help/latest/policy/CMP0104.html cmake_policy(SET CMP0104 NEW) -set(CMAKE_CUDA_ARCHITECTURES "all-major" +set(CMAKE_CUDA_ARCHITECTURES "native" CACHE STRING "Which CUDA CCs to support: native, all, all-major or an explicit list delimited by semicolons" FORCE) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 52c1691f..07dc81f3 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,6 +1,6 @@ -add_library(popsift - popsift/popsift.cpp popsift/popsift.h +add_library(popsift SHARED + popsift/popsift.cu popsift/popsift.h popsift/features.cu popsift/features.h popsift/sift_constants.cu popsift/sift_constants.h popsift/sift_conf.cu popsift/sift_conf.h @@ -41,29 +41,35 @@ add_library(popsift target_link_libraries(popsift PUBLIC - CUDA::cublas + CUDA::cudart + CUDA::nvToolsExt Threads::Threads) +# expected library to link was: "CUDA::nvtx3" but it appears to be simply nvToolsExt +# or maybe ${NVTX_LIBRARY} ??? + +# It is necessary to choose between shared and static, otherwise the CUDA_RUNTIME_LIBRARY target property +# will not be set. 
+# According to https://cmake.org/cmake/help/latest/variable/CMAKE_CUDA_RUNTIME_LIBRARY.html +string(APPEND CMAKE_CUDA_FLAGS " --cudart shared") set_target_properties(popsift PROPERTIES VERSION ${PROJECT_VERSION}) set_target_properties(popsift PROPERTIES DEBUG_POSTFIX "d") set_target_properties(popsift PROPERTIES CUDA_SEPARABLE_COMPILATION ON) -if(PopSift_USE_NVTX) - target_link_libraries(popsift PUBLIC CUDA::nvtx3) -endif(PopSift_USE_NVTX) - # build directory containing the automatically generated files set(popsift_generated_dir "${CMAKE_CURRENT_BINARY_DIR}/generated") # BUILD_INTERFACE allows to include the directory with source only when target is # built in the building tree (ie, not from an install location) +# The CUDA install dir variable has changed from the old CUDA_INCLUDE_DIRS to the new CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES target_include_directories(popsift PUBLIC $ $ $ $ - ${CUDA_INCLUDE_DIRS}) + ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES} + ) # EXPORTING THE LIBRARY # diff --git a/src/application/CMakeLists.txt b/src/application/CMakeLists.txt index 3b28cec8..0b6bdf20 100755 --- a/src/application/CMakeLists.txt +++ b/src/application/CMakeLists.txt @@ -42,7 +42,7 @@ find_package(DevIL COMPONENTS IL ILU) # yields IL_FOUND, IL_LIBRARIES, IL_INCLUD if(PopSift_BOOST_USE_STATIC_LIBS) set(Boost_USE_STATIC_LIBS ON) endif() -find_package(Boost 1.53.0 REQUIRED COMPONENTS filesystem program_options system) +find_package(Boost 1.71.0 REQUIRED COMPONENTS filesystem program_options system) if(WIN32) add_definitions("-DBOOST_ALL_NO_LIB") endif(WIN32) @@ -73,7 +73,7 @@ add_executable(popsift-demo main.cpp pgmread.cpp pgmread.h) set_property(TARGET popsift-demo PROPERTY CXX_STANDARD 11) target_compile_options(popsift-demo PRIVATE ${PD_COMPILE_OPTIONS} ) -target_include_directories(popsift-demo PUBLIC ${PD_INCLUDE_DIRS}) +target_include_directories(popsift-demo PUBLIC PopSift::popsift ${PD_INCLUDE_DIRS}) target_compile_definitions(popsift-demo PRIVATE 
${Boost_DEFINITIONS}) target_link_libraries(popsift-demo PUBLIC PopSift::popsift ${PD_LINK_LIBS}) @@ -87,7 +87,7 @@ add_executable(popsift-match match.cpp pgmread.cpp pgmread.h) set_property(TARGET popsift-match PROPERTY CXX_STANDARD 11) target_compile_options(popsift-match PRIVATE ${PD_COMPILE_OPTIONS} ) -target_include_directories(popsift-match PUBLIC ${PD_INCLUDE_DIRS}) +target_include_directories(popsift-match PUBLIC PopSift::popsift ${PD_INCLUDE_DIRS}) target_compile_definitions(popsift-match PRIVATE ${Boost_DEFINITIONS}) target_link_libraries(popsift-match PUBLIC PopSift::popsift ${PD_LINK_LIBS}) diff --git a/src/application/main.cpp b/src/application/main.cpp index a35725f0..bf1128ff 100755 --- a/src/application/main.cpp +++ b/src/application/main.cpp @@ -30,13 +30,6 @@ #endif #include "pgmread.h" -#if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX) -#include -#else -#define nvtxRangePushA(a) -#define nvtxRangePop() -#endif - using namespace std; static bool print_dev_info = false; @@ -183,8 +176,6 @@ SiftJob* process_image( const string& inputFile, PopSift& PopSift ) exit( -1 ); } - nvtxRangePushA( "load and convert image - devil" ); - ilImage img; if( img.Load( inputFile.c_str() ) == false ) { cerr << "Could not load image " << inputFile << endl; @@ -200,8 +191,6 @@ SiftJob* process_image( const string& inputFile, PopSift& PopSift ) image_data = img.GetData(); - nvtxRangePop( ); // "load and convert image - devil" - job = PopSift.enqueue( w, h, image_data ); img.Clear(); @@ -209,7 +198,6 @@ SiftJob* process_image( const string& inputFile, PopSift& PopSift ) else #endif { - nvtxRangePushA( "load and convert image - pgmread" ); int w{}; int h{}; image_data = readPGMfile( inputFile, w, h ); @@ -217,8 +205,6 @@ SiftJob* process_image( const string& inputFile, PopSift& PopSift ) exit( EXIT_FAILURE ); } - nvtxRangePop( ); // "load and convert image - pgmread" - if( ! 
float_mode ) { // PopSift.init( w, h ); @@ -251,16 +237,10 @@ void read_job( SiftJob* job, bool really_write ) << endl; if( really_write ) { - nvtxRangePushA( "Writing features to disk" ); - std::ofstream of( "output-features.txt" ); feature_list->print( of, write_as_uchar ); } delete feature_list; - - if( really_write ) { - nvtxRangePop( ); // Writing features to disk - } } int main(int argc, char **argv) diff --git a/src/application/match.cpp b/src/application/match.cpp index 73054271..3460975d 100755 --- a/src/application/match.cpp +++ b/src/application/match.cpp @@ -30,13 +30,6 @@ #endif #include "pgmread.h" -#if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX) -#include -#else -#define nvtxRangePushA(a) -#define nvtxRangePop() -#endif - using namespace std; static bool print_dev_info {false}; @@ -171,7 +164,6 @@ SiftJob* process_image( const string& inputFile, PopSift& PopSift ) unsigned char* image_data; SiftJob* job; - nvtxRangePushA( "load and convert image" ); #ifdef USE_DEVIL if( ! pgmread_loading ) { @@ -189,8 +181,6 @@ SiftJob* process_image( const string& inputFile, PopSift& PopSift ) cout << "Loading " << w << " x " << h << " image " << inputFile << endl; image_data = img.GetData(); - nvtxRangePop( ); - // PopSift.init( w, h ); job = PopSift.enqueue( w, h, image_data ); @@ -206,8 +196,6 @@ SiftJob* process_image( const string& inputFile, PopSift& PopSift ) exit( EXIT_FAILURE ); } - nvtxRangePop( ); - // PopSift.init( w, h ); job = PopSift.enqueue( w, h, image_data ); diff --git a/src/popsift/popsift.cpp b/src/popsift/popsift.cu similarity index 100% rename from src/popsift/popsift.cpp rename to src/popsift/popsift.cu From 7bbad3802a4fd757e1c50c28a48d065f67d87c0a Mon Sep 17 00:00:00 2001 From: Carsten Griwodz Date: Fri, 26 Jul 2024 13:35:54 +0200 Subject: [PATCH 280/285] [ci] Update cudaInstallAppveyor.cmd added dependencies on CUDAToolkit to config file change CC from native to all-major because build system has no GPU platform toolset for VS19 disable NVTX 
explicitly --- CMakeLists.txt | 2 +- appveyor.yml | 6 ++--- cmake/Config.cmake.in | 1 + cudaInstallAppveyor.cmd | 49 +++++++++++++++++++++++++++++++---------- 4 files changed, 42 insertions(+), 16 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 06b08e3e..a9c033cb 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,7 +7,7 @@ project(PopSift VERSION 1.0.0 LANGUAGES CXX CUDA) # Since CMake 3.18. See https://cmake.org/cmake/help/latest/policy/CMP0104.html cmake_policy(SET CMP0104 NEW) -set(CMAKE_CUDA_ARCHITECTURES "native" +set(CMAKE_CUDA_ARCHITECTURES "all-major" CACHE STRING "Which CUDA CCs to support: native, all, all-major or an explicit list delimited by semicolons" FORCE) diff --git a/appveyor.yml b/appveyor.yml index dc822f2b..e7bac169 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -1,13 +1,13 @@ version: '1.0.{build}' -image: Visual Studio 2015 +image: Visual Studio 2022 platform: - x64 configuration: - Release - - Debug +# - Debug #environment: # matrix: @@ -26,7 +26,7 @@ install: before_build: - md build - cd build - - cmake -G "Visual Studio 14 2015" -A x64 -T v140,host=x64 -DBUILD_SHARED_LIBS=%DBUILD_SHARED_LIBS% -DPopSift_BUILD_DOCS:BOOL=OFF -DPopSift_USE_POSITION_INDEPENDENT_CODE:BOOL=%DBUILD_SHARED_LIBS% -DPopSift_BUILD_EXAMPLES:BOOL=ON -DCMAKE_BUILD_TYPE=%configuration% -DCMAKE_TOOLCHAIN_FILE=c:/tools/vcpkg/scripts/buildsystems/vcpkg.cmake .. + - cmake -G "Visual Studio 17 2022" -A x64 -T v143,host=x64,cuda="%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.5" -DBUILD_SHARED_LIBS=%DBUILD_SHARED_LIBS% -DPopSift_USE_NVTX_PROFILING:BOOL=OFF -DPopSift_BUILD_DOCS:BOOL=OFF -DPopSift_USE_POSITION_INDEPENDENT_CODE:BOOL=%DBUILD_SHARED_LIBS% -DPopSift_BUILD_EXAMPLES:BOOL=ON -DCMAKE_BUILD_TYPE=%configuration% -DCMAKE_TOOLCHAIN_FILE=c:/tools/vcpkg/scripts/buildsystems/vcpkg.cmake .. 
- ls -l build: diff --git a/cmake/Config.cmake.in b/cmake/Config.cmake.in index 30eea599..b8fce13a 100644 --- a/cmake/Config.cmake.in +++ b/cmake/Config.cmake.in @@ -40,6 +40,7 @@ include(CMakeFindDependencyMacro) find_dependency(Threads REQUIRED) +find_dependency(CUDAToolkit REQUIRED) include("${CMAKE_CURRENT_LIST_DIR}/@popsift_targets_export_name@.cmake") check_required_components("@PROJECT_NAME@") diff --git a/cudaInstallAppveyor.cmd b/cudaInstallAppveyor.cmd index 381d4048..9d43f7fa 100644 --- a/cudaInstallAppveyor.cmd +++ b/cudaInstallAppveyor.cmd @@ -1,19 +1,44 @@ @echo off -echo Downloading CUDA toolkit 9 -appveyor DownloadFile https://developer.nvidia.com/compute/cuda/9.1/Prod/local_installers/cuda_9.1.85_windows -FileName cuda_9.1.85_windows.exe +echo Downloading CUDA toolkit 12 for Windows 10 +# appveyor DownloadFile https://developer.download.nvidia.com/compute/cuda/12.5.1/network_installers/cuda_12.5.1_windows_network.exe -Filename cuda_12.5.1_windows.exe + +appveyor DownloadFile https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-12.5.82-archive.zip -Filename cuda_nvcc.zip +appveyor DownloadFile https://developer.download.nvidia.com/compute/cuda/redist/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-12.5.82-archive.zip -Filename cuda_cudart.zip +appveyor DownloadFile https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-12.5.82-archive.zip -Filename cuda_nvtx.zip +appveyor DownloadFile https://developer.download.nvidia.com/compute/cuda/redist/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-12.5.82-archive.zip -Filename vs_integration.zip +dir + +echo Unzipping CUDA toolkit 12 +tar -xf cuda_nvcc.zip +tar -xf cuda_cudart.zip +tar -xf cuda_nvtx.zip +tar -xf vs_integration.zip dir -echo Installing CUDA toolkit 9 -cuda_9.1.85_windows.exe -s nvcc_9.1 ^ - cublas_9.1 ^ - cublas_dev_9.1 ^ - cudart_9.1 ^ - 
curand_9.1 ^ - curand_dev_9.1 -echo CUDA toolkit 9 installed +echo Making CUDA install dir(s) +mkdir "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.5" +mkdir "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.5\extras" + +echo Copying toolkit files to install dir(s) +xcopy cuda_cudart-windows-x86_64-12.5.82-archive "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.5" /s /e /i /y +xcopy cuda_nvcc-windows-x86_64-12.5.82-archive "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.5" /s /e /i /y +xcopy cuda_nvtx-windows-x86_64-12.5.82-archive "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.5" /s /e /i /y +xcopy visual_studio_integration-windows-x86_64-12.5.82-archive "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.5\extras" /s /e /i /y + + +# echo Installing CUDA toolkit 12 +# cuda_12.5.1_windows.exe +# cuda_9.1.85_windows.exe -s nvcc_12.5 cudart_12.5 + + +echo CUDA toolkit 12 installed dir "%ProgramFiles%" -set PATH=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v9.1\bin;%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v9.1\libnvvp;%PATH% +set PATH=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.5\bin;%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.5\libnvvp;%PATH% + +dir "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA" +dir "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.5" +dir "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.5\bin" -nvcc -V \ No newline at end of file +nvcc -V From cdff08d3055cd9383465d8f34c6aa3be1faa6312 Mon Sep 17 00:00:00 2001 From: Carsten Griwodz Date: Tue, 30 Jul 2024 15:52:29 +0200 Subject: [PATCH 281/285] [cuda] updating from classic NVXT to nvtx3 --- src/CMakeLists.txt | 9 ++++++--- src/application/CMakeLists.txt | 4 ---- src/popsift/popsift.h | 2 +- src/popsift/s_filtergrid.cu | 2 +- src/popsift/s_image.cu | 2 +- src/popsift/s_orientation.cu | 2 +- src/popsift/sift_desc.cu | 2 +- src/popsift/sift_pyramid.cu | 2 +- 8 files changed, 12 insertions(+), 13 deletions(-) diff --git 
a/src/CMakeLists.txt b/src/CMakeLists.txt index 07dc81f3..4764192c 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -42,10 +42,13 @@ add_library(popsift SHARED target_link_libraries(popsift PUBLIC CUDA::cudart - CUDA::nvToolsExt Threads::Threads) -# expected library to link was: "CUDA::nvtx3" but it appears to be simply nvToolsExt -# or maybe ${NVTX_LIBRARY} ??? + +if(PopSift_USE_NVTX_PROFILING) +target_link_libraries(popsift + PUBLIC + CUDA::nvtx3) +endif() # It is necessary to choose between shared and static, otherwise the CUDA_RUNTIME_LIBRARY target property # will not be set. diff --git a/src/application/CMakeLists.txt b/src/application/CMakeLists.txt index 0b6bdf20..2379c57d 100755 --- a/src/application/CMakeLists.txt +++ b/src/application/CMakeLists.txt @@ -60,10 +60,6 @@ else() set(PD_COMPILE_OPTIONS "" ) endif() -if(PopSift_USE_NVTX_PROFILING) - list(APPEND PD_LINK_LIBS ${CUDA_NVTX_LIBRARY}) -endif(PopSift_USE_NVTX_PROFILING) - ############################################################# # popsift-demo ############################################################# diff --git a/src/popsift/popsift.h b/src/popsift/popsift.h index 3b5f72b8..5654cc76 100755 --- a/src/popsift/popsift.h +++ b/src/popsift/popsift.h @@ -24,7 +24,7 @@ #include #if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX) -#include +#include #else #define nvtxRangeStartA(a) #define nvtxRangeEnd(a) diff --git a/src/popsift/s_filtergrid.cu b/src/popsift/s_filtergrid.cu index a766c2de..bfe2e64e 100644 --- a/src/popsift/s_filtergrid.cu +++ b/src/popsift/s_filtergrid.cu @@ -10,7 +10,7 @@ #include "sift_pyramid.h" #if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX) -#include +#include #else #define nvtxRangePushA(a) #define nvtxRangePop() diff --git a/src/popsift/s_image.cu b/src/popsift/s_image.cu index f26b8e3e..a966dd39 100755 --- a/src/popsift/s_image.cu +++ b/src/popsift/s_image.cu @@ -16,7 +16,7 @@ #include #if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX) -#include +#include #else #define nvtxRangePushA(a) 
#define nvtxRangePop() diff --git a/src/popsift/s_orientation.cu b/src/popsift/s_orientation.cu index f6b36fcd..b34aaaa1 100644 --- a/src/popsift/s_orientation.cu +++ b/src/popsift/s_orientation.cu @@ -19,7 +19,7 @@ #include #if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX) -#include +#include #else #define nvtxRangePushA(a) #define nvtxRangePop() diff --git a/src/popsift/sift_desc.cu b/src/popsift/sift_desc.cu index b0eb0bd1..f533df35 100644 --- a/src/popsift/sift_desc.cu +++ b/src/popsift/sift_desc.cu @@ -22,7 +22,7 @@ #include #if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX) -#include +#include #else #define nvtxRangePushA(a) #define nvtxRangePop() diff --git a/src/popsift/sift_pyramid.cu b/src/popsift/sift_pyramid.cu index 06060052..c03b0d61 100644 --- a/src/popsift/sift_pyramid.cu +++ b/src/popsift/sift_pyramid.cu @@ -26,7 +26,7 @@ #endif #if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX) -#include +#include #else #define nvtxRangePushA(a) #define nvtxRangePop() From 2175ee5c59c9b5806bb38b53ecd063bc92354ed1 Mon Sep 17 00:00:00 2001 From: Carsten Griwodz Date: Tue, 30 Jul 2024 16:14:19 +0200 Subject: [PATCH 282/285] [ci] expand CI tests for appveyor and travis let BUILD_SHARED_LIBS determine if popsift is built as shared or static using shared cudart library is given by CMAKE_CUDA_RUNTIME_LIBRARY --- .github/workflows/continuous-integration.yml | 4 ---- appveyor.yml | 22 ++++++++++++++++++-- src/CMakeLists.txt | 8 ++----- 3 files changed, 22 insertions(+), 12 deletions(-) diff --git a/.github/workflows/continuous-integration.yml b/.github/workflows/continuous-integration.yml index 8b46ec1b..3f01875e 100644 --- a/.github/workflows/continuous-integration.yml +++ b/.github/workflows/continuous-integration.yml @@ -25,10 +25,6 @@ jobs: build_tpe: ["Release", "Debug"] exclude: # excludes debug on this one as it has a segmentation fault during the compilation (!) 
- - container: "alicevision/popsift-deps:cuda12.1.0-ubuntu22.04" - build_tpe: "Debug" - - container: "alicevision/popsift-deps:cuda12.1.0-ubuntu22.04" - build_tpe: "Release" - container: "alicevision/popsift-deps:cuda11.8.0-ubuntu20.04" build_tpe: "Debug" diff --git a/appveyor.yml b/appveyor.yml index e7bac169..679ac5b5 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -1,3 +1,6 @@ +# +# Build system for the PopSift library, including its demo programs. +# version: '1.0.{build}' image: Visual Studio 2022 @@ -7,13 +10,16 @@ platform: configuration: - Release -# - Debug + - Debug #environment: # matrix: # - DBUILD_SHARED_LIBS: 0 # - DBUILD_SHARED_LIBS: 1 +# +# Check the separate file cudaInstallAppveyor for the installation of CUDA +# install: - cmd: >- call cudaInstallAppveyor.cmd @@ -23,10 +29,22 @@ install: --triplet %PLATFORM%-windows # devil +# +# When updating to a new version of visual studio, change the generation string after +# -G and find the suitable toolkit version that is listed after -T (v143 in this case). +# The CUDA Toolkit and the VS version must match. The matches are found in the CUDA +# documentation. +# The platform in this case is x64. Apparently, you need in after -T for VS and after -A +# for CUDA. +# You can only have one -T parameter, but you can separate several options with a comma. +# +# PopSift_USE_GRID_FILTER is off in this build because the installation of CUDA Thrust +# in cudaInstallAppveyor is not happening yet. +# before_build: - md build - cd build - - cmake -G "Visual Studio 17 2022" -A x64 -T v143,host=x64,cuda="%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.5" -DBUILD_SHARED_LIBS=%DBUILD_SHARED_LIBS% -DPopSift_USE_NVTX_PROFILING:BOOL=OFF -DPopSift_BUILD_DOCS:BOOL=OFF -DPopSift_USE_POSITION_INDEPENDENT_CODE:BOOL=%DBUILD_SHARED_LIBS% -DPopSift_BUILD_EXAMPLES:BOOL=ON -DCMAKE_BUILD_TYPE=%configuration% -DCMAKE_TOOLCHAIN_FILE=c:/tools/vcpkg/scripts/buildsystems/vcpkg.cmake .. 
+ - cmake -G "Visual Studio 17 2022" -A x64 -T v143,host=x64,cuda="%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.5" -DBUILD_SHARED_LIBS:BOOL=ON -DPopSift_USE_NVTX_PROFILING:BOOL=OFF -DPopSift_USE_GRID_FILTER:BOOL=OFF -DPopSift_BUILD_DOCS:BOOL=OFF -DPopSift_USE_POSITION_INDEPENDENT_CODE:BOOL=ON -DPopSift_BUILD_EXAMPLES:BOOL=ON -DCMAKE_BUILD_TYPE=%configuration% -DCMAKE_TOOLCHAIN_FILE=c:/tools/vcpkg/scripts/buildsystems/vcpkg.cmake .. - ls -l build: diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 4764192c..ff3b3681 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,5 +1,6 @@ +# Do not specify SHARED or STATIC in add_library. Let the variable BUILD_SHARED_LIBS determine this. -add_library(popsift SHARED +add_library(popsift popsift/popsift.cu popsift/popsift.h popsift/features.cu popsift/features.h popsift/sift_constants.cu popsift/sift_constants.h @@ -50,11 +51,6 @@ target_link_libraries(popsift CUDA::nvtx3) endif() -# It is necessary to choose between shared and static, otherwise the CUDA_RUNTIME_LIBRARY target property -# will not be set. 
-# According to https://cmake.org/cmake/help/latest/variable/CMAKE_CUDA_RUNTIME_LIBRARY.html -string(APPEND CMAKE_CUDA_FLAGS " --cudart shared") - set_target_properties(popsift PROPERTIES VERSION ${PROJECT_VERSION}) set_target_properties(popsift PROPERTIES DEBUG_POSTFIX "d") set_target_properties(popsift PROPERTIES CUDA_SEPARABLE_COMPILATION ON) From a6f8113e4a39613fe30c710713e6bd7ea3000499 Mon Sep 17 00:00:00 2001 From: Fabien Castan Date: Fri, 9 Aug 2024 23:14:48 +0200 Subject: [PATCH 283/285] [doc] readme: update minimal boost version --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 418d1278..738794c9 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ PopSift depends on: Optionally, for the provided applications: -* Boost >= 1.55 (required components {atomic, chrono, date-time, system, thread}-dev) +* Boost >= 1.71 (required components {atomic, chrono, date-time, system, thread}-dev) * DevIL (libdevil-dev) can be used to load a broader range of image formats, otherwise only pgm is supported. From 494408ead6af31d064e33c721ca5a1c867e60999 Mon Sep 17 00:00:00 2001 From: Carsten Griwodz Date: Mon, 12 Aug 2024 11:20:21 +0200 Subject: [PATCH 284/285] explicit CC list for Jetson and Tegra platforms --- CMakeLists.txt | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a9c033cb..de88e943 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,17 +1,27 @@ # First-order language CUDA requires at least CMake 3.18 cmake_minimum_required(VERSION 3.24) +# Default seletion of CUDA Compute Capabilities. +# This must be called before project() or cmake sets it to the oldest non-deprecated CC +# "all" and "all-major" work for Intel and perhaps for ARM with discrete GPUs, but not Tegra and Jetson. 
+if(EXISTS "/etc/nv_tegra_release") + # The CC list for Tegras and Jetson will require manual updates + set(CMAKE_CUDA_ARCHITECTURES "53;62;72;87" + CACHE + STRING "Which CUDA CCs to support: native, all, all-major or an explicit list delimited by semicolons") +else() + # The CC list for discrete GPUs will require CMake updates + set(CMAKE_CUDA_ARCHITECTURES "all-major" + CACHE + STRING "Which CUDA CCs to support: native, all, all-major or an explicit list delimited by semicolons") +endif() + project(PopSift VERSION 1.0.0 LANGUAGES CXX CUDA) # Policy to support CUDA as a first-order language for CMake. # Since CMake 3.18. See https://cmake.org/cmake/help/latest/policy/CMP0104.html cmake_policy(SET CMP0104 NEW) -set(CMAKE_CUDA_ARCHITECTURES "all-major" - CACHE - STRING "Which CUDA CCs to support: native, all, all-major or an explicit list delimited by semicolons" - FORCE) - # Set build path as a folder named as the platform (linux, windows, darwin...) plus the processor type set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/${CMAKE_SYSTEM_NAME}-${CMAKE_SYSTEM_PROCESSOR}") set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/${CMAKE_SYSTEM_NAME}-${CMAKE_SYSTEM_PROCESSOR}") From 8623b697ac632f40d74f8773d646d6a804e89893 Mon Sep 17 00:00:00 2001 From: Carsten Griwodz Date: Wed, 14 Aug 2024 21:16:51 +0200 Subject: [PATCH 285/285] Remove broken and unused code path from L2 normalization (#166) undefined --- CMakeLists.txt | 7 ----- cmake/sift_config.h.in | 1 - src/popsift/s_desc_norm_l2.h | 58 +++++++++++------------------------- 3 files changed, 17 insertions(+), 49 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index de88e943..3e9138a8 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -32,7 +32,6 @@ option(PopSift_USE_NVTX_PROFILING "Use CUDA NVTX for profiling." OFF) option(PopSift_ERRCHK_AFTER_KERNEL "Synchronize and check CUDA error after every kernel." OFF) option(PopSift_USE_POSITION_INDEPENDENT_CODE "Generate position independent code." 
ON) option(PopSift_USE_GRID_FILTER "Switch off grid filtering to massively reduce compile time while debugging other things." ON) -option(PopSift_USE_NORMF "The __normf function computes Euclidean distance on large arrays. Fast but stability is uncertain." OFF) option(PopSift_NVCC_WARNINGS "Switch on several additional warning for CUDA nvcc" OFF) option(PopSift_USE_TEST_CMD "Add testing step for functional verification" OFF) option(BUILD_SHARED_LIBS "Build shared libraries" ON) @@ -141,12 +140,6 @@ set(CMAKE_CUDA_STANDARD ${PopSift_CXX_STANDARD}) set(CMAKE_CUDA_STANDARD_REQUIRED ON) -if(PopSift_USE_NORMF AND CUDA_VERSION VERSION_GREATER_EQUAL "7.5") - set(PopSift_HAVE_NORMF 1) -else() - set(PopSift_HAVE_NORMF 0) -endif() - if(CUDA_VERSION VERSION_GREATER_EQUAL "9.0") set(PopSift_HAVE_SHFL_DOWN_SYNC 1) else() diff --git a/cmake/sift_config.h.in b/cmake/sift_config.h.in index 86095a55..b6807983 100644 --- a/cmake/sift_config.h.in +++ b/cmake/sift_config.h.in @@ -12,7 +12,6 @@ #define POPSIFT_IS_UNDEFINED(F) F() == 0 #define POPSIFT_HAVE_SHFL_DOWN_SYNC() @PopSift_HAVE_SHFL_DOWN_SYNC@ -#define POPSIFT_HAVE_NORMF() @PopSift_HAVE_NORMF@ #define POPSIFT_DISABLE_GRID_FILTER() @DISABLE_GRID_FILTER@ #define POPSIFT_USE_NVTX() @PopSift_USE_NVTX@ diff --git a/src/popsift/s_desc_norm_l2.h b/src/popsift/s_desc_norm_l2.h index 3a7ed858..b067d71f 100644 --- a/src/popsift/s_desc_norm_l2.h +++ b/src/popsift/s_desc_norm_l2.h @@ -50,24 +50,10 @@ void NormalizeL2::normalize( const float* src_desc, float* dst_desc, const bool float4 descr; descr = ptr4[threadIdx.x]; -#if POPSIFT_IS_DEFINED(POPSIFT_HAVE_NORMF) - // normf() is an elegant function: sqrt(sum_0^127{v^2}) - // It exists from CUDA 7.5 but the trouble with CUB on the GTX 980 Ti forces - // us to with CUDA 7.0 right now - float norm; - if( threadIdx.x == 0 ) { - norm = normf( 128, src_desc ); - } - __syncthreads(); - norm = popsift::shuffle( norm, 0 ); - - descr.x = min( descr.x, 0.2f*norm ); - descr.y = min( descr.y, 0.2f*norm 
); descr.z = min( descr.z, 0.2f*norm ); descr.w = min( descr.w, 0.2f*norm ); - + // 32 threads compute 4 squares each, then shuffle to perform an addition by + // reduction for the sum of 128 squares, result in thread 0 norm = descr.x * descr.x + descr.y * descr.y + descr.z * descr.z @@ -77,34 +63,25 @@ void NormalizeL2::normalize( const float* src_desc, float* dst_desc, const bool norm += popsift::shuffle_down( norm, 4 ); norm += popsift::shuffle_down( norm, 2 ); norm += popsift::shuffle_down( norm, 1 ); - if( threadIdx.x == 0 ) { - // norm = __fsqrt_rn( norm ); - // norm = __fdividef( 512.0f, norm ); - norm = __frsqrt_rn( norm ); // inverse square root - norm = scalbnf( norm, d_consts.norm_multi ); - } -#else // not HAVE_NORMF - float norm; - norm = descr.x * descr.x - + descr.y * descr.y - + descr.z * descr.z - + descr.w * descr.w; - norm += popsift::shuffle_down( norm, 16 ); - norm += popsift::shuffle_down( norm, 8 ); - norm += popsift::shuffle_down( norm, 4 ); - norm += popsift::shuffle_down( norm, 2 ); - norm += popsift::shuffle_down( norm, 1 ); if( threadIdx.x == 0 ) { - norm = __fsqrt_rn( norm ); + // compute 1 / sqrt(sum) in round-to-nearest even mode in thread 0 + norm = __frsqrt_rn( norm ); } + + // spread the inverted norm from thread 0 to all threads in the warp norm = popsift::shuffle( norm, 0 ); - descr.x = min( descr.x, 0.2f*norm ); - descr.y = min( descr.y, 0.2f*norm ); - descr.z = min( descr.z, 0.2f*norm ); - descr.w = min( descr.w, 0.2f*norm ); + // quasi-normalize all 128 floats + descr.x = min( descr.x*norm, 0.2f ); + descr.y = min( descr.y*norm, 0.2f ); + descr.z = min( descr.z*norm, 0.2f ); + descr.w = min( descr.w*norm, 0.2f ); + // Repeat the procedure, but also add a multiplier. E.g., if the user wants + // descriptors as bytes rather than floats, multiply by 256 - or even by 512 + // for better accuracy, which is OK because a point cannot be a keypoint if more + // than half of its gradient is in a single direction.
norm = descr.x * descr.x + descr.y * descr.y + descr.z * descr.z @@ -114,13 +91,12 @@ void NormalizeL2::normalize( const float* src_desc, float* dst_desc, const bool norm += popsift::shuffle_down( norm, 4 ); norm += popsift::shuffle_down( norm, 2 ); norm += popsift::shuffle_down( norm, 1 ); + if( threadIdx.x == 0 ) { - // norm = __fsqrt_rn( norm ); - // norm = __fdividef( 512.0f, norm ); norm = __frsqrt_rn( norm ); // inverse square root norm = scalbnf( norm, d_consts.norm_multi ); } -#endif // HAVE_NORMF + norm = popsift::shuffle( norm, 0 ); descr.x = descr.x * norm;