diff --git a/.bazelrc b/.bazelrc new file mode 100644 index 000000000..e3279336f --- /dev/null +++ b/.bazelrc @@ -0,0 +1,3 @@ +build --client_env=CC=g++ +build --client_env=CXX=g++ +build --client_env=BAZEL_CXXOPTS=-std=c++17 diff --git a/.gitignore b/.gitignore index f80e5c682..75b6d6e40 100644 --- a/.gitignore +++ b/.gitignore @@ -378,3 +378,7 @@ gperftools # Rust rust/target + +# bazel +bazel-* +MODULE.bazel* diff --git a/BUILD b/BUILD new file mode 100644 index 000000000..1a1272cff --- /dev/null +++ b/BUILD @@ -0,0 +1,21 @@ +# BUILD: Bazel target that exposes the sources under src/ and the headers under include/ as a single library. + +cc_library( + name = "diskann_library", + srcs = glob(["src/*.cpp"]), + hdrs = glob(["include/*.h", "include/tsl/*.h"]), + includes = ["include", "include/tsl"], + # Compile-only flags. Anything the linker needs belongs in linkopts below, because copts are not passed on to dependents. + copts = [ + "-Ofast", "-march=native", "-mtune=native", + "-I/usr/include/mkl", "-m64", + "-DMKL_ILP64", "-DNDEBUG", "-DUSE_AVX2", + "-mavx2", "-mfma", "-msse2", + "-ftree-vectorize", "-fno-builtin-malloc", "-fno-builtin-calloc", "-fno-builtin-realloc", + "-fno-builtin-free", "-fopenmp", "-fopenmp-simd", + "-funroll-loops", "-Wfatal-errors", + ], + # Link flags are applied when a dependent binary is linked. -fopenmp is repeated here so the OpenMP runtime gets linked. NOTE(review): no MKL libraries are listed; dependents presumably have to add them - confirm. + linkopts = ["-Wl,--no-as-needed", "-ltcmalloc", "-fopenmp"], + visibility = ["//visibility:public"], +) diff --git a/README.md b/README.md index a20a1d671..a8da681cc 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ DiskANN is a suite of scalable, accurate and cost-effective approximate nearest neighbor search algorithms for large-scale vector search that support real-time changes and simple filters. -This code is based on ideas from the [DiskANN](https://papers.nips.cc/paper/9527-rand-nsg-fast-accurate-billion-point-nearest-neighbor-search-on-a-single-node.pdf), [Fresh-DiskANN](https://arxiv.org/abs/2105.09613) and the [Filtered-DiskANN](https://harsha-simhadri.org/pubs/Filtered-DiskANN23.pdf) papers with further improvements. 
+This code is based on ideas from the [DiskANN](https://papers.nips.cc/paper/9527-rand-nsg-fast-accurate-billion-point-nearest-neighbor-search-on-a-single-node.pdf), [Fresh-DiskANN](https://arxiv.org/abs/2105.09613) and the [Filtered-DiskANN](https://harsha-simhadri.org/pubs/Filtered-DiskANN23.pdf) papers with further improvements. This code forked off from [code for NSG](https://github.com/ZJULearning/nsg) algorithm. This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). @@ -45,12 +45,12 @@ sudo sh l_BaseKit_p_2022.1.2.146.sh -a --components intel.oneapi.lin.mkl.devel - ### Build ```bash -mkdir build && cd build && cmake -DCMAKE_BUILD_TYPE=Release .. && make -j +mkdir build && cd build && cmake -DCMAKE_BUILD_TYPE=Release .. && make -j ``` ## Windows build: -The Windows version has been tested with Enterprise editions of Visual Studio 2022, 2019 and 2017. It should work with the Community and Professional editions as well without any changes. +The Windows version has been tested with Enterprise editions of Visual Studio 2022, 2019 and 2017. It should work with the Community and Professional editions as well without any changes. **Prerequisites:** @@ -63,7 +63,7 @@ git submodule init git submodule update ``` -* Environment variables: +* Environment variables: * [optional] If you would like to override the Boost library listed in windows/packages.config.in, set BOOST_ROOT to your Boost folder. **Build steps:** @@ -89,12 +89,16 @@ msbuild.exe diskann.sln /m /nologo /t:Build /p:Configuration="Release" /property * This will also build gperftools submodule for libtcmalloc_minimal dependency. * Generated binaries are stored in the x64/Release or x64/Debug directories. +## Bazel build: + +This repository includes Bazel build files (`.bazelrc`, `BUILD` and `WORKSPACE`). For now, only the library itself can be built with Bazel; this lets third-party projects that build with Bazel use DiskANN as a dependency. 
Building the apps and running in Docker are not supported for now. + ## Usage: Please see the following pages on using the compiled code: -- [Commandline interface for building and search SSD based indices](workflows/SSD_index.md) -- [Commandline interface for building and search in memory indices](workflows/in_memory_index.md) +- [Commandline interface for building and search SSD based indices](workflows/SSD_index.md) +- [Commandline interface for building and search in memory indices](workflows/in_memory_index.md) - [Commandline examples for using in-memory streaming indices](workflows/dynamic_index.md) - [Commandline interface for building and search in memory indices with label data and filters](workflows/filtered_in_memory.md) - [Commandline interface for building and search SSD based indices with label data and filters](workflows/filtered_ssd_index.md) diff --git a/WORKSPACE b/WORKSPACE new file mode 100644 index 000000000..187875429 --- /dev/null +++ b/WORKSPACE @@ -0,0 +1 @@ +workspace(name = "microsoft_DiskANN")