Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Exp: print distances of out-neighbours from query and ground-truth values #587

Draft
wants to merge 11 commits into
base: main
Choose a base branch
from
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ bld/

# Visual Studio 2015/2017 cache/options directory
.vs/
.vscode
# Uncomment if you have tasks that create the project's static files in wwwroot
#wwwroot/

Expand Down
6 changes: 4 additions & 2 deletions apps/search_memory_index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,8 @@ int search_memory_index(diskann::Metric &metric, const std::string &index_path,
double best_recall = 0.0;

for (uint32_t test_id = 0; test_id < Lvec.size(); test_id++)
{
{
std::cout<<"L: "<<test_id<<std::endl;
uint32_t L = Lvec[test_id];
if (L < recall_at)
{
Expand Down Expand Up @@ -198,7 +199,8 @@ int search_memory_index(diskann::Metric &metric, const std::string &index_path,
}
}
else
{
{
std::cout<<"Query: "<<i+1<<std::endl;
cmp_stats[i] = index
->search(query + i * query_aligned_dim, recall_at, L,
query_result_ids[test_id].data() + i * recall_at)
Expand Down
77 changes: 77 additions & 0 deletions run_from_python.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
import subprocess
import os

# Path to the executable to launch.
# NOTE(review): the alternatives below presumably pair with command_type 2
# (build_disk_index) and command_type 3 (gen_random_slice) -- confirm and switch
# exe_path by hand whenever command_type is changed.
exe_path = r"C:\Users\ameyv\MSR\DiskANN\x64\Debug\build_memory_index.exe"
#exe_path = r"C:\Users\ameyv\MSR\DiskANN\x64\Release\build_disk_index.exe"
#exe_path = r"C:\Users\ameyv\MSR\DiskANN\x64\Release\gen_random_slice.exe"

# Selects which argument set build_arguments() produces (1, 2 or 3).
command_type = 1


def build_arguments(command_type):
    """Return the command-line argument list for the given command type.

    Args:
        command_type: 1, 2 or 3, selecting one of the hard-coded argument sets.

    Returns:
        A list of strings to append after the executable path.

    Raises:
        ValueError: if command_type is not one of the supported values.
            (Previously an unsupported value left ``arguments`` undefined and
            the script died later with a NameError.)
    """
    if command_type == 1:
        data_path = r"C:\Users\ameyv\MSR\sift_base.bin"
        index_path_prefix = r"C:\Users\ameyv\MSR\sift_memory_index_R64_L100"
        return [
            "--data_type", "float",
            "--dist_fn", "l2",
            "--data_path", data_path,
            "--index_path_prefix", index_path_prefix,
            "-R", "64",
            "-L", "100"
        ]
    if command_type == 2:
        memory = 1
        data_path = r"C:\Users\ameyv\MSR\sift_base.bin"
        index_path_prefix = r"C:\Users\ameyv\MSR\sift_disk_index"
        return [
            "--data_type", "float",
            "--dist_fn", "l2",
            "--data_path", data_path,
            "--index_path_prefix", index_path_prefix,
            "-R", "16",
            "-L", "25",
            "-B", "0.003",
            "-T", "1",
            "-M", str(memory)
        ]
    if command_type == 3:
        base_file_path = r"C:\Users\ameyv\MSR\sift_base.bin"
        return [
            "float",
            base_file_path,
            "sift_50k",
            "0.05"
        ]
    raise ValueError(f"Unsupported command_type: {command_type!r} (expected 1, 2 or 3)")


arguments = build_arguments(command_type)

# Constructing the complete command
command = [exe_path] + arguments

# Print the command to debug
print("Executing command:", command)

# Check if the executable exists and is a file
if not os.path.isfile(exe_path):
    print(f"Error: The executable {exe_path} does not exist or is not a file.")
else:
    try:
        directory = r"C:\Users\ameyv\MSR\DiskANN\results"
        # os.path.join avoids the doubled separator the manual concatenation produced.
        file_path = os.path.join(directory, "log.txt")

        # Create the results directory up front: otherwise open() raises
        # FileNotFoundError, which the handler below would misreport as a
        # missing executable.
        os.makedirs(directory, exist_ok=True)

        # 'w' truncates any previous log, replacing the exists/remove/append dance.
        with open(file_path, 'w') as log_file:
            # Only stdout is redirected to the log; stderr stays on the console.
            result = subprocess.run(command, stdout=log_file)

        # subprocess.run() does not raise on a non-zero exit unless check=True,
        # so the exit status has to be inspected explicitly.
        if result.returncode != 0:
            print(f"Program failed with return code {result.returncode}")
            print(f"See {file_path} for the program's output.")

    except FileNotFoundError:
        print(f"Executable not found: {exe_path}")

    except Exception as e:
        print(f"An error occurred: {str(e)}")

53 changes: 50 additions & 3 deletions src/index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -752,6 +752,15 @@ template <typename T, typename TagT, typename LabelT> std::vector<uint32_t> Inde
}
}

// if(_nd > 50000){
// std::random_device rd;
// std::mt19937 gen(rd());
// std::uniform_int_distribution<uint32_t> dis(0, (uint32_t)_nd-1);
// for(uint32_t i=0;i<1000;i++){
// init_ids.emplace_back((uint32_t)dis(gen));
// }
// }

return init_ids;
}

Expand Down Expand Up @@ -879,11 +888,20 @@ std::pair<uint32_t, uint32_t> Index<T, TagT, LabelT>::iterate_to_fixed_point(

uint32_t hops = 0;
uint32_t cmps = 0;

while (best_L_nodes.has_unexpanded_node())
{
auto nbr = best_L_nodes.closest_unexpanded();
auto n = nbr.id;

// if(search_invocation){
// std::vector<uint32_t> id_scratch_temp = {n};
// std::vector<float> dist_scratch_temp = {0.0};
// compute_dists(id_scratch_temp, dist_scratch_temp);
// diskann::cout<<"Iteration/Hop: #"<<hops+1<<std::endl;
// diskann::cout<<"Current L size: "<<best_L_nodes.size()<<std::endl;
// diskann::cout<<"Node expanded(ID) : "<<nbr.id<<" Distance(ID,Query): "<<dist_scratch_temp[0]<<std::endl;
// }
hops++;

// Add node to expanded nodes to create pool for prune later
if (!search_invocation)
Expand Down Expand Up @@ -966,12 +984,29 @@ std::pair<uint32_t, uint32_t> Index<T, TagT, LabelT>::iterate_to_fixed_point(
compute_dists(id_scratch, dist_scratch);
cmps += (uint32_t)id_scratch.size();

// Insert <id, dist> pairs into the pool of candidates
// // Insert <id, dist> pairs into the pool of candidates
// if (search_invocation){
// diskann::cout<<"Comparisons(Neighbors of ID) "<<(uint32_t)id_scratch.size()<<std::endl;
// diskann::cout<<"Distances of Neighbors: ID, Query"<<std::endl;
// }
for (size_t m = 0; m < id_scratch.size(); ++m)
{
// if(search_invocation){
// float cur_dist = _pq_data_store->get_distance(id_scratch[m], n);
// diskann::cout<<"pt #"<<m+1<<": "<<cur_dist<<std::setw(10)<<dist_scratch[m]<<std::endl;
// }
best_L_nodes.insert(Neighbor(id_scratch[m], dist_scratch[m]));
}
}
if (search_invocation){
diskann::cout<<"Total Iterations/Hops: "<<hops<<std::endl;
diskann::cout<<"Total comparisons: "<<cmps<<std::endl;
// diskann::cout<<"L size: "<<best_L_nodes.size()<<std::endl;
// diskann::cout<<"Expanded Nodes List"<<scratch->expanded_nodes_vec().size()<<std::endl;
// for(auto neighbor : scratch->expanded_nodes_vec()){
// diskann::cout<<neighbor.id<<std::endl;
// }
}
return std::make_pair(hops, cmps);
}

Expand Down Expand Up @@ -1977,12 +2012,24 @@ std::pair<uint32_t, uint32_t> Index<T, TagT, LabelT>::search(const T *query, con
}

const std::vector<LabelT> unused_filter_label;
const std::vector<uint32_t> init_ids = get_init_ids();
const std::vector<uint32_t> init_ids_temp = get_init_ids();

std::shared_lock<std::shared_timed_mutex> lock(_update_lock);

_data_store->preprocess_query(query, scratch);

std::vector<uint32_t> init_ids;
float min_dist = std::numeric_limits<float>::max();
for(auto id:init_ids_temp){

float dist = _data_store->get_distance(query,id);
if(dist < min_dist){
min_dist = dist;
init_ids.clear();
init_ids.push_back(id);
}
}

auto retval = iterate_to_fixed_point(scratch, L, init_ids, false, unused_filter_label, true);

NeighborPriorityQueue &best_L_nodes = scratch->best_l_nodes();
Expand Down
Loading