You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
I am getting a huge difference in AUROC when running the same code on two different versions of Faiss (1.7.2 vs. 1.7.3). In this work, I extract features from BERT, populate a Faiss index with them, and then search the index for the nearest neighbours.
2025-02-27 16:39:38,081 - loader.py:66 - Successfully loaded faiss.
FAISS Version: 1.7.2
Numpy Version: 1.26.4
PyTorch Version: 2.3.0
GPU Enabled: False
GPU Architecture: NVIDIA A100-SXM4-40GB
extracting from layers [-1]
FAISS Version: 1.7.2
Numpy Version: 1.26.4
PyTorch Version: 2.3.0
GPU Enabled: True
GPU Architecture: NVIDIA A100-SXM4-40GB
Extraction embedding, computing support features: 100%|¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦| 3367/3367 [00:41<00:00, 81.41it/s]
total_feat: (444444, 768)
self.percentage 0.1
Subsampling...: 100%|¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦| 44444/44444 [00:46<00:00, 953.39it/s]
total_feat2: (44444, 768)
fitting the data
Data Type of Features: float32
create index dimension 768
class name: GpuIndexFlatL2
data fitted
Loading test data...
872 872 872
Infering.....: 100%|¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦| 436/436 [00:05<00:00, 82.68it/s]
auroc: 0.9748070325900515
####################################
2025-02-27 16:42:26,698 - loader.py:56 - Successfully loaded faiss with AVX2 support.
FAISS Version: 1.7.3
Numpy Version: 1.24.3
PyTorch Version: 2.4.1+cu121
GPU Enabled: False
GPU Architecture: NVIDIA A100-SXM4-40GB
extracting from layers [-1]
FAISS Version: 1.7.3
Numpy Version: 1.24.3
PyTorch Version: 2.4.1+cu121
GPU Enabled: True
GPU Architecture: NVIDIA A100-SXM4-40GB
Extraction embedding, computing support features: 100%|¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦| 3367/3367 [00:33<00:00, 100.00it/s]
total_feat: (444444, 768)
self.percentage 0.1
Subsampling...: 100%|¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦| 44444/44444 [00:40<00:00, 1104.26it/s]
total_feat2: (44444, 768)
fitting the data
Data Type of Features: float32
create index dimension 768
class name: GpuIndexFlatL2
data fitted
Loading test data...
872 872 872
Infering.....: 100%|¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦| 436/436 [00:04<00:00, 98.95it/s]
auroc: 0.20863418712472448
Args:
on_gpu: If set true, nearest neighbour searches are done on GPU.
num_workers: Number of workers to use with FAISS for similarity search.
"""
# Tell FAISS how many OpenMP threads to use, taken from `num_workers`.
faiss.omp_set_num_threads(num_workers)
# NOTE(review): this constructs a GpuIndexFlatConfig, sets useFloat16 on it and
# immediately discards it. The config objects built later in _create_index are
# separate instances, so this line has no effect on them.
faiss.GpuIndexFlatConfig().useFloat16 = False
self.on_gpu = on_gpu
# No index exists yet; fit() or load() assigns one.
self.search_index = None
# Diagnostic dump of library versions and GPU settings.
print("FAISS Version:", faiss.__version__)
print("Numpy Version:", np.__version__)
print("PyTorch Version:", torch.__version__)
print("GPU Enabled:", self.on_gpu)
# NOTE(review): device 0 is queried even when on_gpu is False; presumably this
# fails on a host without CUDA -- confirm.
print("GPU Architecture:", torch.cuda.get_device_name(0))
def _gpu_cloner_options(self):
    """Return a default-constructed ``faiss.GpuClonerOptions``.

    The options are created on demand inside a method because the
    CPU-only faiss package has no ``GpuClonerOptions`` attribute, so the
    object cannot be used as a default value in a function signature.
    """
    return faiss.GpuClonerOptions()
def _index_to_gpu(self, index):
    """Return ``index`` cloned onto GPU device 0 when GPU search is enabled.

    Args:
        index: A CPU FAISS index.

    Returns:
        The GPU clone when ``self.on_gpu`` is true, otherwise ``index``
        itself, untouched.
    """
    if not self.on_gpu:
        return index
    # For the non-gpu faiss python package, there is no GpuClonerOptions
    # so we can not make a default in the function header.
    return faiss.index_cpu_to_gpu(
        faiss.StandardGpuResources(), 0, index, self._gpu_cloner_options()
    )
def _index_to_cpu(self, index):
    """Return a CPU version of ``index``.

    Args:
        index: The index to convert.

    Returns:
        ``faiss.index_gpu_to_cpu(index)`` when ``self.on_gpu`` is true,
        otherwise ``index`` itself, untouched.
    """
    if not self.on_gpu:
        return index
    return faiss.index_gpu_to_cpu(index)
def _create_index(self, dimension):
    """Create an empty flat L2 index for vectors of size ``dimension``.

    Args:
        dimension: Length of each feature vector the index will hold.

    Returns:
        A ``faiss.GpuIndexFlatL2`` when ``self.on_gpu`` is true, otherwise
        a ``faiss.IndexFlatL2``.
    """
    print("create index", "dimension", dimension)
    if not self.on_gpu:
        return faiss.IndexFlatL2(dimension)

    # Set useFloat16 on the config object that is actually handed to the
    # index. Setting it on a separate, throwaway GpuIndexFlatConfig has no
    # effect on the index created here.
    gpu_config = faiss.GpuIndexFlatConfig()
    gpu_config.useFloat16 = False
    return faiss.GpuIndexFlatL2(
        faiss.StandardGpuResources(), dimension, gpu_config
    )
def fit(self, features: np.ndarray) -> None:
    """
    Adds features to the FAISS search index.

    Any existing index is reset and replaced by a fresh one whose
    dimension is the last axis of ``features``.

    Args:
        features: Array of size NxD.
    """
    print("Data Type of Features:", features.dtype)
    # Compare against None explicitly: whether an index exists must not
    # depend on the truthiness of the index object.
    if self.search_index is not None:
        self.reset_index()
    # The index dimension is the token (feature) dimension.
    self.search_index = self._create_index(features.shape[-1])
    print("class name:", self.search_index.__class__.__name__)
    # FAISS works on C-contiguous float32 buffers; normalise once here so
    # sliced views or float64 input are handled. This is a no-copy
    # operation when the array already has that layout.
    features = np.ascontiguousarray(features, dtype=np.float32)
    self._train(self.search_index, features)
    self.search_index.add(features)
def _train(self, _index, _features):
    """Training hook called before features are added to an index.

    Does nothing in this class; both arguments are ignored. Presumably
    intended to be overridden by subclasses whose index type needs a
    training pass -- confirm against the rest of the project.
    """
    return None
def run(
    self,
    n_nearest_neighbours,
    query_features: np.ndarray,
    index_features: np.ndarray = None,
) -> Union[np.ndarray, np.ndarray, np.ndarray]:
    """
    Returns distances and indices of nearest neighbour search.

    Args:
        n_nearest_neighbours: Number of neighbours to retrieve per query.
        query_features: Features to retrieve.
        index_features: [optional] Index features to search in. When
            omitted, the index populated by ``fit``/``load`` is searched.
            When given, a temporary index is built from these features
            for this call only and ``self.search_index`` is left untouched.

    Returns:
        Whatever the index's ``search`` method returns.
        NOTE(review): the ``Union[...]`` annotation does not describe a
        (distances, indices) pair; it is kept only to leave the signature
        unchanged.
    """
    # FAISS works on C-contiguous float32 buffers.
    query_features = np.ascontiguousarray(query_features, dtype=np.float32)

    if index_features is None:
        # No ad-hoc features supplied: search the already fitted index.
        return self.search_index.search(query_features, n_nearest_neighbours)

    # Build a search index just for this search.
    search_index = self._create_index(index_features.shape[-1])
    print("Run:", search_index, index_features)
    index_features = np.ascontiguousarray(index_features, dtype=np.float32)
    self._train(search_index, index_features)
    search_index.add(index_features)
    # Describe the index that is actually searched below, not the
    # (possibly unset) instance-level index.
    print("Index Description:", search_index)
    return search_index.search(query_features, n_nearest_neighbours)
def save(self, filename: str) -> None:
    """Write the current search index to ``filename``.

    The index is first passed through ``_index_to_cpu`` so that a
    GPU-resident index is converted before ``faiss.write_index`` is called.

    Args:
        filename: Destination path handed to ``faiss.write_index``.
    """
    cpu_index = self._index_to_cpu(self.search_index)
    faiss.write_index(cpu_index, filename)
def load(self, filename: str) -> None:
    """Read an index from ``filename`` and make it the current search index.

    The index read by ``faiss.read_index`` is passed through
    ``_index_to_gpu``, which moves it to the GPU when ``self.on_gpu`` is set.

    Args:
        filename: Path handed to ``faiss.read_index``.
    """
    stored_index = faiss.read_index(filename)
    self.search_index = self._index_to_gpu(stored_index)
def reset_index(self):
    """Empty and drop the current search index, if there is one.

    Calls ``reset()`` on the index and then sets ``self.search_index`` to
    ``None``. Does nothing when no index is set.
    """
    if self.search_index is None:
        return
    self.search_index.reset()
    self.search_index = None
The text was updated successfully, but these errors were encountered:
I am getting a huge difference in AUROC when running the same code on two different versions of Faiss (1.7.2 vs. 1.7.3). In this work, I extract features from BERT, populate a Faiss index with them, and then search the index for the nearest neighbours.
2025-02-27 16:39:38,081 - loader.py:66 - Successfully loaded faiss.
FAISS Version: 1.7.2
Numpy Version: 1.26.4
PyTorch Version: 2.3.0
GPU Enabled: False
GPU Architecture: NVIDIA A100-SXM4-40GB
extracting from layers [-1]
FAISS Version: 1.7.2
Numpy Version: 1.26.4
PyTorch Version: 2.3.0
GPU Enabled: True
GPU Architecture: NVIDIA A100-SXM4-40GB
Extraction embedding, computing support features: 100%|¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦| 3367/3367 [00:41<00:00, 81.41it/s]
total_feat: (444444, 768)
self.percentage 0.1
Subsampling...: 100%|¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦| 44444/44444 [00:46<00:00, 953.39it/s]
total_feat2: (44444, 768)
fitting the data
Data Type of Features: float32
create index dimension 768
class name: GpuIndexFlatL2
data fitted
Loading test data...
872 872 872
Infering.....: 100%|¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦| 436/436 [00:05<00:00, 82.68it/s]
auroc: 0.9748070325900515
####################################
2025-02-27 16:42:26,698 - loader.py:56 - Successfully loaded faiss with AVX2 support.
FAISS Version: 1.7.3
Numpy Version: 1.24.3
PyTorch Version: 2.4.1+cu121
GPU Enabled: False
GPU Architecture: NVIDIA A100-SXM4-40GB
extracting from layers [-1]
FAISS Version: 1.7.3
Numpy Version: 1.24.3
PyTorch Version: 2.4.1+cu121
GPU Enabled: True
GPU Architecture: NVIDIA A100-SXM4-40GB
Extraction embedding, computing support features: 100%|¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦| 3367/3367 [00:33<00:00, 100.00it/s]
total_feat: (444444, 768)
self.percentage 0.1
Subsampling...: 100%|¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦| 44444/44444 [00:40<00:00, 1104.26it/s]
total_feat2: (44444, 768)
fitting the data
Data Type of Features: float32
create index dimension 768
class name: GpuIndexFlatL2
data fitted
Loading test data...
872 872 872
Infering.....: 100%|¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦¦| 436/436 [00:04<00:00, 98.95it/s]
auroc: 0.20863418712472448
`class FaissNN(object):
def init(self, on_gpu: bool = False, num_workers: int = 4) -> None:
"""FAISS Nearest neighbourhood search.
The text was updated successfully, but these errors were encountered: