From 54d79f4163c8956878a02d6ceeb5c8b4c752737f Mon Sep 17 00:00:00 2001 From: Praneeth Bedapudi Date: Fri, 1 Mar 2024 22:08:54 +0530 Subject: [PATCH] minor changes Signed-off-by: Praneeth Bedapudi --- liteindex/defined_index.py | 52 ++++++++++++++----------------------- setup.py | 2 +- tests/test_vector_search.py | 8 +++--- 3 files changed, 24 insertions(+), 38 deletions(-) diff --git a/liteindex/defined_index.py b/liteindex/defined_index.py index 61a9214..821cf97 100644 --- a/liteindex/defined_index.py +++ b/liteindex/defined_index.py @@ -132,7 +132,7 @@ def __update_vector_search_index(self, for_key, dim=None): self.__vector_indexes_last_updated_at[for_key] = 0 newest_updated_at_time = self.__connection.execute( - f"SELECT MAX(updated_at) FROM {self.name}" + f'''SELECT MAX(updated_at) FROM "{self.name}"''' ).fetchone()[0] if newest_updated_at_time is None: @@ -145,7 +145,7 @@ def __update_vector_search_index(self, for_key, dim=None): integer_id_batch = [] for __row in self.__connection.execute( - f"SELECT integer_id, {for_key} FROM {self.name} WHERE updated_at > {self.__vector_indexes_last_updated_at[for_key]} AND updated_at <= {newest_updated_at_time} AND {for_key} IS NOT NULL" + f"""SELECT integer_id, "{for_key}" FROM "{self.name}" WHERE updated_at > {self.__vector_indexes_last_updated_at[for_key]} AND updated_at <= {newest_updated_at_time} AND "{for_key}" IS NOT NULL""" ): embeddings_batch.append(__row[1]) integer_id_batch.append(__row[0]) @@ -219,12 +219,12 @@ def __get_scores_and_integer_ids_table_name( _temp_name = f"temp_embeds_{uuid.uuid4().hex}" conn.execute( - f"CREATE TEMP TABLE {_temp_name} (_integer_id INTEGER PRIMARY KEY, score NUMBER)" + f"""CREATE TEMP TABLE {_temp_name} (_integer_id INTEGER PRIMARY KEY, score NUMBER)""" ) _temp_name = f"temp.{_temp_name}" conn.executemany( - f"INSERT INTO {_temp_name} (_integer_id, score) VALUES (?, ?)", + f"""INSERT INTO {_temp_name} (_integer_id, score) VALUES (?, ?)""", zip(integer_ids, scores), ) @@ -288,7 +288,7 @@ def __decompressor(self): def __validate_set_schema_if_exists(self): try: rows = self.__connection.execute( - f"SELECT * FROM {self.__meta_table_name}" + f'''SELECT * FROM "{self.__meta_table_name}"''' ).fetchall() except: return @@ -339,21 +339,19 @@ def __create_table_and_meta_table(self): with self.__connection: self.__connection.execute( - f"CREATE TABLE IF NOT EXISTS {self.name} (integer_id INTEGER PRIMARY KEY AUTOINCREMENT, id TEXT UNIQUE, updated_at NUMBER, {columns_str})" + f"""CREATE TABLE IF NOT EXISTS "{self.name}" (integer_id INTEGER PRIMARY KEY AUTOINCREMENT, id TEXT UNIQUE, updated_at NUMBER, {columns_str})""" ) self.__connection.execute( - f"CREATE INDEX IF NOT EXISTS idx_{self.name}_updated_at ON {self.name} (updated_at)" + f"""CREATE INDEX IF NOT EXISTS "idx_{self.name}_updated_at" ON "{self.name}" (updated_at)""" ) self.__connection.execute( - f"CREATE TABLE IF NOT EXISTS {self.__meta_table_name} " - "(key TEXT PRIMARY KEY, value_type TEXT)" + f"""CREATE TABLE IF NOT EXISTS "{self.__meta_table_name}" (key TEXT PRIMARY KEY, value_type TEXT)""" ) self.__connection.executemany( - f"INSERT OR IGNORE INTO {self.__meta_table_name} (key, value_type) " - f"VALUES (?, ?)", + f"""INSERT OR IGNORE INTO "{self.__meta_table_name}" (key, value_type) VALUES (?, ?)""", [(key, value_type) for key, value_type in meta_columns], ) @@ -470,14 +468,16 @@ def get( def clear(self): # CLEAR function: deletes the content of the table but keeps the table itself and the metadata table with self.__connection: - self.__connection.execute(f"DROP TABLE IF EXISTS {self.name}") + self.__connection.execute(f'''DROP TABLE IF EXISTS "{self.name}"''') self.__create_table_and_meta_table() def drop(self): # DROP function: deletes both the table itself and the metadata table with self.__connection: - self.__connection.execute(f"DROP TABLE IF EXISTS {self.name}") - self.__connection.execute(f"DROP TABLE IF EXISTS {self.__meta_table_name}") + self.__connection.execute(f'''DROP TABLE IF EXISTS "{self.name}"''') + self.__connection.execute( + f'''DROP TABLE IF EXISTS "{self.__meta_table_name}"''' + ) def search( self, @@ -580,7 +580,7 @@ def search( if sorting_by_vector: self.__connection.execute( - f"DROP TABLE IF EXISTS {integer_ids_to_scores_table_name}" + f'''DROP TABLE IF EXISTS "{integer_ids_to_scores_table_name}"''' ) results = {} @@ -670,7 +670,7 @@ def pop(self, ids=None, query={}, n=1, sort_by=None, reversed_sort=False): self.__decompressor, ) for row in self.__connection.execute( - f"DELETE FROM {self.name} WHERE id IN ({', '.join(['?' for _ in ids])}) RETURNING *", + f"""DELETE FROM "{self.name}" WHERE id IN ({', '.join(['?' for _ in ids])}) RETURNING *""", ids, ).fetchall() } @@ -719,7 +719,7 @@ def delete(self, ids=None, query=None): ids = [ids] placeholders = ", ".join(["?" for _ in ids]) - sql_query = f"DELETE FROM {self.name} WHERE id IN ({placeholders})" + sql_query = f"""DELETE FROM "{self.name}" WHERE id IN ({placeholders})""" self.__connection.execute(sql_query, ids) self.__connection.commit() else: @@ -757,7 +757,7 @@ def optimize_for_query(self, keys, is_unique=False): for size_hash in size_hashes: self.__connection.execute( - f"""CREATE INDEX IF NOT EXISTS "idx_{self.name}_{size_hash}" ON {self.name} ({size_hash})""" + f"""CREATE INDEX IF NOT EXISTS "idx_{self.name}_{size_hash}" ON "{self.name}" ({size_hash})""" ) self.__connection.commit() @@ -768,7 +768,7 @@ def list_optimized_keys(self): for k, v in { _[1].replace(f"idx_{self.name}_", ""): {"is_unique": bool(_[2])} for _ in self.__connection.execute( - f"PRAGMA index_list({self.name})" + f"""PRAGMA index_list("{self.name}")""" ).fetchall() if _[1].startswith(f"idx_{self.name}_") }.items() @@ -844,20 +844,6 @@ def create_trigger( END; """ - def list_triggers(self, table_name=None): - if table_name: - result = self.__connection.execute( - f"SELECT name FROM sqlite_master WHERE type = 'trigger' AND tbl_name = '{table_name}';" - ) - else: - result = self.__connection.execute( - f"SELECT name FROM sqlite_master WHERE type = 'trigger';" - ) - return result.fetchall() - - def delete_trigger(self, trigger_name): - self.__connection.execute(f"DROP TRIGGER {trigger_name};") - def vaccum(self): self.__connection.execute("VACUUM") self.__connection.commit() diff --git a/setup.py b/setup.py index dc34f9a..7443ddb 100644 --- a/setup.py +++ b/setup.py @@ -18,7 +18,7 @@ EMAIL = "praneeth@bpraneeth.com" AUTHOR = "BEDAPUDI PRANEETH" REQUIRES_PYTHON = ">=3.6.0" -VERSION = "0.0.2.dev59" +VERSION = "0.0.2.dev60" # What packages are required for this module to be executed? REQUIRED = [] diff --git a/tests/test_vector_search.py b/tests/test_vector_search.py index 450ec36..63a6250 100644 --- a/tests/test_vector_search.py +++ b/tests/test_vector_search.py @@ -11,7 +11,7 @@ index = DefinedIndex( "test_vetors", schema = { - "embedding": DefinedTypes.normalized_embedding + "emb edding": DefinedTypes.normalized_embedding } ) @@ -21,7 +21,7 @@ query_vectors = [] -for f in tqdm(glob(f"{sys.argv[1]}/*")): +for f in tqdm(glob(f"{sys.argv[1]}/*")[:5]): if os.path.splitext(f)[-1].lower() in {".png", ".jpg", ".jpeg"}: embedding = onnx_model.get_image_embeddings([Image.open(f).convert("RGB")])[0] embedding = embedding / np.linalg.norm(embedding) @@ -30,7 +30,7 @@ index.update( { f: { - "embedding": embedding, + "emb edding": embedding, } } ) @@ -38,7 +38,7 @@ for query_vector in query_vectors: results = index.search( {}, - sort_by="embedding", + sort_by="emb edding", reversed_sort=True, sort_by_embedding=query_vector, select_keys=[],