diff --git a/Duckdb.html b/Duckdb.html new file mode 100644 index 0000000..35ed8e9 --- /dev/null +++ b/Duckdb.html @@ -0,0 +1,64 @@ + + +
+
+ import numpy as np
+
def dot_product(vector1, vector2):
    """Return the dot product of two numeric sequences as a Python float.

    Args:
        vector1: Sequence of numbers (for example a list).
        vector2: Sequence of numbers with the same length as ``vector1``.

    Returns:
        The scalar dot product as a built-in ``float``.

    Raises:
        ValueError: If the two vectors have different lengths (raised by
            ``np.dot``).
    """
    # asarray skips the copy that np.array always makes for ndarray input;
    # the explicit dtype keeps integer input from yielding an integer result.
    left = np.asarray(vector1, dtype=np.float64)
    right = np.asarray(vector2, dtype=np.float64)
    # Hand back a plain float rather than a NumPy scalar so the value matches
    # the DOUBLE return type this function is registered with.
    return float(np.dot(left, right))
+
+ import numpy as np
+
def dot_product2(vector1, vector2):
    """Return the dot product of a numeric sequence and a comma-separated string.

    Args:
        vector1: Sequence of numbers (for example a list).
        vector2: String of comma-separated numbers, e.g. ``"0.1,0.2,0.3"``.
            Blank tokens (such as the one produced by a trailing comma) are
            ignored.

    Returns:
        The scalar dot product as a built-in ``float``.

    Raises:
        ValueError: If a non-blank token is not a valid number, or if the
            parsed vector and ``vector1`` have different lengths.
    """
    # Skip blank tokens so "1,2," or an empty string does not fail inside
    # float(''); a genuinely missing value still surfaces as a length
    # mismatch in np.dot below.
    parsed = [float(token) for token in vector2.split(',') if token.strip()]
    left = np.asarray(vector1, dtype=np.float64)
    right = np.asarray(parsed, dtype=np.float64)
    # Plain float rather than a NumPy scalar, matching the DOUBLE return type
    # this function is registered with.
    return float(np.dot(left, right))
+
+ import duckdb
+ from duckdb.typing import VARCHAR, DOUBLE
+
# Register the two Python functions under the SQL names "dot_product" and
# "dot_product2", both declared to return DOUBLE.
# NOTE(review): `duckdb.con` is not defined in the visible code — presumably a
# connection object; verify where it comes from.
# NOTE(review): side_effects=True is kept exactly as written; both functions
# look deterministic, so confirm the flag is intentional.
register_udf = duckdb.con.create_function

# dot_product(list, list): both arguments use duckdb.array_type(float).
dot_product_params = [duckdb.array_type(float), duckdb.array_type(float)]
register_udf("dot_product", dot_product, dot_product_params,
             DOUBLE, side_effects=True)

# dot_product2(list, text): the second argument arrives as a VARCHAR.
dot_product2_params = [duckdb.array_type(float), VARCHAR]
register_udf("dot_product2", dot_product2, dot_product2_params,
             DOUBLE, side_effects=True)
+
+ import duckdb
+ from duckdb.typing import VARCHAR, DOUBLE
+
import time

# Render the query vector as a comma-separated SQL list body. The SQL below
# refers to it as `vector1`; the original `vectors1` name is kept as an alias
# so nothing else that reads it breaks.
vector1 = vectors1 = ', '.join(str(value) for value in vectors)
# Double-quoted identifiers "0" .. "383", one per vector dimension column.
vector_columns = ', '.join(f'"{i}"' for i in range(0, 384))

# NOTE(review): col1, col2, select and tablename are interpolated straight
# into the SQL text — assumed to be trusted column/table identifiers defined
# elsewhere; confirm they never carry user input.

# Variant 1: the stored vector is spread over one column per dimension.
# perf_counter is a monotonic clock intended for measuring intervals.
start_time = time.perf_counter()
duckdb.con.execute(f"""SELECT {col1}, {col2},
    dot_product( [{vector1}], [{vector_columns}]) AS similarity
    FROM {tablename} ORDER BY similarity DESC""").fetchdf()
end_time = time.perf_counter()
print(f"Time {end_time - start_time}")

# Variant 2: the stored vector is passed to dot_product2 from the single
# column named vectors_col_name.
start_time = time.perf_counter()
duckdb.con.execute(f"""SELECT {select},
    dot_product2( [{vector1}], vectors_col_name ) AS similarity
    FROM {tablename} ORDER BY similarity DESC""").fetchdf()
end_time = time.perf_counter()
print(f"Time {end_time - start_time}")
+