diff --git a/Duckdb.html b/Duckdb.html new file mode 100644 index 0000000..35ed8e9 --- /dev/null +++ b/Duckdb.html @@ -0,0 +1,64 @@ + + + + DuckDB UDF Function Test + + +

Function Examples

+ +

Function 1: dot product of two double[] vectors

+

import numpy as np


def dot_product(vector1, vector2):
    """Return the dot product of two numeric vectors.

    Args:
        vector1: Sequence of numbers.
        vector2: Sequence of numbers with the same length as ``vector1``.

    Returns:
        The dot product as a plain Python ``float``.

    Raises:
        ValueError: If the two vectors have different lengths (raised by
            ``numpy.dot``).
    """
    # NOTE(review): when used as a DuckDB UDF the arguments presumably
    # arrive as Python lists of floats -- confirm against the registration.
    left = np.asarray(vector1, dtype=float)
    right = np.asarray(vector2, dtype=float)
    # Convert the NumPy scalar to a built-in float so the result is a plain
    # Python value regardless of the input element types.
    return float(np.dot(left, right))

+ +

Function 2: dot product of a double[] vector and a vector stored as a comma-separated varchar

+

import numpy as np


def dot_product2(vector1, vector2):
    """Return the dot product of a numeric vector and a comma-separated one.

    Args:
        vector1: Sequence of numbers.
        vector2: String of comma-separated numbers, e.g. ``"0.1,0.2,0.3"``.
            Whitespace around values and empty fields (such as the one left
            by a trailing comma, or an empty string) are ignored.

    Returns:
        The dot product as a plain Python ``float``.

    Raises:
        ValueError: If a field is not a valid number, or if the parsed
            vector and ``vector1`` have different lengths.
    """
    # Skip blank fields: "".split(',') yields [''], and float('') would
    # raise before the length check in numpy.dot could report the real issue.
    parsed = [float(token) for token in vector2.split(',') if token.strip()]
    left = np.asarray(vector1, dtype=float)
    right = np.asarray(parsed, dtype=float)
    return float(np.dot(left, right))

+ +

Register UDFs: register both dot product functions

+

import duckdb
from duckdb.typing import VARCHAR, DOUBLE

# Register both Python functions as scalar UDFs on DuckDB's default
# connection. The module-level duckdb.create_function is used because the
# duckdb module has no `con` attribute.

# DOUBLE[] parameter type. list_type builds a variable-length LIST type;
# array_type is for fixed-size ARRAYs and needs an explicit size.
double_list = duckdb.list_type(DOUBLE)

# NOTE(review): side_effects=True is kept from the original. Both functions
# look pure, so the flag may be unnecessary -- confirm it is not there on
# purpose (e.g. to stop DuckDB from caching results during timing runs).
duckdb.create_function(
    "dot_product",
    dot_product,
    [double_list, double_list],
    DOUBLE,
    side_effects=True,
)
duckdb.create_function(
    "dot_product2",
    dot_product2,
    [double_list, VARCHAR],
    DOUBLE,
    side_effects=True,
)

+ +

Run UDFs: execute a query with each dot product function

+

import time

import duckdb
from duckdb.typing import VARCHAR, DOUBLE

# Time one query per UDF on DuckDB's default connection.
# The names `vectors`, `col1`, `col2`, `select` and `tablename` are
# placeholders that must be defined before this snippet runs.

# Query vector rendered as a SQL list literal body: "0.1, 0.2, ...".
vector1 = ', '.join(str(value) for value in vectors)
# Quoted column identifiers "0" .. "383", one per vector component.
vector_columns = ', '.join(f'"{i}"' for i in range(384))

# Query 1: the stored vector is spread over 384 DOUBLE columns.
# perf_counter is monotonic, so it is the right clock for elapsed time.
start_time = time.perf_counter()
duckdb.execute(f"""SELECT {col1}, {col2},
    dot_product([{vector1}], [{vector_columns}]) AS similarity
    FROM {tablename} ORDER BY similarity DESC""").fetchdf()
end_time = time.perf_counter()
print(f"Time {end_time - start_time}")

# Query 2: the stored vector is a single comma-separated VARCHAR column
# (`vectors_col_name` stands for that column's name).
start_time = time.perf_counter()
duckdb.execute(f"""SELECT {select},
    dot_product2([{vector1}], vectors_col_name) AS similarity
    FROM {tablename} ORDER BY similarity DESC""").fetchdf()
end_time = time.perf_counter()
print(f"Time {end_time - start_time}")

+ + diff --git a/index.html b/index.html index 25c3dd7..ca65094 100644 --- a/index.html +++ b/index.html @@ -21,6 +21,9 @@

Project Link

StorageLayoutMapper + +

Random Links

+ Duckdb UDF test