Skip to content

Commit

Permalink
Added Hamming distance for MySQL
Browse files Browse the repository at this point in the history
  • Loading branch information
ankane committed Oct 6, 2024
1 parent aa469ad commit f6cb441
Show file tree
Hide file tree
Showing 5 changed files with 70 additions and 2 deletions.
19 changes: 19 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -417,9 +417,28 @@ Supported values are:

- `euclidean`
- `cosine`
- `hamming`

Note: The `DISTANCE()` function is [only available on HeatWave](https://dev.mysql.com/doc/refman/9.0/en/vector-functions.html). Hamming distance on `binary` columns is computed with `BIT_COUNT()` rather than `DISTANCE()`.

### Binary Vectors

Use the `binary` type to store binary vectors

```ruby
# Adds a `binary` column named `embedding` to the `items` table.
class AddEmbeddingToItems < ActiveRecord::Migration[7.2]
  def change
    add_column(:items, :embedding, :binary)
  end
end
```

Get the nearest neighbors by Hamming distance

```ruby
Item.nearest_neighbors(:embedding, "\x05", distance: "hamming").first(5)
```

## Examples

- [Embeddings](#openai-embeddings) with OpenAI
Expand Down
7 changes: 6 additions & 1 deletion lib/neighbor/model.rb
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ def self.neighbor_attributes
dimensions = v[:dimensions]
dimensions ||= column_info&.limit unless column_info&.type == :binary
type = v[:type] || column_info&.type
type = :bit if type == :binary && adapter == :mysql

if !Neighbor::Utils.validate_dimensions(value, type, dimensions, adapter).nil?
errors.add(k, "must have #{dimensions} dimensions")
Expand Down Expand Up @@ -144,7 +145,11 @@ def self.neighbor_attributes
when :mariadb
"VEC_DISTANCE(#{quoted_attribute}, #{query})"
when :mysql
"DISTANCE(#{quoted_attribute}, #{query}, #{connection.quote(operator)})"
if operator == "BIT_COUNT"
"BIT_COUNT(#{quoted_attribute} ^ #{query})"
else
"DISTANCE(#{quoted_attribute}, #{query}, #{connection.quote(operator)})"
end
else
if operator == "#"
"bit_count(#{quoted_attribute} # #{query})"
Expand Down
9 changes: 8 additions & 1 deletion lib/neighbor/utils.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ module Neighbor
module Utils
# Checks that +value+ has the expected number of dimensions.
#
# value    - the vector; must respond to +dimensions+ when +type+ is
#            :sparsevec, and to +size+ for every other type
# type     - column type symbol (e.g. :sparsevec, :bit)
# expected - required dimension count; a nil/false value skips the check
# adapter  - database adapter symbol (e.g. :sqlite, :mysql)
#
# Returns an error message String on a mismatch, otherwise nil.
def self.validate_dimensions(value, type, expected, adapter)
  dimensions = type == :sparsevec ? value.dimensions : value.size
  # :bit values on SQLite and MySQL are scaled by 8 (presumably +size+ counts
  # bytes there -- confirm against callers). The scaling must happen exactly
  # once; applying it per adapter in separate statements would multiply a
  # SQLite value by 64.
  dimensions *= 8 if type == :bit && [:sqlite, :mysql].include?(adapter)
  return unless expected && dimensions != expected

  "Expected #{expected} dimensions, not #{dimensions}"
end
Expand All @@ -20,6 +20,8 @@ def self.validate_finite(value, type)
end

def self.validate(value, dimensions:, type:, adapter:)
type = :bit if type == :binary && adapter == :mysql

if (message = validate_dimensions(value, type, dimensions, adapter))
raise Error, message
end
Expand Down Expand Up @@ -87,6 +89,11 @@ def self.operator(adapter, column_type, distance)
when "euclidean"
"EUCLIDEAN"
end
when :binary
case distance
when "hamming"
"BIT_COUNT"
end
else
raise ArgumentError, "Unsupported type: #{column_type}"
end
Expand Down
35 changes: 35 additions & 0 deletions test/mysql_bit_test.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
require_relative "test_helper"
require_relative "support/mysql"

# Hamming-distance nearest-neighbor tests against MysqlItem#binary_embedding.
class MysqlBitTest < Minitest::Test
  # Every test starts from an empty table.
  def setup
    MysqlItem.delete_all
  end

  # Instance form: neighbors of item 1; the expected ids do not include 1.
  def test_hamming
    create_bit_items
    neighbors = MysqlItem.find(1).nearest_neighbors(:binary_embedding, distance: "hamming").first(3)
    assert_equal [2, 3], neighbors.map(&:id)
    # 2 and 3 differing bits per byte, over 1024 bytes.
    assert_elements_in_delta [2 * 1024, 3 * 1024], neighbors.map(&:neighbor_distance)
  end

  # Scope form: the query equals item 2's embedding, so item 2 is at distance 0.
  def test_hamming_scope
    create_bit_items
    query = "\x05" * 1024
    neighbors = MysqlItem.nearest_neighbors(:binary_embedding, query, distance: "hamming").first(5)
    assert_equal [2, 3, 1], neighbors.map(&:id)
    assert_elements_in_delta [0 * 1024, 1 * 1024, 2 * 1024], neighbors.map(&:neighbor_distance)
  end

  # A value one byte longer than the fixtures must fail validation.
  def test_invalid_dimensions
    oversized = "\x00" * 1024 + "\x11"
    error = assert_raises(ActiveRecord::RecordInvalid) { MysqlItem.create!(binary_embedding: oversized) }
    assert_equal "Validation failed: Binary embedding must have 8192 dimensions", error.message
  end

  # Fixture rows: ids 1..3, each embedding a single byte repeated 1024 times.
  def create_bit_items
    { 1 => "\x00", 2 => "\x05", 3 => "\x07" }.each do |id, byte|
      MysqlItem.create!(id: id, binary_embedding: byte * 1024)
    end
  end
end
2 changes: 2 additions & 0 deletions test/support/mysql.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,13 @@ class MysqlRecord < ActiveRecord::Base
# Define the mysql_items table: a 3-dimension vector column plus a binary
# column for bit embeddings. `force: true` asks ActiveRecord to recreate the
# table if it already exists.
MysqlRecord.connection.instance_eval do
  create_table(:mysql_items, force: true) do |table|
    table.vector(:embedding, limit: 3)
    table.binary(:binary_embedding)
  end
end

# Model backing the mysql_items table, with neighbor search on both columns.
class MysqlItem < MysqlRecord
  has_neighbors :embedding
  # 1024 bytes * 8 bits per byte = 8192 dimensions.
  has_neighbors :binary_embedding, dimensions: 1024 * 8
end

# ensure has_neighbors does not cause model schema to load
Expand Down

0 comments on commit f6cb441

Please sign in to comment.