Skip to content

Commit

Permalink
Merge pull request #4103 from nulib/adjust-embedding-text
Browse files Browse the repository at this point in the history
Make embedding keys configurable
  • Loading branch information
kdid authored Aug 15, 2024
2 parents ab1cdaf + df226dd commit 692f006
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 25 deletions.
24 changes: 23 additions & 1 deletion app/config/config.exs
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,29 @@ config :meadow, Meadow.Search.Cluster,
default: nil
),
embedding_dimensions:
aws_secret("meadow", dig: ["search", "embedding_dimensions"], default: nil)
aws_secret("meadow", dig: ["search", "embedding_dimensions"], default: nil),
embedding_text_fields: [
:title,
:description,
:collection,
# :alternate_title,
# :caption,
# :table_of_contents,
# :abstract,
# :contributor,
# :creator,
# :date_created,
# :genre,
# :subject,
# :style_period,
# :language,
# :location,
# :publisher,
# :scope_and_contents,
# :technique,
# :physical_description_material,
# :physical_description_size,
]

config :meadow,
ark: %{
Expand Down
26 changes: 2 additions & 24 deletions app/lib/meadow/indexing/v2/work.ex
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ defmodule Meadow.Indexing.V2.Work do

alias Meadow.Data.FileSets
alias Meadow.Data.Schemas.{ControlledMetadataEntry, NoteEntry, RelatedURLEntry}
alias Meadow.Search.Config

def encode(work) do
%{
Expand Down Expand Up @@ -77,32 +78,9 @@ defmodule Meadow.Indexing.V2.Work do
|> prepare_embedding_field()
end

@embedding_keys [
:title,
:description,
:collection,
:alternate_title,
:caption,
:table_of_contents,
:abstract,
:contributor,
:creator,
:date_created,
:genre,
:subject,
:style_period,
:language,
:location,
:publisher,
:scope_and_contents,
:technique,
:physical_description_material,
:physical_description_size,
]

defp prepare_embedding_field(map) do
value =
@embedding_keys
Config.embedding_text_fields()
|> Enum.reduce([], fn field_name, acc ->
v = prepare_embedding_value(Map.get(map, field_name))
[v | acc]
Expand Down
5 changes: 5 additions & 0 deletions app/lib/meadow/search/config.ex
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,11 @@ defmodule Meadow.Search.Config do
|> Keyword.get(:embedding_dimensions)
end

@doc """
Returns the value stored under the `:embedding_text_fields` key of the
`Meadow.Search.Cluster` entry in the `:meadow` application environment.

Returns `nil` when that key is not present in the keyword list.
"""
def embedding_text_fields do
  :meadow
  |> Application.get_env(Meadow.Search.Cluster)
  |> Keyword.get(:embedding_text_fields)
end

def index_versions do
index_configs()
|> Enum.map(& &1.version)
Expand Down

0 comments on commit 692f006

Please sign in to comment.