From df226dd863861705835ea1a71bb05a86ebc2aaea Mon Sep 17 00:00:00 2001
From: Karen Shaw
Date: Thu, 15 Aug 2024 15:51:14 +0000
Subject: [PATCH] Make embedding keys configurable

Replace the hard-coded @embedding_keys module attribute in
Meadow.Indexing.V2.Work with a list read from the
:embedding_text_fields key of the Meadow.Search.Cluster config,
exposed through Meadow.Search.Config.embedding_text_fields/0.

The list configured in config.exs enables only :title, :description
and :collection. The other seventeen fields that @embedding_keys used
to include are kept as commented-out entries so they can be turned
back on individually.

---
 app/config/config.exs              | 24 +++++++++++++++++++++++-
 app/lib/meadow/indexing/v2/work.ex | 26 ++------------------------
 app/lib/meadow/search/config.ex    |  5 +++++
 3 files changed, 30 insertions(+), 25 deletions(-)

diff --git a/app/config/config.exs b/app/config/config.exs
index 55aea6a1a..3392d04e5 100644
--- a/app/config/config.exs
+++ b/app/config/config.exs
@@ -81,7 +81,29 @@ config :meadow, Meadow.Search.Cluster,
       default: nil
     ),
   embedding_dimensions:
-    aws_secret("meadow", dig: ["search", "embedding_dimensions"], default: nil)
+    aws_secret("meadow", dig: ["search", "embedding_dimensions"], default: nil),
+  embedding_text_fields: [
+    :title,
+    :description,
+    :collection,
+    # :alternate_title,
+    # :caption,
+    # :table_of_contents,
+    # :abstract,
+    # :contributor,
+    # :creator,
+    # :date_created,
+    # :genre,
+    # :subject,
+    # :style_period,
+    # :language,
+    # :location,
+    # :publisher,
+    # :scope_and_contents,
+    # :technique,
+    # :physical_description_material,
+    # :physical_description_size,
+  ]
 
 config :meadow,
   ark: %{
diff --git a/app/lib/meadow/indexing/v2/work.ex b/app/lib/meadow/indexing/v2/work.ex
index eacfc78c2..c231ec15c 100644
--- a/app/lib/meadow/indexing/v2/work.ex
+++ b/app/lib/meadow/indexing/v2/work.ex
@@ -5,6 +5,7 @@ defmodule Meadow.Indexing.V2.Work do
 
   alias Meadow.Data.FileSets
   alias Meadow.Data.Schemas.{ControlledMetadataEntry, NoteEntry, RelatedURLEntry}
+  alias Meadow.Search.Config
 
   def encode(work) do
     %{
@@ -77,32 +78,9 @@ defmodule Meadow.Indexing.V2.Work do
     |> prepare_embedding_field()
   end
 
-  @embedding_keys [
-    :title,
-    :description,
-    :collection,
-    :alternate_title,
-    :caption,
-    :table_of_contents,
-    :abstract,
-    :contributor,
-    :creator,
-    :date_created,
-    :genre,
-    :subject,
-    :style_period,
-    :language,
-    :location,
-    :publisher,
-    :scope_and_contents,
-    :technique,
-    :physical_description_material,
-    :physical_description_size,
-  ]
-
   defp prepare_embedding_field(map) do
     value =
-      @embedding_keys
+      Config.embedding_text_fields()
       |> Enum.reduce([], fn field_name, acc ->
         v = prepare_embedding_value(Map.get(map, field_name))
         [v | acc]
diff --git a/app/lib/meadow/search/config.ex b/app/lib/meadow/search/config.ex
index 24315837b..13c3ee1bf 100644
--- a/app/lib/meadow/search/config.ex
+++ b/app/lib/meadow/search/config.ex
@@ -76,6 +76,11 @@ defmodule Meadow.Search.Config do
     |> Keyword.get(:embedding_dimensions)
   end
 
+  def embedding_text_fields do
+    Application.get_env(:meadow, Meadow.Search.Cluster)
+    |> Keyword.get(:embedding_text_fields)
+  end
+
   def index_versions do
     index_configs()
     |> Enum.map(& &1.version)