Skip to content

Commit

Permalink
Change: #532 ElasticSearch設定の外出し (#650)
Browse files Browse the repository at this point in the history
* Change: #532 ElasticSearch設定の外出し

* バージョンチェック

* 起動時にエラー
  • Loading branch information
kmycode authored Mar 12, 2024
1 parent 8e7c665 commit a8fbcb3
Show file tree
Hide file tree
Showing 9 changed files with 508 additions and 269 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@
/node_modules/
/build/

# Ignore elasticsearch config
/.elasticsearch.yml

# Ignore Vagrant files
.vagrant/

Expand Down
93 changes: 13 additions & 80 deletions app/chewy/accounts_index.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,83 +3,9 @@
class AccountsIndex < Chewy::Index
include DatetimeClampingConcern

settings index: index_preset(refresh_interval: '30s'), analysis: {
filter: {
english_stop: {
type: 'stop',
stopwords: '_english_',
},

english_stemmer: {
type: 'stemmer',
language: 'english',
},

english_possessive_stemmer: {
type: 'stemmer',
language: 'possessive_english',
},

my_posfilter: {
type: 'sudachi_part_of_speech',
stoptags: [
'助詞',
'助動詞',
'補助記号,句点',
'補助記号,読点',
],
},
},

analyzer: {
natural: {
tokenizer: 'standard',
filter: %w(
lowercase
asciifolding
cjk_width
elision
english_possessive_stemmer
english_stop
english_stemmer
),
},

sudachi_analyzer: {
filter: %w(
my_posfilter
sudachi_normalizedform
),
type: 'custom',
tokenizer: 'sudachi_tokenizer',
},

verbatim: {
tokenizer: 'standard',
filter: %w(lowercase asciifolding cjk_width),
},

edge_ngram: {
tokenizer: 'edge_ngram',
filter: %w(lowercase asciifolding cjk_width),
},
},

tokenizer: {
edge_ngram: {
type: 'edge_ngram',
min_gram: 1,
max_gram: 15,
},

sudachi_tokenizer: {
resources_path: '/etc/elasticsearch/sudachi',
split_mode: 'A',
type: 'sudachi_tokenizer',
discard_punctuation: 'true',
},
},
}
# ElasticSearch config is moved to "/config/elasticsearch.default.yml".
# Edit it when original Mastodon changed ElasticSearch config.
settings index: index_preset(refresh_interval: '30s'), analysis: ChewyConfig.instance.accounts

index_scope ::Account.searchable.includes(:account_stat)

Expand All @@ -90,8 +16,15 @@ class AccountsIndex < Chewy::Index
field(:properties, type: 'keyword', value: ->(account) { account.searchable_properties })
field(:last_status_at, type: 'date', value: ->(account) { clamp_date(account.last_status_at || account.created_at) })
field(:domain, type: 'keyword', value: ->(account) { account.domain || '' })
field(:display_name, type: 'text', analyzer: 'verbatim') { field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'verbatim' }
field(:username, type: 'text', analyzer: 'verbatim', value: ->(account) { [account.username, account.domain].compact.join('@') }) { field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'verbatim' }
field(:text, type: 'text', analyzer: 'sudachi_analyzer', value: ->(account) { account.searchable_text }) { field(:stemmed, type: 'text', analyzer: 'natural') }
field(:display_name, type: 'text', analyzer: ChewyConfig.instance.accounts_analyzers.dig('display_name', 'analyzer')) do
field :edge_ngram, type: 'text', analyzer: ChewyConfig.instance.accounts_analyzers.dig('display_name', 'edge_ngram', 'analyzer'), search_analyzer: ChewyConfig.instance.accounts_analyzers.dig('display_name', 'edge_ngram', 'search_analyzer')
end
field(:username, type: 'text', analyzer: ChewyConfig.instance.accounts_analyzers.dig('username', 'analyzer'), value: lambda { |account|
[account.username, account.domain].compact.join('@')
}) do
field :edge_ngram, type: 'text', analyzer: ChewyConfig.instance.accounts_analyzers.dig('username', 'edge_ngram', 'analyzer'),
search_analyzer: ChewyConfig.instance.accounts_analyzers.dig('username', 'edge_ngram', 'search_analyzer')
end
field(:text, type: 'text', analyzer: ChewyConfig.instance.accounts_analyzers.dig('text', 'analyzer'), value: ->(account) { account.searchable_text }) { field(:stemmed, type: 'text', analyzer: ChewyConfig.instance.accounts_analyzers.dig('text', 'stemmed', 'analyzer')) }
end
end
82 changes: 5 additions & 77 deletions app/chewy/public_statuses_index.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,81 +3,9 @@
class PublicStatusesIndex < Chewy::Index
include DatetimeClampingConcern

settings index: index_preset(refresh_interval: '30s', number_of_shards: 5), analysis: {
filter: {
english_stop: {
type: 'stop',
stopwords: '_english_',
},

english_stemmer: {
type: 'stemmer',
language: 'english',
},

english_possessive_stemmer: {
type: 'stemmer',
language: 'possessive_english',
},

my_posfilter: {
type: 'sudachi_part_of_speech',
stoptags: [
'助詞',
'助動詞',
'補助記号,句点',
'補助記号,読点',
],
},
},

analyzer: {
content: {
tokenizer: 'uax_url_email',
filter: %w(
english_possessive_stemmer
lowercase
asciifolding
cjk_width
english_stop
english_stemmer
),
},

hashtag: {
tokenizer: 'keyword',
filter: %w(
word_delimiter_graph
lowercase
asciifolding
cjk_width
),
},

sudachi_analyzer: {
tokenizer: 'sudachi_tokenizer',
type: 'custom',
filter: %w(
english_possessive_stemmer
lowercase
asciifolding
cjk_width
english_stop
english_stemmer
my_posfilter
sudachi_normalizedform
),
},
},
tokenizer: {
sudachi_tokenizer: {
resources_path: '/etc/elasticsearch/sudachi',
split_mode: 'A',
type: 'sudachi_tokenizer',
discard_punctuation: 'true',
},
},
}
# ElasticSearch config is moved to "/config/elasticsearch.default.yml".
# Edit it when original Mastodon changed ElasticSearch config.
settings index: index_preset(refresh_interval: '30s', number_of_shards: 5), analysis: ChewyConfig.instance.public_statuses

index_scope ::Status.unscoped
.kept
Expand All @@ -87,8 +15,8 @@ class PublicStatusesIndex < Chewy::Index
root date_detection: false do
field(:id, type: 'long')
field(:account_id, type: 'long')
field(:text, type: 'text', analyzer: 'sudachi_analyzer', value: ->(status) { status.searchable_text }) { field(:stemmed, type: 'text', analyzer: 'content') }
field(:tags, type: 'text', analyzer: 'hashtag', value: ->(status) { status.tags.map(&:display_name) })
field(:text, type: 'text', analyzer: ChewyConfig.instance.public_statuses_analyzers.dig('text', 'analyzer'), value: ->(status) { status.searchable_text }) { field(:stemmed, type: 'text', analyzer: ChewyConfig.instance.public_statuses_analyzers.dig('text', 'stemmed', 'analyzer')) }
field(:tags, type: 'text', analyzer: ChewyConfig.instance.public_statuses_analyzers.dig('tags', 'analyzer'), value: ->(status) { status.tags.map(&:display_name) })
field(:language, type: 'keyword')
field(:domain, type: 'keyword', value: ->(status) { status.account.domain || '' })
field(:properties, type: 'keyword', value: ->(status) { status.searchable_properties })
Expand Down
86 changes: 5 additions & 81 deletions app/chewy/statuses_index.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,85 +3,9 @@
class StatusesIndex < Chewy::Index
include DatetimeClampingConcern

settings index: index_preset(refresh_interval: '30s', number_of_shards: 5), analysis: {
filter: {
english_stop: {
type: 'stop',
stopwords: '_english_',
},

english_stemmer: {
type: 'stemmer',
language: 'english',
},

english_possessive_stemmer: {
type: 'stemmer',
language: 'possessive_english',
},

my_posfilter: {
type: 'sudachi_part_of_speech',
stoptags: [
'助詞',
'助動詞',
'補助記号,句点',
'補助記号,読点',
],
},
},
analyzer: {
verbatim: {
tokenizer: 'uax_url_email',
filter: %w(lowercase),
},

content: {
tokenizer: 'uax_url_email',
filter: %w(
english_possessive_stemmer
lowercase
asciifolding
cjk_width
english_stop
english_stemmer
),
},

hashtag: {
tokenizer: 'keyword',
filter: %w(
word_delimiter_graph
lowercase
asciifolding
cjk_width
),
},

sudachi_analyzer: {
tokenizer: 'sudachi_tokenizer',
type: 'custom',
filter: %w(
english_possessive_stemmer
lowercase
asciifolding
cjk_width
english_stop
english_stemmer
my_posfilter
sudachi_normalizedform
),
},
},
tokenizer: {
sudachi_tokenizer: {
resources_path: '/etc/elasticsearch/sudachi',
split_mode: 'A',
type: 'sudachi_tokenizer',
discard_punctuation: 'true',
},
},
}
# ElasticSearch config is moved to "/config/elasticsearch.default.yml".
# Edit it when original Mastodon changed ElasticSearch config.
settings index: index_preset(refresh_interval: '30s', number_of_shards: 5), analysis: ChewyConfig.instance.statuses

index_scope ::Status.unscoped.kept.without_reblogs.includes(
:account,
Expand All @@ -107,8 +31,8 @@ class StatusesIndex < Chewy::Index
root date_detection: false do
field(:id, type: 'long')
field(:account_id, type: 'long')
field(:text, type: 'text', analyzer: 'sudachi_analyzer', value: ->(status) { status.searchable_text }) { field(:stemmed, type: 'text', analyzer: 'content') }
field(:tags, type: 'text', analyzer: 'hashtag', value: ->(status) { status.tags.map(&:display_name) })
field(:text, type: 'text', analyzer: ChewyConfig.instance.statuses_analyzers.dig('text', 'analyzer'), value: ->(status) { status.searchable_text }) { field(:stemmed, type: 'text', analyzer: ChewyConfig.instance.statuses_analyzers.dig('text', 'stemmed', 'analyzer')) }
field(:tags, type: 'text', analyzer: ChewyConfig.instance.statuses_analyzers.dig('tags', 'analyzer'), value: ->(status) { status.tags.map(&:display_name) })
field(:searchable_by, type: 'long', value: ->(status) { status.searchable_by })
field(:mentioned_by, type: 'long', value: ->(status) { status.mentioned_by })
field(:favourited_by, type: 'long', value: ->(status) { status.favourited_by })
Expand Down
37 changes: 6 additions & 31 deletions app/chewy/tags_index.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,36 +3,9 @@
class TagsIndex < Chewy::Index
include DatetimeClampingConcern

settings index: index_preset(refresh_interval: '30s'), analysis: {
analyzer: {
content: {
tokenizer: 'keyword',
filter: %w(
word_delimiter_graph
lowercase
asciifolding
cjk_width
),
},

edge_ngram: {
tokenizer: 'edge_ngram',
filter: %w(
lowercase
asciifolding
cjk_width
),
},
},

tokenizer: {
edge_ngram: {
type: 'edge_ngram',
min_gram: 2,
max_gram: 15,
},
},
}
# ElasticSearch config is moved to "/config/elasticsearch.default.yml".
# Edit it when original Mastodon changed ElasticSearch config.
settings index: index_preset(refresh_interval: '30s'), analysis: ChewyConfig.instance.tags

index_scope ::Tag.listable

Expand All @@ -41,7 +14,9 @@ class TagsIndex < Chewy::Index
end

root date_detection: false do
field(:name, type: 'text', analyzer: 'content', value: :display_name) { field(:edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'content') }
field(:name, type: 'text', analyzer: ChewyConfig.instance.tags_analyzers.dig('name', 'analyzer'), value: :display_name) do
field(:edge_ngram, type: 'text', analyzer: ChewyConfig.instance.tags_analyzers.dig('name', 'edge_ngram', 'analyzer'), search_analyzer: ChewyConfig.instance.tags_analyzers.dig('name', 'edge_ngram', 'search_analyzer'))
end
field(:reviewed, type: 'boolean', value: ->(tag) { tag.reviewed? })
field(:usage, type: 'long', value: ->(tag, crutches) { tag.history.aggregate(crutches.time_period).accounts })
field(:last_status_at, type: 'date', value: ->(tag) { clamp_date(tag.last_status_at || tag.created_at) })
Expand Down
Loading

0 comments on commit a8fbcb3

Please sign in to comment.