forked from stashapp/stash-box
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #6 from javstash/ja-search-support
Ja search support
- Loading branch information
Showing
8 changed files
with
259 additions
and
29 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,12 +1,41 @@ | ||
# Rendered diff of a Dockerfile (hunk header: -1,12 +1,41). Removed and added
# lines are interleaved below without +/- markers, so the text is not a
# runnable Dockerfile exactly as shown.
#
# What the visible lines do:
#   * a "build" stage installs the server dev headers and ICU, copies the
#     pg_cjk_parser sources into /root/parser and runs `make USE_PGXS=1 install`;
#   * the last stage copies the built .bc/.so plus the control and SQL files
#     into the PostgreSQL $POSTGRES_VERSION directories, then clones/builds
#     pg_bktree, downloads/builds pg_bigm, and purges the build dependencies.
#
# NOTE(review): the build stage is FROM postgres:$POSTGRES_VERSION while the
# last stage is pinned to postgres:17.2-bookworm -- confirm both resolve to the
# same Debian release so the copied pg_cjk_parser.so finds matching libraries.
# NOTE(review): the `echo shared_preload_libraries=... >> /var/lib/postgresql/data/postgresql.conf`
# step runs at image build time -- presumably that directory is a volume that
# is initialized on first container start; verify the setting reaches the server.
# NOTE(review): v1.2-20240606.tar.gz is downloaded and unpacked, but no visible
# line removes the archive afterwards.
FROM postgres:14.2 | ||
ARG POSTGRES_VERSION=17 | ||
FROM postgres:$POSTGRES_VERSION AS build | ||
ARG POSTGRES_VERSION=17 | ||
RUN apt-get update && apt-get install -y --no-install-recommends postgresql-server-dev-$POSTGRES_VERSION gcc make icu-devtools libicu-dev | ||
|
||
RUN buildDeps='git make gcc postgresql-server-dev-14' \ | ||
RUN mkdir -p /root/parser | ||
WORKDIR /root/parser | ||
COPY pg_cjk_parser.c /root/parser/ | ||
COPY pg_cjk_parser.control /root/parser/ | ||
COPY Makefile /root/parser/ | ||
COPY pg_cjk_parser--0.0.1.sql /root/parser/ | ||
COPY zht2zhs.h /root/parser/ | ||
RUN make clean && make USE_PGXS=1 install | ||
|
||
FROM postgres:17.2-bookworm | ||
|
||
ARG POSTGRES_VERSION=17 | ||
COPY --from=build /root/parser/pg_cjk_parser.bc /usr/lib/postgresql/$POSTGRES_VERSION/lib/bitcode | ||
COPY --from=build /root/parser/pg_cjk_parser.so /usr/lib/postgresql/$POSTGRES_VERSION/lib | ||
COPY --from=build /root/parser/pg_cjk_parser--0.0.1.sql /usr/share/postgresql/$POSTGRES_VERSION/extension | ||
COPY --from=build /root/parser/pg_cjk_parser.control /usr/share/postgresql/$POSTGRES_VERSION/extension | ||
|
||
RUN buildDeps='git make build-essential postgresql-server-dev-17 wget libicu-dev' \ | ||
&& apt update && apt install -y $buildDeps --no-install-recommends --reinstall ca-certificates \ | ||
&& git clone https://github.com/fake-name/pg-spgist_hamming.git \ | ||
&& make -C pg-spgist_hamming/bktree \ | ||
&& make -C pg-spgist_hamming/bktree install \ | ||
&& rm -rf pg-spgist_hamming \ | ||
&& apt purge -y --auto-remove $buildDeps | ||
&& git clone https://github.com/evirma/pg_bktree.git /usr/local/src/bktree \ | ||
&& cd /usr/local/src/bktree \ | ||
&& make USE_PGXS=1 && make USE_PGXS=1 install \ | ||
&& cd .. \ | ||
&& wget https://github.com/pgbigm/pg_bigm/archive/refs/tags/v1.2-20240606.tar.gz \ | ||
&& tar zxf v1.2-20240606.tar.gz \ | ||
&& cd pg_bigm-1.2-20240606 \ | ||
&& make USE_PGXS=1 && make USE_PGXS=1 install \ | ||
&& echo shared_preload_libraries='pg_bigm' >> /var/lib/postgresql/data/postgresql.conf \ | ||
&& cd .. \ | ||
&& rm -rf pg_bigm-1.2-20240606 \ | ||
&& rm -rf bktree \ | ||
&& apt purge -y --auto-remove $buildDeps \ | ||
&& apt clean | ||
|
||
EXPOSE 5432 | ||
CMD docker-entrypoint.sh postgres |
106 changes: 106 additions & 0 deletions
106
pkg/database/migrations/postgres-ja/1_ja_parser_postinstallation.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,106 @@ | ||
-- Install the pg_cjk_parser extension, register its prsd2_cjk_* functions as a
-- text search parser, and create a text search configuration that uses it.
CREATE EXTENSION pg_cjk_parser;

CREATE TEXT SEARCH PARSER public.pg_cjk_parser (
    START    = prsd2_cjk_start,
    GETTOKEN = prsd2_cjk_nexttoken,
    END      = prsd2_cjk_end,
    LEXTYPES = prsd2_cjk_lextype,
    HEADLINE = prsd2_cjk_headline
);

CREATE TEXT SEARCH CONFIGURATION public.config_2_gram_cjk (PARSER = pg_cjk_parser);

-- A plain SET applies to the current session only.
-- NOTE(review): confirm whether a database-wide default was intended here.
SET default_text_search_config TO 'public.config_2_gram_cjk';
|
||
-- Token-type -> dictionary mappings for public.config_2_gram_cjk.
-- Plain ASCII words are stemmed with english_stem; every other token type
-- listed below is mapped to the simple dictionary.
ALTER TEXT SEARCH CONFIGURATION public.config_2_gram_cjk
    ADD MAPPING FOR asciiword
    WITH english_stem;

-- ADD MAPPING accepts a comma-separated list of token types, so all of the
-- simple-dictionary mappings are registered in a single statement.
ALTER TEXT SEARCH CONFIGURATION public.config_2_gram_cjk
    ADD MAPPING FOR
        asciihword, cjk, email, entity, file, float, host,
        hword, hword_asciipart, hword_numpart, hword_part,
        int, numhword, numword, protocol, sfloat, tag,
        uint, url, url_path, version, word
    WITH simple;
97 changes: 97 additions & 0 deletions
97
pkg/database/migrations/postgres-ja/2_ja_bigm_textsearch.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
-- Create the pg_bigm extension, but only when the current role reports
-- is_superuser = on; for any other role the block exits without doing anything.
DO $$
BEGIN
    IF current_setting('is_superuser') <> 'on' THEN
        RETURN;
    END IF;

    CREATE EXTENSION IF NOT EXISTS pg_bigm;
END
$$;
|
||
-- From #35: rebuild the scene full-text index (GiST) so that it tokenizes
-- with the config_2_gram_cjk text search configuration.
-- IF EXISTS keeps the migration from aborting when the index is absent.
DROP INDEX IF EXISTS scene_search_ts_idx;

-- The indexed expression is kept exactly as-is: an expression index is only
-- usable by queries that repeat the same expression.
-- NOTE(review): studio_name and scene_title are not wrapped in COALESCE, so a
-- NULL in either makes the whole concatenated tsvector NULL for that row.
CREATE INDEX scene_search_ts_idx ON scene_search USING gist (
    (
        to_tsvector('config_2_gram_cjk', COALESCE(scene_date, '')) ||
        to_tsvector('config_2_gram_cjk', studio_name) ||
        to_tsvector('config_2_gram_cjk', COALESCE(performer_names, '')) ||
        to_tsvector('config_2_gram_cjk', scene_title) ||
        to_tsvector('config_2_gram_cjk', COALESCE(scene_code, ''))
    )
);
|
||
-- Replace the trigram indexes on performer names, disambiguations and aliases
-- with pg_bigm (gin_bigm_ops) indexes.
--
-- Every DROP uses IF EXISTS: the *_bigm_idx indexes do not exist the first
-- time this migration runs, and a plain DROP INDEX on a missing index raises
-- an error that aborts the migration.

-- From #2, gin_bigm_ops instead of gin_trgm_ops
DROP INDEX IF EXISTS name_trgm_idx;
DROP INDEX IF EXISTS name_bigm_idx;
CREATE INDEX name_bigm_idx ON "performers" USING GIN ("name" gin_bigm_ops);

-- From #12, gin_bigm_ops instead of gin_trgm_ops
DROP INDEX IF EXISTS disambiguation_trgm_idx;
DROP INDEX IF EXISTS disambiguation_bigm_idx;
CREATE INDEX disambiguation_bigm_idx ON "performers" USING GIN ("disambiguation" gin_bigm_ops);

DROP INDEX IF EXISTS performer_alias_trgm_idx;
DROP INDEX IF EXISTS performer_alias_bigm_idx;
CREATE INDEX performer_alias_bigm_idx ON "performer_aliases" USING GIN ("alias" gin_bigm_ops);
|
||
-- From #35 with the regex function around the scene title removed.
--
-- Row-level trigger function: when a scene's title, date, studio or code
-- changes, copy the new values (and the recomputed studio name) into the
-- scene_search row of that scene. Always returns NULL.
--
-- The studio name is "<studio name> <parent studio name>", with an empty
-- string in place of a missing parent. Because studios is inner-joined, a
-- scene whose studio_id matches no studio yields no subquery row and its
-- scene_search row is left unchanged.
CREATE OR REPLACE FUNCTION update_scene() RETURNS TRIGGER AS $$
BEGIN
    -- IS DISTINCT FROM is NULL-safe: with != a change from or to NULL
    -- evaluates to NULL and the update would be skipped.
    IF (
        NEW.title IS DISTINCT FROM OLD.title
        OR NEW.date IS DISTINCT FROM OLD.date
        OR NEW.studio_id IS DISTINCT FROM OLD.studio_id
        OR COALESCE(NEW.code, '') != COALESCE(OLD.code, '')
    ) THEN
        UPDATE scene_search
        SET
            scene_title = NEW.title,
            scene_date = NEW.date,
            studio_name = SUBQUERY.studio_name,
            scene_code = NEW.code
        FROM (
            SELECT
                S.id AS sid,
                T.name || ' ' || COALESCE(TP.name, '') AS studio_name
            FROM scenes S
            JOIN studios T ON S.studio_id = T.id
            LEFT JOIN studios TP ON T.parent_studio_id = TP.id
            -- Only the affected scene can match the outer WHERE clause.
            WHERE S.id = NEW.id
        ) SUBQUERY
        WHERE scene_id = NEW.id
            AND scene_id = SUBQUERY.sid;
    END IF;
    RETURN NULL;
END;
$$ LANGUAGE plpgsql;
|
||
-- From #35 with the regex function around the scene title removed.
--
-- Row-level trigger function: adds a scene_search row for the new scene.
-- studio_name is "<studio name> <parent studio name>", with an empty string
-- in place of a missing parent. If no studios row matches NEW.studio_id the
-- SELECT yields no rows and nothing is inserted. Always returns NULL.
CREATE OR REPLACE FUNCTION insert_scene() RETURNS TRIGGER AS $$
BEGIN
    INSERT INTO scene_search (scene_id, scene_title, scene_date, studio_name, scene_code)
    SELECT
        NEW.id,
        NEW.title,
        NEW.date,
        studio.name || ' ' || COALESCE(parent.name, ''),
        NEW.code
    FROM studios AS studio
    LEFT JOIN studios AS parent
        ON parent.id = studio.parent_studio_id
    WHERE studio.id = NEW.studio_id;

    RETURN NULL;
END;
$$ LANGUAGE plpgsql;
|
||
|
||
-- Rebuild scene_search from scratch: empty the table, then repopulate it
-- with one row per scene.
TRUNCATE TABLE scene_search;

-- From #35 with the regex function around the scene title removed.
-- The target columns are listed explicitly so the insert does not depend on
-- the physical column order of scene_search.
INSERT INTO scene_search (
    scene_id,
    scene_title,
    scene_date,
    studio_name,
    performer_names,
    scene_code
)
SELECT
    S.id AS scene_id,
    S.title AS scene_title,
    S.date::TEXT AS scene_date,
    -- "<studio name> <parent studio name>", empty string when there is no parent
    T.name || ' ' || COALESCE(TP.name, '') AS studio_name,
    -- performer names followed by the scene_performers "as" values,
    -- space-separated, with NULL entries rendered as empty strings
    ARRAY_TO_STRING(ARRAY_CAT(ARRAY_AGG(P.name), ARRAY_AGG(PS."as")), ' ', '') AS performer_names,
    S.code AS scene_code
FROM scenes S
LEFT JOIN scene_performers PS ON PS.scene_id = S.id
LEFT JOIN performers P ON PS.performer_id = P.id
LEFT JOIN studios T ON T.id = S.studio_id
LEFT JOIN studios TP ON T.parent_studio_id = TP.id
GROUP BY S.id, S.title, S.date, S.code, T.name, TP.name;
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters