From 3d123a99c9679aa795d3c1ba36853745b57e7d3f Mon Sep 17 00:00:00 2001
From: Blondel MONDESIR
Date: Thu, 28 Mar 2024 10:57:58 -0400
Subject: [PATCH 1/6] Create lb_search.py

Function to search through subtitles in xklb-metadata.db. The video titles
returned as results are used to enhance Calibre-Web's simple search.
---
 cps/lb_search.py | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)
 create mode 100644 cps/lb_search.py

diff --git a/cps/lb_search.py b/cps/lb_search.py
new file mode 100644
index 0000000000..868d29976c
--- /dev/null
+++ b/cps/lb_search.py
@@ -0,0 +1,51 @@
+import os
+import re
+
+from . import logger
+from .constants import XKLB_DB_FILE
+from .subproc_wrapper import process_open
+
+log = logger.create()
+
+# Captures the leading "<title>" of every output line shaped like
+# "<title> - <rest>"; lines that start with a digit are never matched.
+# NOTE(review): scripts/lb-wrapper may emit JSON (--to-json) for "search";
+# confirm that this pattern still extracts titles from that format.
+_TITLE_PATTERN = re.compile(r"^[^\d\n].*?(?= - )", re.MULTILINE)
+
+
+def get_search_terms(term):
+    """Search xklb-metadata.db through lb-wrapper and return video titles.
+
+    Runs ``<lb-wrapper> search <term>``; the executable is taken from the
+    LB_WRAPPER environment variable and defaults to "lb-wrapper".
+
+    :param term: search string; a falsy value (None, "") skips the lookup
+    :return: list of unique titles in first-seen order; empty if nothing
+             matched, the wrapper failed to start or it exited non-zero
+    """
+    if not term:
+        return []
+
+    # NOTE(review): term is presumably user input. It is passed as a single
+    # argv element here, but scripts/lb-wrapper appears to splice it into a
+    # string that is eval'd; confirm that quotes in terms are safe there.
+    lb_executable = os.getenv("LB_WRAPPER", "lb-wrapper")
+    subprocess_args = [lb_executable, "search", term]
+    log.debug("Executing: %s", subprocess_args)
+
+    try:
+        p = process_open(subprocess_args, newlines=True)
+        stdout, stderr = p.communicate()
+        if p.returncode != 0:
+            log.error("Error executing lb-wrapper: %s", stderr)
+            return []
+        matches = _TITLE_PATTERN.findall(stdout)
+    except Exception as ex:
+        # Best effort: log and fall back to "no extra titles".
+        log.error("Error executing lb-wrapper: %s", ex)
+        return []
+
+    # A title may be printed more than once; drop repeats but keep the order
+    # so callers do not repeat the same lookup.
+    return list(dict.fromkeys(matches))

From 7ffe29d6a8a0fb5f19d9d4987e21aab4fff4f4c9 Mon Sep 17 00:00:00 2001
From: Blondel MONDESIR
Date: Thu, 28 Mar 2024 11:02:23 -0400
Subject: [PATCH 2/6] Enhance results using xklb search

---
 cps/db.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/cps/db.py b/cps/db.py
index ceb692ec21..ba95f814f4 100644
--- a/cps/db.py
+++ b/cps/db.py
@@ -45,7 +45,7 @@
 from flask_babel import get_locale
 from flask import flash
 
-from . import logger, ub, isoLanguages
+from .
import logger, ub, isoLanguages, lb_search
 from .pagination import Pagination
 
 from weakref import WeakSet
@@ -954,7 +954,15 @@ def get_cc_columns(self, config, filter_config_custom_read=False):
     def get_search_results(self, term, config, offset=None, order=None, limit=None, *join):
         order = order[0] if order else [Books.sort]
         pagination = None
-        result = self.search_query(term, config, *join).order_by(*order).all()
+
+        # search also through the subtitles (for videos)
+        other_terms = lb_search.get_search_terms(term)
+        term = [term] + other_terms
+
+        result = list()
+        for term_part in term:
+            result += self.search_query(term_part, config, *join).order_by(*order).all()
+        result = list(dict.fromkeys(result))
         result_count = len(result)
         if offset != None and limit != None:
             offset = int(offset)

From a5486db89c4db127152a02815bc59d2e76ed2a31 Mon Sep 17 00:00:00 2001
From: Blondel MONDESIR
Date: Tue, 28 May 2024 10:42:00 -0400
Subject: [PATCH 3/6] Add explanations

---
 cps/db.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/cps/db.py b/cps/db.py
index b8f7005791..f418039bb1 100644
--- a/cps/db.py
+++ b/cps/db.py
@@ -960,11 +960,14 @@ def get_search_results(self, term, config, offset=None, order=None, limit=None,
 
         # search also through the subtitles (for videos)
         other_terms = lb_search.get_search_terms(term)
+        # lb_search.get_search_terms returns a list of video titles, "term" parameter is expected to be a book/video title
         term = [term] + other_terms
 
         result = list()
         for term_part in term:
+            # the search_query function below only searches for book titles
             result += self.search_query(term_part, config, *join).order_by(*order).all()
+        # we need to remove duplicates because the same book/video could be found multiple times; dict.fromkeys keeps the query order, which set() would discard
         result = list(dict.fromkeys(result))
         result_count = len(result)
         if offset != None and limit != None:

From 3b907dc08ba3a13d697796eb600d105af3d62e9a Mon Sep 17 00:00:00 2001
From: Blondel MONDESIR
Date: Sat, 31 Aug 2024 17:11:29 -0400
Subject: [PATCH 4/6] Keep output tidy for
regex --- scripts/lb-wrapper | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/lb-wrapper b/scripts/lb-wrapper index f132319daa..7850133ef4 100755 --- a/scripts/lb-wrapper +++ b/scripts/lb-wrapper @@ -83,7 +83,7 @@ if [[ $XKLB_INTERNAL_CMD == "tubeadd" ]]; then elif [[ $XKLB_INTERNAL_CMD == "dl" ]]; then xklb_full_cmd="lb dl '${XKLB_DB_FILE}' --video --search '${URL_OR_SEARCH_TERM}' ${FORMAT_OPTIONS} --write-thumbnail --subs --live --live-from-start -o '${OUTTMPL}' ${VERBOSITY}" elif [[ $XKLB_INTERNAL_CMD == "search" ]]; then - xklb_full_cmd="lb search '${XKLB_DB_FILE}' '${URL_OR_SEARCH_TERM}'" + xklb_full_cmd="lb search '${XKLB_DB_FILE}' '${URL_OR_SEARCH_TERM}' --to-json" else log "Error" "Invalid xklb command. Please use 'tubeadd', 'dl' or 'search'." exit 1 From 9d9b72db1723261b0e8eecdee8d5984d883a8d3b Mon Sep 17 00:00:00 2001 From: Blondel MONDESIR Date: Sat, 31 Aug 2024 17:56:18 -0400 Subject: [PATCH 5/6] Add --to-json --- scripts/lb-wrapper | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/lb-wrapper b/scripts/lb-wrapper index 7850133ef4..f132319daa 100755 --- a/scripts/lb-wrapper +++ b/scripts/lb-wrapper @@ -83,7 +83,7 @@ if [[ $XKLB_INTERNAL_CMD == "tubeadd" ]]; then elif [[ $XKLB_INTERNAL_CMD == "dl" ]]; then xklb_full_cmd="lb dl '${XKLB_DB_FILE}' --video --search '${URL_OR_SEARCH_TERM}' ${FORMAT_OPTIONS} --write-thumbnail --subs --live --live-from-start -o '${OUTTMPL}' ${VERBOSITY}" elif [[ $XKLB_INTERNAL_CMD == "search" ]]; then - xklb_full_cmd="lb search '${XKLB_DB_FILE}' '${URL_OR_SEARCH_TERM}' --to-json" + xklb_full_cmd="lb search '${XKLB_DB_FILE}' '${URL_OR_SEARCH_TERM}'" else log "Error" "Invalid xklb command. Please use 'tubeadd', 'dl' or 'search'." 
exit 1 From 3ec51a92984842644311425d62729c9f43077497 Mon Sep 17 00:00:00 2001 From: Blondel MONDESIR Date: Sat, 31 Aug 2024 18:11:33 -0400 Subject: [PATCH 6/6] Suppress info logging for search output in STDOUT --- scripts/lb-wrapper | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/scripts/lb-wrapper b/scripts/lb-wrapper index f132319daa..c0327e3c6f 100755 --- a/scripts/lb-wrapper +++ b/scripts/lb-wrapper @@ -83,7 +83,7 @@ if [[ $XKLB_INTERNAL_CMD == "tubeadd" ]]; then elif [[ $XKLB_INTERNAL_CMD == "dl" ]]; then xklb_full_cmd="lb dl '${XKLB_DB_FILE}' --video --search '${URL_OR_SEARCH_TERM}' ${FORMAT_OPTIONS} --write-thumbnail --subs --live --live-from-start -o '${OUTTMPL}' ${VERBOSITY}" elif [[ $XKLB_INTERNAL_CMD == "search" ]]; then - xklb_full_cmd="lb search '${XKLB_DB_FILE}' '${URL_OR_SEARCH_TERM}'" + xklb_full_cmd="lb search '${XKLB_DB_FILE}' '${URL_OR_SEARCH_TERM}' --to-json" else log "Error" "Invalid xklb command. Please use 'tubeadd', 'dl' or 'search'." exit 1 @@ -91,18 +91,24 @@ fi log "Info" "Running xklb command: ${xklb_full_cmd}" -# >(...) "process substitution" explained at https://unix.stackexchange.com/a/324170 -# 1>&2 redirect back-to-STDERR to avoid nested (repeat) logging, explained at https://stackoverflow.com/a/15936384 -eval "${xklb_full_cmd}" \ - > >(while read -r line; do if match_pattern "$line"; then echo "$line"; else log "Info" "$line"; fi; done) \ - 2> >(while read -r line; do if match_pattern "$line"; then echo "$line"; else log "Debug" "$line" 1>&2; fi; done) & -# 2024-01-11: HOW THIS WORKS... -# 0) xklb sends a flood of yt-dlp status message lines like "downloading 59.8% 2.29MiB/s" to STDERR. -# 1) Then, "2> >(...)" reroutes (only those raw lines!) to STDIN, instead of logging them (as "Debug" lines). -# 2) Then, upon receiving them at STDIN, "> >(...)" again prevents (only those raw lines!) from being logged (as "Info" messages). 
-# 3) Then, cps/tasks/download.py uses an equivalent Python REGEX to flag (only these raw lines! yes a 3rd time!) -# parsing them to send "percentage download progress" info along to the "Tasks" view in the web front-end: -# https://github.com/iiab/calibre-web/blob/870bf6d35f890712f44ce711f3f8f6b541ccf1fe/cps/tasks/download.py#L46 +if [[ $XKLB_INTERNAL_CMD == "search" ]]; then + eval "${xklb_full_cmd}" \ + > >(while read -r line; do echo "$line"; done) \ + 2> >(while read -r line; do echo "$line" 1>&2; done) & +else + # >(...) "process substitution" explained at https://unix.stackexchange.com/a/324170 + # 1>&2 redirect back-to-STDERR to avoid nested (repeat) logging, explained at https://stackoverflow.com/a/15936384 + eval "${xklb_full_cmd}" \ + > >(while read -r line; do if match_pattern "$line"; then echo "$line"; else log "Info" "$line"; fi; done) \ + 2> >(while read -r line; do if match_pattern "$line"; then echo "$line"; else log "Debug" "$line" 1>&2; fi; done) & + # 2024-01-11: HOW THIS WORKS... + # 0) xklb sends a flood of yt-dlp status message lines like "downloading 59.8% 2.29MiB/s" to STDERR. + # 1) Then, "2> >(...)" reroutes (only those raw lines!) to STDIN, instead of logging them (as "Debug" lines). + # 2) Then, upon receiving them at STDIN, "> >(...)" again prevents (only those raw lines!) from being logged (as "Info" messages). + # 3) Then, cps/tasks/download.py uses an equivalent Python REGEX to flag (only these raw lines! yes a 3rd time!) + # parsing them to send "percentage download progress" info along to the "Tasks" view in the web front-end: + # https://github.com/iiab/calibre-web/blob/870bf6d35f890712f44ce711f3f8f6b541ccf1fe/cps/tasks/download.py#L46 +fi pid=$!