diff --git a/cps/db.py b/cps/db.py index 33b195fac3..4d3acf0688 100644 --- a/cps/db.py +++ b/cps/db.py @@ -47,7 +47,7 @@ from flask_babel import get_locale from flask import flash -from . import logger, ub, isoLanguages +from . import logger, ub, isoLanguages, lb_search from .pagination import Pagination from .string_helper import strip_whitespaces @@ -973,7 +973,18 @@ def get_cc_columns(self, config, filter_config_custom_read=False): def get_search_results(self, term, config, offset=None, order=None, limit=None, *join): order = order[0] if order else [Books.sort] pagination = None - result = self.search_query(term, config, *join).order_by(*order).all() + + # search also through the subtitles (for videos) + other_terms = lb_search.get_search_terms(term) + # lb_search.get_search_terms returns a list of video titles, "term" parameter is expected to be a book/video title + term = [term] + other_terms + + result = list() + for term_part in term: + # the search_query function below only searches for books titles + result += self.search_query(term_part, config, *join).order_by(*order).all() + # we need to remove duplicates because the same book/video could be found multiple times + result = list(set(result)) result_count = len(result) if offset is not None and limit is not None: offset = int(offset) diff --git a/cps/lb_search.py b/cps/lb_search.py new file mode 100644 index 0000000000..868d29976c --- /dev/null +++ b/cps/lb_search.py @@ -0,0 +1,31 @@ +import os +import re + +from . 
# Captures the leading "<title>" part of every output line shaped like
# "<title> - <rest>"; lines whose first character is a digit are skipped.
# NOTE(review): scripts/lb-wrapper passes --to-json for the "search" command;
# confirm its output still contains the "<title> - ..." lines expected here.
_TITLE_PATTERN = re.compile(r"^[^\d\n].*?(?= - )", re.MULTILINE)


def get_search_terms(term):
    """Return the video titles reported by the lb-wrapper "search" command.

    The wrapper executable is taken from the ``LB_WRAPPER`` environment
    variable and defaults to ``lb-wrapper``. It is invoked as
    ``<wrapper> search <term>`` and its standard output is scanned for
    title prefixes.

    :param term: search string; a falsy value (``None`` or ``""``) skips the
        subprocess entirely.
    :returns: a new list of matched titles in first-seen order without
        duplicates. The list is empty when ``term`` is falsy, when the
        wrapper cannot be run, or when it exits with a non-zero status.
        Failures are logged and never raised, so the caller can always
        fall back to its regular search.
    """
    if not term:
        return []

    lb_executable = os.getenv("LB_WRAPPER", "lb-wrapper")
    # The term is passed as a separate argv entry, so no shell is involved on
    # the Python side.
    # NOTE(review): scripts/lb-wrapper appears to build its command with
    # `eval` and the term wrapped in single quotes - confirm that terms
    # containing quotes are handled safely there.
    subprocess_args = [lb_executable, "search", term]
    log.debug("Executing: %s", subprocess_args)

    # Best-effort boundary: a failing subtitle search must not break the
    # regular search, so any failure to run the wrapper is logged and ignored.
    try:
        p = process_open(subprocess_args, newlines=True)
        stdout, stderr = p.communicate()
    except Exception as ex:
        log.error("Error executing lb-wrapper: %s", ex)
        return []

    if p.returncode != 0:
        log.error("Error executing lb-wrapper: %s", stderr)
        return []

    # `stdout or ""` tolerates a missing stdout stream. dict.fromkeys drops
    # repeated titles while keeping their order, so the caller does not run
    # the same title query more than once.
    matches = _TITLE_PATTERN.findall(stdout or "")
    return list(dict.fromkeys(matches))
"process substitution" explained at https://unix.stackexchange.com/a/324170 -# 1>&2 redirect back-to-STDERR to avoid nested (repeat) logging, explained at https://stackoverflow.com/a/15936384 -eval "${xklb_full_cmd}" \ - > >(while read -r line; do if match_pattern "$line"; then echo "$line"; else log "Info" "$line"; fi; done) \ - 2> >(while read -r line; do if match_pattern "$line"; then echo "$line"; else log "Debug" "$line" 1>&2; fi; done) & -# 2024-01-11: HOW THIS WORKS... -# 0) xklb sends a flood of yt-dlp status message lines like "downloading 59.8% 2.29MiB/s" to STDERR. -# 1) Then, "2> >(...)" reroutes (only those raw lines!) to STDIN, instead of logging them (as "Debug" lines). -# 2) Then, upon receiving them at STDIN, "> >(...)" again prevents (only those raw lines!) from being logged (as "Info" messages). -# 3) Then, cps/tasks/download.py uses an equivalent Python REGEX to flag (only these raw lines! yes a 3rd time!) -# parsing them to send "percentage download progress" info along to the "Tasks" view in the web front-end: -# https://github.com/iiab/calibre-web/blob/870bf6d35f890712f44ce711f3f8f6b541ccf1fe/cps/tasks/download.py#L46 +if [[ $XKLB_INTERNAL_CMD == "search" ]]; then + eval "${xklb_full_cmd}" \ + > >(while read -r line; do echo "$line"; done) \ + 2> >(while read -r line; do echo "$line" 1>&2; done) & +else + # >(...) "process substitution" explained at https://unix.stackexchange.com/a/324170 + # 1>&2 redirect back-to-STDERR to avoid nested (repeat) logging, explained at https://stackoverflow.com/a/15936384 + eval "${xklb_full_cmd}" \ + > >(while read -r line; do if match_pattern "$line"; then echo "$line"; else log "Info" "$line"; fi; done) \ + 2> >(while read -r line; do if match_pattern "$line"; then echo "$line"; else log "Debug" "$line" 1>&2; fi; done) & + # 2024-01-11: HOW THIS WORKS... + # 0) xklb sends a flood of yt-dlp status message lines like "downloading 59.8% 2.29MiB/s" to STDERR. 
+ # 1) Then, "2> >(...)" reroutes (only those raw lines!) to STDIN, instead of logging them (as "Debug" lines). + # 2) Then, upon receiving them at STDIN, "> >(...)" again prevents (only those raw lines!) from being logged (as "Info" messages). + # 3) Then, cps/tasks/download.py uses an equivalent Python REGEX to flag (only these raw lines! yes a 3rd time!) + # parsing them to send "percentage download progress" info along to the "Tasks" view in the web front-end: + # https://github.com/iiab/calibre-web/blob/870bf6d35f890712f44ce711f3f8f6b541ccf1fe/cps/tasks/download.py#L46 +fi pid=$!