iiab · deldesir · Mar 28, 2024 · Mar 28, 2024 · Apr 11, 2024 · Apr 15, 2024
diff --git a/cps/db.py b/cps/db.py
@@ -47,7 +47,7 @@
 from flask_babel import get_locale
 from flask import flash
 
-from . import logger, ub, isoLanguages
+from . import logger, ub, isoLanguages, lb_search
 from .pagination import Pagination
 from .string_helper import strip_whitespaces
 
@@ -973,7 +973,18 @@ def get_cc_columns(self, config, filter_config_custom_read=False):
     def get_search_results(self, term, config, offset=None, order=None, limit=None, *join):
         order = order[0] if order else [Books.sort]
         pagination = None
-        result = self.search_query(term, config, *join).order_by(*order).all()
+
+        # search also through the subtitles (for videos)
+        other_terms = lb_search.get_search_terms(term)
+        # lb_search.get_search_terms returns a list of video titles, "term" parameter is expected to be a book/video title
+        term = [term] + other_terms
+
+        result = list()
+        for term_part in term:
+            # search_query only matches book metadata (title, authors, tags, series, publishers, custom columns), not subtitles
+            result += self.search_query(term_part, config, *join).order_by(*order).all()
+        # we need to remove duplicates because the same book/video could be found multiple times
+        result = list(set(result))
 def search_query(self, term, config, *join):
     """Build the query behind the free-text book search.

     A book matches when *term* occurs (case-insensitively, as a substring)
     in its title, in one of its tags, series or publishers, in its author
     names, or in the value of a custom column whose datatype is not one of
     datetime/rating/bool/int/float.

     :param term: search string; it is stripped and lower-cased before use.
         NOTE: ``%`` and ``_`` are not escaped, so they act as LIKE wildcards.
     :param config: configuration object; ``config.config_read_column`` is
         passed to ``generate_linked_query`` and the object itself to
         ``get_cc_columns``.
     :param join: optional outer-join arguments. Only 1, 2, 3 or 6 positional
         values are handled; any other count adds no join.
     :return: the un-executed query, restricted by ``self.common_filters(True)``
         and by the OR of all match expressions.
     """
     # Bug fix: the normalised value used to be thrown away (str is immutable),
     # so surrounding whitespace ended up inside every LIKE pattern.
     term = term.strip().lower()

     # Register ``lcase`` as the one-argument SQL function "lower" on the raw
     # DB-API connection so func.lower() below resolves to it.
     self.session.connection().connection.connection.create_function("lower", 1, lcase)

     # One author-name condition per comma/space separated word; they are
     # AND-ed together further down.
     author_conditions = [
         Books.authors.any(func.lower(Authors.name).ilike("%" + author_term + "%"))
         for author_term in re.split("[, ]+", term)
     ]

     query = self.generate_linked_query(config.config_read_column, Books)
     join_count = len(join)
     if join_count == 6:
         query = query.outerjoin(join[0], join[1]).outerjoin(join[2]).outerjoin(join[3], join[4]).outerjoin(join[5])
     elif join_count == 3:
         query = query.outerjoin(join[0], join[1]).outerjoin(join[2])
     elif join_count == 2:
         query = query.outerjoin(join[0], join[1])
     elif join_count == 1:
         query = query.outerjoin(join[0])

     like_pattern = "%" + term + "%"
     filter_expression = [Books.tags.any(func.lower(Tags.name).ilike(like_pattern)),
                          Books.series.any(func.lower(Series.name).ilike(like_pattern)),
                          Books.authors.any(and_(*author_conditions)),
                          Books.publishers.any(func.lower(Publishers.name).ilike(like_pattern)),
                          func.lower(Books.title).ilike(like_pattern)]

     # Custom columns take part in the search unless their datatype is one of these.
     cc = self.get_cc_columns(config, filter_config_custom_read=True)
     for c in cc:
         if c.datatype not in ["datetime", "rating", "bool", "int", "float"]:
             filter_expression.append(
                 getattr(Books,
                         'custom_column_' + str(c.id)).any(
                     func.lower(cc_classes[c.id].value).ilike(like_pattern)))
     return query.filter(self.common_filters(True)).filter(or_(*filter_expression))
 class Books(Base): 
     """ORM mapping for the ``books`` table.

     Plain columns hold per-book values. Authors, tags, series, ratings,
     languages and publishers are reached through the ``books_*_link``
     association tables; comments, data and identifiers are direct
     relationships. Every relationship registers a ``books`` backref on the
     related class.
     """
     __tablename__ = 'books' 
     # Default for ``pubdate`` below -- presumably a placeholder meaning "no publication date"; confirm.
     DEFAULT_PUBDATE = datetime(101, 1, 1, 0, 0, 0, 0)  # ("0101-01-01 00:00:00+00:00") 
     # Integer primary key, auto-incremented.
     id = Column(Integer, primary_key=True, autoincrement=True) 
     # Text columns declared with collation NOCASE (title, sort, author_sort, isbn).
     title = Column(String(collation='NOCASE'), nullable=False, default='Unknown') 
     sort = Column(String(collation='NOCASE')) 
     author_sort = Column(String(collation='NOCASE')) 
     # timestamp / last_modified default to the ``datetime.utcnow`` callable.
     timestamp = Column(TIMESTAMP, default=datetime.utcnow) 
     pubdate = Column(TIMESTAMP, default=DEFAULT_PUBDATE) 
     # Stored as a string, not a number; default "1.0".
     series_index = Column(String, nullable=False, default="1.0") 
     last_modified = Column(TIMESTAMP, default=datetime.utcnow) 
     path = Column(String, default="", nullable=False) 
     # Integer, default 0 -- presumably used as a 0/1 flag; confirm.
     has_cover = Column(Integer, default=0) 
     uuid = Column(String) 
     isbn = Column(String(collation='NOCASE'), default="") 
     flags = Column(Integer, nullable=False, default=1) 
     # Relationships declared with ``secondary=`` go through an association table.
     authors = relationship(Authors, secondary=books_authors_link, backref='books') 
     # Tags are loaded ordered by Tags.name.
     tags = relationship(Tags, secondary=books_tags_link, backref='books', order_by="Tags.name") 
     comments = relationship(Comments, backref='books') 
     data = relationship(Data, backref='books') 
     series = relationship(Series, secondary=books_series_link, backref='books') 
     ratings = relationship(Ratings, secondary=books_ratings_link, backref='books') 
     languages = relationship(Languages, secondary=books_languages_link, backref='books') 
     publishers = relationship(Publishers, secondary=books_publishers_link, backref='books') 
     identifiers = relationship(Identifiers, backref='books') 
 def search_query(self, term, config, *join):
     """Build the query behind the free-text book search.

     A book matches when *term* occurs (case-insensitively, as a substring)
     in its title, in one of its tags, series or publishers, in its author
     names, or in the value of a custom column whose datatype is not one of
     datetime/rating/bool/int/float.

     :param term: search string; it is stripped and lower-cased before use.
         NOTE: ``%`` and ``_`` are not escaped, so they act as LIKE wildcards.
     :param config: configuration object; ``config.config_read_column`` is
         passed to ``generate_linked_query`` and the object itself to
         ``get_cc_columns``.
     :param join: optional outer-join arguments. Only 1, 2, 3 or 6 positional
         values are handled; any other count adds no join.
     :return: the un-executed query, restricted by ``self.common_filters(True)``
         and by the OR of all match expressions.
     """
     # Bug fix: the normalised value used to be thrown away (str is immutable),
     # so surrounding whitespace ended up inside every LIKE pattern.
     term = term.strip().lower()

     # Register ``lcase`` as the one-argument SQL function "lower" on the raw
     # DB-API connection so func.lower() below resolves to it.
     self.session.connection().connection.connection.create_function("lower", 1, lcase)

     # One author-name condition per comma/space separated word; they are
     # AND-ed together further down.
     author_conditions = [
         Books.authors.any(func.lower(Authors.name).ilike("%" + author_term + "%"))
         for author_term in re.split("[, ]+", term)
     ]

     query = self.generate_linked_query(config.config_read_column, Books)
     join_count = len(join)
     if join_count == 6:
         query = query.outerjoin(join[0], join[1]).outerjoin(join[2]).outerjoin(join[3], join[4]).outerjoin(join[5])
     elif join_count == 3:
         query = query.outerjoin(join[0], join[1]).outerjoin(join[2])
     elif join_count == 2:
         query = query.outerjoin(join[0], join[1])
     elif join_count == 1:
         query = query.outerjoin(join[0])

     like_pattern = "%" + term + "%"
     filter_expression = [Books.tags.any(func.lower(Tags.name).ilike(like_pattern)),
                          Books.series.any(func.lower(Series.name).ilike(like_pattern)),
                          Books.authors.any(and_(*author_conditions)),
                          Books.publishers.any(func.lower(Publishers.name).ilike(like_pattern)),
                          func.lower(Books.title).ilike(like_pattern)]

     # Custom columns take part in the search unless their datatype is one of these.
     cc = self.get_cc_columns(config, filter_config_custom_read=True)
     for c in cc:
         if c.datatype not in ["datetime", "rating", "bool", "int", "float"]:
             filter_expression.append(
                 getattr(Books,
                         'custom_column_' + str(c.id)).any(
                     func.lower(cc_classes[c.id].value).ilike(like_pattern)))
     return query.filter(self.common_filters(True)).filter(or_(*filter_expression))
 class Books(Base): 
     """ORM mapping for the ``books`` table.

     Plain columns hold per-book values. Authors, tags, series, ratings,
     languages and publishers are reached through the ``books_*_link``
     association tables; comments, data and identifiers are direct
     relationships. Every relationship registers a ``books`` backref on the
     related class.
     """
     __tablename__ = 'books' 
  
     # Default for ``pubdate`` below -- presumably a placeholder meaning "no publication date"; confirm.
     DEFAULT_PUBDATE = datetime(101, 1, 1, 0, 0, 0, 0)  # ("0101-01-01 00:00:00+00:00") 
  
     # Integer primary key, auto-incremented.
     id = Column(Integer, primary_key=True, autoincrement=True) 
     # Text columns declared with collation NOCASE (title, sort, author_sort, isbn).
     title = Column(String(collation='NOCASE'), nullable=False, default='Unknown') 
     sort = Column(String(collation='NOCASE')) 
     author_sort = Column(String(collation='NOCASE')) 
     # timestamp / last_modified default to the ``datetime.utcnow`` callable.
     timestamp = Column(TIMESTAMP, default=datetime.utcnow) 
     pubdate = Column(TIMESTAMP, default=DEFAULT_PUBDATE) 
     # Stored as a string, not a number; default "1.0".
     series_index = Column(String, nullable=False, default="1.0") 
     last_modified = Column(TIMESTAMP, default=datetime.utcnow) 
     path = Column(String, default="", nullable=False) 
     # Integer, default 0 -- presumably used as a 0/1 flag; confirm.
     has_cover = Column(Integer, default=0) 
     uuid = Column(String) 
     isbn = Column(String(collation='NOCASE'), default="") 
     flags = Column(Integer, nullable=False, default=1) 
  
     # Relationships declared with ``secondary=`` go through an association table.
     authors = relationship(Authors, secondary=books_authors_link, backref='books') 
     # Tags are loaded ordered by Tags.name.
     tags = relationship(Tags, secondary=books_tags_link, backref='books', order_by="Tags.name") 
     comments = relationship(Comments, backref='books') 
     data = relationship(Data, backref='books') 
     series = relationship(Series, secondary=books_series_link, backref='books') 
     ratings = relationship(Ratings, secondary=books_ratings_link, backref='books') 
     languages = relationship(Languages, secondary=books_languages_link, backref='books') 
     publishers = relationship(Publishers, secondary=books_publishers_link, backref='books') 
     identifiers = relationship(Identifiers, backref='books') 
         result_count = len(result)
         if offset is not None and limit is not None:
             offset = int(offset)

diff --git a/cps/lb_search.py b/cps/lb_search.py
@@ -0,0 +1,31 @@
+import os
+import re
+
+from . import logger
+from .constants import XKLB_DB_FILE
+from .subproc_wrapper import process_open
+
+log = logger.create()
+
+def get_search_terms(term):
+    """Run the lb-wrapper "search" command for *term* and return the titles it reports.
+
+    The executable name is "lb-wrapper" unless the LB_WRAPPER environment
+    variable overrides it. An empty list is returned when *term* is falsy,
+    when the wrapper exits with a non-zero code, or when running it raises.
+    """
+    titles = []
+    if not term:
+        return titles
+
+    command = [os.getenv("LB_WRAPPER", "lb-wrapper"), "search", term]
+    log.debug("Executing: %s", command)
+
+    try:
+        proc = process_open(command, newlines=True)
+        out, err = proc.communicate()
+        if proc.returncode != 0:
+            log.error("Error executing lb-wrapper: %s", err)
+        else:
+            # From each line that does not start with a digit, keep the text before the first " - ".
+            title_regex = r"^[^\d\n].*?(?= - )"
+            titles += re.findall(title_regex, out, re.MULTILINE)
+    except Exception as ex:
+        log.error("Error executing lb-wrapper: %s", ex)
+
+    return titles
diff --git a/scripts/lb-wrapper b/scripts/lb-wrapper
@@ -83,26 +83,32 @@ if [[ $XKLB_INTERNAL_CMD == "tubeadd" ]]; then
 elif [[ $XKLB_INTERNAL_CMD == "dl" ]]; then
     xklb_full_cmd="lb dl '${XKLB_DB_FILE}' --video --search '${URL_OR_SEARCH_TERM}' ${FORMAT_OPTIONS} --write-thumbnail --subs --live --live-from-start -o '${OUTTMPL}' ${VERBOSITY}"
 elif [[ $XKLB_INTERNAL_CMD == "search" ]]; then
-    xklb_full_cmd="lb search '${XKLB_DB_FILE}' '${URL_OR_SEARCH_TERM}'"
+    xklb_full_cmd="lb search '${XKLB_DB_FILE}' '${URL_OR_SEARCH_TERM}' --to-json"
 else
     log "Error" "Invalid xklb command. Please use 'tubeadd', 'dl' or 'search'."
     exit 1
 fi
 
 log "Info" "Running xklb command: ${xklb_full_cmd}"
 
-# >(...) "process substitution" explained at https://unix.stackexchange.com/a/324170
-# 1>&2 redirect back-to-STDERR to avoid nested (repeat) logging, explained at https://stackoverflow.com/a/15936384
-eval "${xklb_full_cmd}" \
-    > >(while read -r line; do if match_pattern "$line"; then echo "$line"; else log "Info" "$line"; fi; done) \
-    2> >(while read -r line; do if match_pattern "$line"; then echo "$line"; else log "Debug" "$line" 1>&2; fi; done) &
-# 2024-01-11: HOW THIS WORKS...
-# 0) xklb sends a flood of yt-dlp status message lines like "downloading  59.8%    2.29MiB/s" to STDERR.
-# 1) Then, "2> >(...)" reroutes (only those raw lines!) to STDIN, instead of logging them (as "Debug" lines).
-# 2) Then, upon receiving them at STDIN, "> >(...)" again prevents (only those raw lines!) from being logged (as "Info" messages).
-# 3) Then, cps/tasks/download.py uses an equivalent Python REGEX to flag (only these raw lines! yes a 3rd time!)
-#    parsing them to send "percentage download progress" info along to the "Tasks" view in the web front-end:
-#    https://github.com/iiab/calibre-web/blob/870bf6d35f890712f44ce711f3f8f6b541ccf1fe/cps/tasks/download.py#L46
+if [[ $XKLB_INTERNAL_CMD == "search" ]]; then
+    eval "${xklb_full_cmd}" \
+        > >(while read -r line; do echo "$line"; done) \
+        2> >(while read -r line; do echo "$line" 1>&2; done) &
+else
+    # >(...) "process substitution" explained at https://unix.stackexchange.com/a/324170
+    # 1>&2 redirect back-to-STDERR to avoid nested (repeat) logging, explained at https://stackoverflow.com/a/15936384
+    eval "${xklb_full_cmd}" \
+        > >(while read -r line; do if match_pattern "$line"; then echo "$line"; else log "Info" "$line"; fi; done) \
+        2> >(while read -r line; do if match_pattern "$line"; then echo "$line"; else log "Debug" "$line" 1>&2; fi; done) &
+    # 2024-01-11: HOW THIS WORKS...
+    # 0) xklb sends a flood of yt-dlp status message lines like "downloading  59.8%    2.29MiB/s" to STDERR.
+    # 1) Then, "2> >(...)" reroutes (only those raw lines!) to STDIN, instead of logging them (as "Debug" lines).
+    # 2) Then, upon receiving them at STDIN, "> >(...)" again prevents (only those raw lines!) from being logged (as "Info" messages).
+    # 3) Then, cps/tasks/download.py uses an equivalent Python REGEX to flag (only these raw lines! yes a 3rd time!)
+    #    parsing them to send "percentage download progress" info along to the "Tasks" view in the web front-end:
+    #    https://github.com/iiab/calibre-web/blob/870bf6d35f890712f44ce711f3f8f6b541ccf1fe/cps/tasks/download.py#L46
+fi
 
 pid=$!