Skip to content

Commit

Permalink
[FEATURE] Filter Keywords prebuilt preset (#1086)
Browse files Browse the repository at this point in the history
Adds ``Filter Keywords``, a preset that can include or exclude media with any of the listed keywords. Both keywords and title/description are lower-cased before filtering.

Supports the following override variables:

* ``title_include_keywords``
* ``title_exclude_keywords``
* ``description_include_keywords``
* ``description_exclude_keywords``

For best usage, use the `~` tilde subscription mode to set a subscription's list override variables.
Tilde mode allows override variables to be set directly underneath it.

```
Plex TV Show by Date | Filter Keywords:
  
  = Documentaries:
    "~NOVA PBS":
       url: "https://www.youtube.com/@novapbs"
       title_exclude_keywords:
         - "preview"
         - "trailer"  

    "~To Catch a Smuggler":
       url: "https://www.youtube.com/@NatGeo"
       title_include_keywords:
         - "To Catch a Smuggler"
```
  • Loading branch information
jmbannon authored Oct 5, 2024
1 parent 0acf4c9 commit 28c2968
Show file tree
Hide file tree
Showing 13 changed files with 487 additions and 9 deletions.
5 changes: 5 additions & 0 deletions docs/source/config_reference/prebuilt_presets/common.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,11 @@ Common

.. highlight:: yaml

Filtering
-------------

.. literalinclude:: /../../src/ytdl_sub/prebuilt_presets/helpers/filtering.yaml

Media Quality
-------------

Expand Down
7 changes: 6 additions & 1 deletion docs/source/faq/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,12 @@ See `yt-dl's recommended way <https://github.com/ytdl-org/youtube-dl#how-do-i-pa
...download large channels?
~~~~~~~~~~~~~~~~~~~~~~~~~~~

You can use the prebuilt preset :doc:`chunk_initial_download </prebuilt_presets/helpers>`
See the prebuilt preset :doc:`chunk_initial_download </prebuilt_presets/helpers>`.

...filter to include or exclude based on certain keywords?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

See the prebuilt preset :doc:`Filter Keywords </prebuilt_presets/helpers>`.

There is a bug where...
-----------------------
Expand Down
34 changes: 34 additions & 0 deletions docs/source/prebuilt_presets/helpers.rst
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,40 @@ Add the following preset to download the best available audio and video quality,

``max_1080p``

Filter Keywords
---------------

``Filter Keywords`` can include or exclude media with any of the listed keywords. Both keywords and title/description are lower-cased before filtering.

Supports the following override variables:

* ``title_include_keywords``
* ``title_exclude_keywords``
* ``description_include_keywords``
* ``description_exclude_keywords``

.. tip::

   Use the ``~`` tilde subscription mode to set a subscription's list override variables.
   Tilde mode allows override variables to be set directly underneath it.

.. code-block:: yaml

   Plex TV Show by Date | Filter Keywords:

     = Documentaries:
       "~NOVA PBS":
          url: "https://www.youtube.com/@novapbs"
          title_exclude_keywords:
            - "preview"
            - "trailer"

       "~To Catch a Smuggler":
          url: "https://www.youtube.com/@NatGeo"
          title_include_keywords:
            - "To Catch a Smuggler"

Chunk Initial Download
----------------------

Expand Down
52 changes: 52 additions & 0 deletions src/ytdl_sub/prebuilt_presets/helpers/filtering.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
presets:

#############################################################################
# Filter Keywords
# Include or exclude media with any of the listed keywords in their titles
# or descriptions.
# Keywords are checked against a lower-cased title or description.
Filter Keywords:
overrides:
# default filter lists to be empty
title_include_keywords: "{ [] }"
title_exclude_keywords: "{ [] }"
description_include_keywords: "{ [] }"
description_exclude_keywords: "{ [] }"

# $0 - keyword to validate
# Errors with "filter keywords must be strings" unless $0 is a string;
# otherwise returns $0 lower-cased.
"%ensure_string": >-
{
%assert_then(
%is_string($0),
%lower($0),
"filter keywords must be strings"
)
}
# $0 - keyword list
# $1 - variable name for error messages
# Errors with "<$1> must be an array" unless $0 is an array; otherwise
# passes $0 through %array_apply with %ensure_string.
"%ensure_lower_array": >-
{
%assert_then(
%is_array($0),
%array_apply(
$0,
%ensure_string
),
%concat($1," must be an array")
)
}
# $0 - var to evaluate
# $1 - keyword list
# $2 - variable name for error messages
# $3 - default return if keyword list is empty
"%contains_keywords": >-
{
%if(
%bool( $1 ),
%contains_any( %lower($0), %ensure_lower_array($1, $2) ),
$3
)
}
# Include checks pass `true` as the empty-list default, so %not(...) yields
# false for them; exclude checks pass `false` as the empty-list default.
# NOTE(review): presumably an entry is dropped when any expression below is
# true - confirm against the filter_exclude documentation.
filter_exclude:
- "{ %not( %contains_keywords(title, title_include_keywords, 'title_include_keywords', true) ) }"
- "{ %not( %contains_keywords(description, description_include_keywords, 'description_include_keywords', true) ) }"
- "{ %contains_keywords(title, title_exclude_keywords, 'title_exclude_keywords', false) }"
- "{ %contains_keywords(description, description_exclude_keywords, 'description_exclude_keywords',false) }"
17 changes: 10 additions & 7 deletions src/ytdl_sub/script/functions/error_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,10 @@ def assert_(value: ReturnableArgument, assert_message: String) -> ReturnableArgu
Explicitly throw an error with the provided assert message if ``value`` evaluates to
False. If it evaluates to True, it will return ``value``.
"""
if not bool(value.value):
evaluated_val = value.value()
if not bool(evaluated_val.value):
raise UserThrownRuntimeError(assert_message)
return value
return evaluated_val

@staticmethod
def assert_then(
Expand All @@ -35,7 +36,7 @@ def assert_then(
"""
if not bool(value.value):
raise UserThrownRuntimeError(assert_message)
return ret
return ret.value()

@staticmethod
def assert_eq(
Expand All @@ -46,9 +47,10 @@ def assert_eq(
Explicitly throw an error with the provided assert message if ``value`` does not equal
``equals``. If they do equal, then return ``value``.
"""
if not value.value == equals.value:
evaluated_val = value.value()
if not evaluated_val.value == equals.value:
raise UserThrownRuntimeError(assert_message)
return value
return evaluated_val

@staticmethod
def assert_ne(
Expand All @@ -59,6 +61,7 @@ def assert_ne(
Explicitly throw an error with the provided assert message if ``value`` equals
``equals``. If they do not equal, then return ``value``.
"""
if value.value == equals.value:
evaluated_value = value.value()
if evaluated_value.value == equals.value:
raise UserThrownRuntimeError(assert_message)
return value
return evaluated_value
2 changes: 1 addition & 1 deletion src/ytdl_sub/script/types/function.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,7 @@ def resolve(
resolved_variables: Dict[Variable, Resolvable],
custom_functions: Dict[str, "VariableDependency"],
) -> Resolvable:
# TODO: Make conditionals not execute all branches!!!
# Ensure conditionals do not execute all branches
conditional_return_args = self.function_spec.conditional_arg_indices(
num_input_args=len(self.args)
)
Expand Down
4 changes: 4 additions & 0 deletions src/ytdl_sub/script/utils/type_checking.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,10 @@ def conditional_arg_indices(self, num_input_args: int) -> List[int]:
return list(range(1, num_input_args, 2)) + [num_input_args - 1]
if self.function_name == "if_passthrough":
return [0, 1] # true-passthrough, false-passthrough
if self.function_name in ("assert", "assert_eq", "assert_ne"):
return [0]
if self.function_name == "assert_then":
return [1]
return []

@property
Expand Down
171 changes: 171 additions & 0 deletions tests/integration/prebuilt_presets/test_filter_keywords.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
import re

import pytest
from expected_transaction_log import assert_transaction_log_matches

from ytdl_sub.script.utils.exceptions import UserThrownRuntimeError
from ytdl_sub.subscriptions.subscription import Subscription
from ytdl_sub.utils.exceptions import ValidationException


@pytest.fixture
def filter_subscription_dict(output_directory):
    """
    Base subscription definition that stacks ``Filter Keywords`` on top of
    ``Plex TV Show by Date``. Tests add keyword overrides to the returned dict.
    """
    preset_names = ["Plex TV Show by Date", "Filter Keywords"]
    override_values = {
        "url": "https://your.name.here",
        "tv_show_directory": output_directory,
    }
    return {"preset": preset_names, "overrides": override_values}


class TestFilterKeywords:
    """Integration tests for the ``Filter Keywords`` prebuilt preset."""

    # Every list override variable the preset exposes. Shared by the
    # error-path tests so the list is declared in a single place.
    _KEYWORD_VARIABLES = [
        "title_include_keywords",
        "title_exclude_keywords",
        "description_include_keywords",
        "description_exclude_keywords",
    ]

    def test_no_overrides(
        self,
        config,
        filter_subscription_dict,
        output_directory,
        subscription_name,
        mock_download_collection_entries,
    ):
        """Dry-run with no keyword overrides set, leaving all filter lists at their defaults."""
        subscription = Subscription.from_dict(
            config=config,
            preset_name=subscription_name,
            preset_dict=filter_subscription_dict,
        )

        with mock_download_collection_entries(
            is_youtube_channel=False, num_urls=1, is_dry_run=True
        ):
            transaction_log = subscription.download(dry_run=True)

        assert_transaction_log_matches(
            output_directory=output_directory,
            transaction_log=transaction_log,
            # Plain literal: there is nothing to interpolate in this path.
            transaction_log_summary_file_name="integration/prebuilt_presets/filter_keywords_empty.txt",
        )

    @pytest.mark.parametrize("filter_mode", ["include", "exclude"])
    def test_title(
        self,
        config,
        filter_subscription_dict,
        output_directory,
        subscription_name,
        mock_download_collection_entries,
        filter_mode: str,
    ):
        """Dry-run with ``title_{filter_mode}_keywords`` set and compare the transaction log."""
        # NOTE(review): the upper-cased keyword presumably exercises the
        # preset's lower-casing of keywords - confirm against the mock titles.
        filter_subscription_dict["overrides"][f"title_{filter_mode}_keywords"] = [
            "not included",
            "MOCK ENTRY 20-3",
        ]
        subscription = Subscription.from_dict(
            config=config,
            preset_name=subscription_name,
            preset_dict=filter_subscription_dict,
        )

        with mock_download_collection_entries(
            is_youtube_channel=False, num_urls=1, is_dry_run=True
        ):
            transaction_log = subscription.download(dry_run=True)

        assert_transaction_log_matches(
            output_directory=output_directory,
            transaction_log=transaction_log,
            transaction_log_summary_file_name=f"integration/prebuilt_presets/title_filter_keywords_{filter_mode}.txt",
        )

    @pytest.mark.parametrize("filter_mode", ["include", "exclude"])
    def test_description(
        self,
        config,
        filter_subscription_dict,
        output_directory,
        subscription_name,
        mock_download_collection_entries,
        filter_mode: str,
    ):
        """Dry-run with ``description_{filter_mode}_keywords`` set and compare the transaction log."""
        filter_subscription_dict["overrides"][f"description_{filter_mode}_keywords"] = [
            "no filter here",
            "description",
        ]
        subscription = Subscription.from_dict(
            config=config,
            preset_name=subscription_name,
            preset_dict=filter_subscription_dict,
        )

        with mock_download_collection_entries(
            is_youtube_channel=False, num_urls=1, is_dry_run=True
        ):
            transaction_log = subscription.download(dry_run=True)

        assert_transaction_log_matches(
            output_directory=output_directory,
            transaction_log=transaction_log,
            transaction_log_summary_file_name=f"integration/prebuilt_presets/description_filter_keywords_{filter_mode}.txt",
        )

    @pytest.mark.parametrize("keyword_variable", _KEYWORD_VARIABLES)
    def test_error_not_list_type(
        self,
        config,
        filter_subscription_dict,
        output_directory,
        subscription_name,
        mock_download_collection_entries,
        keyword_variable,
    ):
        """A keyword override that is a plain string must raise '<variable> must be an array'."""
        filter_subscription_dict["overrides"][keyword_variable] = "not array"
        subscription = Subscription.from_dict(
            config=config,
            preset_name=subscription_name,
            preset_dict=filter_subscription_dict,
        )

        with (
            mock_download_collection_entries(is_youtube_channel=False, num_urls=1, is_dry_run=True),
            pytest.raises(UserThrownRuntimeError, match=f"{keyword_variable} must be an array"),
        ):
            _ = subscription.download(dry_run=True)

    @pytest.mark.parametrize("keyword_variable", _KEYWORD_VARIABLES)
    def test_error_not_string_keyword(
        self,
        config,
        filter_subscription_dict,
        output_directory,
        subscription_name,
        mock_download_collection_entries,
        keyword_variable,
    ):
        """A keyword list holding a non-string element must raise 'filter keywords must be strings'."""
        # Script-syntax array whose second element is itself an array.
        filter_subscription_dict["overrides"][keyword_variable] = "{['str', ['nested array not']]}"
        subscription = Subscription.from_dict(
            config=config,
            preset_name=subscription_name,
            preset_dict=filter_subscription_dict,
        )

        with (
            mock_download_collection_entries(is_youtube_channel=False, num_urls=1, is_dry_run=True),
            pytest.raises(UserThrownRuntimeError, match="filter keywords must be strings"),
        ):
            _ = subscription.download(dry_run=True)
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
No new, modified, or removed files in '{output_directory}'
Loading

0 comments on commit 28c2968

Please sign in to comment.