Skip to content

Commit

Permalink
Moved by tag from the fuzzy match group to exact match (#4481)
Browse files Browse the repository at this point in the history
* Moved by tag from the regex group to exact match

* Added the test for frisby

* Run linter
  • Loading branch information
szymon-polaczy authored Jun 13, 2024
1 parent 7cf12ce commit 47fe5df
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 4 deletions.
2 changes: 1 addition & 1 deletion catalog/dags/common/storage/media.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,13 @@
"no person",
"squareformat",
"undefined",
"by",
}

# Filter out tags that contain the following terms. All entries should be lowercase.
TAG_CONTAINS_DENYLIST = {
":",
"=",
"by",
"by-nc",
"by-nc-nd",
"by-nc-sa",
Expand Down
7 changes: 5 additions & 2 deletions catalog/tests/dags/common/storage/test_media.py
Original file line number Diff line number Diff line change
Expand Up @@ -565,8 +565,11 @@ def test_MediaStore_validates_filetype(filetype, url, expected_filetype):
id="sort and enriches multiple tags",
),
pytest.param(
["cc0", "valid", "garbage:=metacrap", "uploaded:by=flickrmobile"],
[{"name": "valid", "provider": "test_provider"}],
["cc0", "valid", "garbage:=metacrap", "uploaded:by=flickrmobile", "frisby"],
[
{"name": "frisby", "provider": "test_provider"},
{"name": "valid", "provider": "test_provider"},
],
id="exclude tags by the denylist",
),
pytest.param("notalist", None, id="nonlist tags should be None"),
Expand Down
2 changes: 1 addition & 1 deletion ingestion_server/ingestion_server/cleanup.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
"uploaded:by=flickrmobile",
"uploaded:by=instagram",
"flickriosapp:filter=flamingo",
"by",
}

# Filter out tags that contain the following terms. All entries should be
Expand All @@ -39,7 +40,6 @@
":",
"=",
"cc0",
"by",
"by-nc",
"by-nd",
"by-sa",
Expand Down

0 comments on commit 47fe5df

Please sign in to comment.