Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix fix_mediaformat command #228

Merged
1 commit merged on Dec 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion server/cp/commands/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
from . import fix_language # noqa
from . import fix_mediaformat # noqa
from . import fix_mediaformat # noqa
6 changes: 4 additions & 2 deletions server/cp/commands/fix_language.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

import time

from superdesk import get_resource_service
Expand All @@ -9,7 +8,10 @@
def fix_language(resource="items", limit=50, sleep_secs=2):
service = get_resource_service(resource)

source = {"query": {"terms": {"language": ["en-CA", "en-US", "fr-CA"]}}, "size": 100}
source = {
"query": {"terms": {"language": ["en-CA", "en-US", "fr-CA"]}},
"size": 100,
}

for i in range(int(limit)):
items = service.search(source)
Expand Down
36 changes: 27 additions & 9 deletions server/cp/commands/fix_mediaformat.py
Original file line number Diff line number Diff line change
@@ -1,35 +1,53 @@
import time

from typing import get_args
from superdesk import get_resource_service
from cp.signals import get_media_type_name, get_media_type_scheme
from cp.signals import get_media_type_name, get_media_type_scheme, MediaType
from newsroom.commands.manager import manager


@manager.command
def fix_mediaformat(resource="items", limit=500, sleep_secs=2):
def fix_mediaformat(
resource="items", query="", code="wireaudio", limit=500, sleep_secs=2, dry_run=False
):
if not query:
print("Please provide a query to filter the items.")
return
if code not in get_args(MediaType):
print("Invalid media type code.")
return
service = get_resource_service(resource)
media_type_scheme = get_media_type_scheme()
source = {
"query": {
"bool": {"must_not": {"term": {"subject.scheme": media_type_scheme}}}
"bool": {"must": {"query_string": {"query": query}}},
},
"size": 100,
"from": 0,
}
for i in range(int(limit)):
items = service.search(source)
if not items.count():
for i in range(0, int(limit), source["size"]):
source["from"] = i
items = list(service.search(source))
if not len(items):
break
for item in items:
updates = {"subject": item["subject"].copy() if item.get("subject") else []}
updates["subject"] = [
s for s in updates["subject"] if s.get("scheme") != media_type_scheme
]
updates["subject"].append(
dict(
code="wiretext",
name=get_media_type_name("wiretext", item.get("language")),
code=code,
name=get_media_type_name(code, item.get("language")),
scheme=media_type_scheme,
)
)

service.system_update(item["_id"], updates, item)
if dry_run:
print("Would update", item["_id"], "with", updates)
else:
print("Updating", item["_id"])
service.system_update(item["_id"], updates, item)
print(".", end="", flush=True)
time.sleep(int(sleep_secs))
print("done.")
16 changes: 12 additions & 4 deletions server/tests/test_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@ def test_fix_mediaformat(app):
app.data.insert(
"items",
[
{"_id": "en", "language": "en", "type": "text"},
{"_id": "fr", "language": "fr", "type": "text"},
{"_id": "en", "language": "en", "type": "text", "headline": "foo"},
{"_id": "fr", "language": "fr", "type": "text", "headline": "bar"},
],
)

fix_mediaformat()
fix_mediaformat(query="headline:foo", code="wiretext", sleep_secs=0)

en_item = app.data.find_one("items", req=None, _id="en")
assert "subject" in en_item
Expand All @@ -20,4 +20,12 @@ def test_fix_mediaformat(app):
assert "mediaformat" == en_item["subject"][0]["scheme"]

fr_item = app.data.find_one("items", req=None, _id="fr")
assert "Texte fil de presse" == fr_item["subject"][0]["name"]
assert "subject" not in fr_item, "Should not add subject to non-matching item"

fix_mediaformat(query="headline:bar", code="wireaudio", sleep_secs=0)

fr_item = app.data.find_one("items", req=None, _id="fr")
assert "subject" in fr_item
assert "wireaudio" == fr_item["subject"][0]["code"]
assert "Audio fil de presse" == fr_item["subject"][0]["name"]
assert "mediaformat" == fr_item["subject"][0]["scheme"]
Loading