Skip to content

Commit

Permalink
Add backfillmoderationdecision management command (#4415)
Browse files Browse the repository at this point in the history
  • Loading branch information
krysal authored Jun 11, 2024
1 parent 36b693a commit 0094a1d
Show file tree
Hide file tree
Showing 4 changed files with 228 additions and 2 deletions.
121 changes: 121 additions & 0 deletions api/api/management/commands/backfillmoderationdecision.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
import argparse

from django.contrib.auth import get_user_model

from django_tqdm import BaseCommand

from api.constants.moderation import DecisionAction
from api.models import (
AudioDecision,
AudioDecisionThrough,
AudioReport,
ImageDecision,
ImageDecisionThrough,
ImageReport,
)
from api.models.media import DMCA, MATURE_FILTERED, NO_ACTION, PENDING


class Command(BaseCommand):
    """
    Back-fill moderation decisions from already-resolved media reports.

    Every report of the chosen media type that is not ``PENDING`` and has no
    decision attached gets a new decision whose action is derived from the
    report's status and reason (see ``get_action``). The decision is
    attributed to the given moderator and linked to the reported media
    through the matching "decision through" model.
    """

    help = "Back-fill the moderation decision table for a given media type."
    # Number of reports converted per loop iteration (one progress-bar tick).
    batch_size = 3

    @staticmethod
    def add_arguments(parser):
        """Register ``--dry-run``, ``--media-type`` and ``--moderator``."""

        # ``BooleanOptionalAction`` already yields ``--dry-run`` and
        # ``--no-dry-run`` and stores a real bool. Passing ``type=`` along
        # with it is deprecated since Python 3.12 and rejected from 3.14, so
        # it is intentionally omitted.
        parser.add_argument(
            "--dry-run",
            help="Count reports to process, and don't do anything else.",
            default=True,
            action=argparse.BooleanOptionalAction,
        )
        parser.add_argument(
            "--media-type",
            help="The media type to back-fill moderation decisions.",
            type=str,
            default="image",
            choices=["image", "audio"],
        )
        parser.add_argument(
            "--moderator",
            help="The username of the moderator to attribute the decisions to.",
            type=str,
            default="opener",
        )

    def handle(self, *args, **options):
        """
        Create one decision per resolved, decision-less report.

        With ``dry_run`` set (the default) only the number of matching
        reports is printed. Otherwise the reports are processed in chunks of
        ``batch_size``; an unknown moderator username is reported as an error
        and nothing is written.
        """

        # Imported locally so this command only relies on names it brings
        # into scope itself.
        from django.db import transaction

        dry = options["dry_run"]
        username = options["moderator"]
        media_type = options["media_type"]

        MediaReport = ImageReport
        MediaDecision = ImageDecision
        MediaDecisionThrough = ImageDecisionThrough
        if media_type == "audio":
            MediaReport = AudioReport
            MediaDecision = AudioDecision
            MediaDecisionThrough = AudioDecisionThrough

        non_pending_reports = MediaReport.objects.filter(decision=None).exclude(
            status=PENDING
        )
        count_to_process = non_pending_reports.count()

        if dry:
            self.info(
                f"{count_to_process} {media_type} reports to back-fill. "
                "This is a dry run, exiting without making changes."
            )
            return

        if not count_to_process:
            self.info("No reports to process.")
            return

        # Ceiling division: a trailing partial batch still costs one tick, so
        # floor division would under-count (and give 0 for a single small
        # batch).
        total_batches = -(-count_to_process // self.batch_size)
        t = self.tqdm(total=total_batches)

        User = get_user_model()
        try:
            moderator = User.objects.get(username=username)
        except User.DoesNotExist:
            t.error(f"User '{username}' not found.")
            return

        # The queryset is re-sliced on every pass: reports handled in the
        # previous pass now have a decision and drop out of the filter, so
        # the first ``batch_size`` rows are always unprocessed ones.
        while reports_chunk := list(non_pending_reports[: self.batch_size]):
            # Keep the three writes of a batch together so a failure cannot
            # leave decisions that no report points to (a re-run would then
            # create duplicates for the same reports).
            with transaction.atomic():
                decisions = MediaDecision.objects.bulk_create(
                    [
                        MediaDecision(
                            action=self.get_action(report),
                            moderator=moderator,
                            notes="__backfilled_from_report_status",
                        )
                        for report in reports_chunk
                    ]
                )
                # NOTE(review): relies on ``bulk_create`` returning the
                # objects in input order with primary keys set — confirm for
                # the database backend in use.
                for report, decision in zip(reports_chunk, decisions):
                    report.decision = decision
                MediaReport.objects.bulk_update(reports_chunk, ["decision"])
                MediaDecisionThrough.objects.bulk_create(
                    [
                        MediaDecisionThrough(
                            media_obj=report.media_obj, decision=decision
                        )
                        for report, decision in zip(reports_chunk, decisions)
                    ]
                )
            t.update(1)

        t.info(
            self.style.SUCCESS(
                f"Created {count_to_process} {media_type} moderation decisions from existing reports."
            )
        )

    @staticmethod
    def get_action(report):
        """
        Map a report's ``status`` and ``reason`` to a ``DecisionAction``.

        ``MATURE_FILTERED`` and ``NO_ACTION`` map directly regardless of
        reason. Any other status is treated as deindexed, where the reason
        picks between the copyright and the sensitive variant.
        """

        if report.status == MATURE_FILTERED:
            return DecisionAction.MARKED_SENSITIVE

        if report.status == NO_ACTION:
            return DecisionAction.REJECTED_REPORTS

        # Cases with status = DEINDEXED
        if report.reason == DMCA:
            return DecisionAction.DEINDEXED_COPYRIGHT

        return DecisionAction.DEINDEXED_SENSITIVE  # For reasons MATURE and OTHER
8 changes: 6 additions & 2 deletions api/test/factory/models/audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,11 @@

from api.models.audio import Audio, AudioAddOn, AudioReport, SensitiveAudio
from test.factory.faker import Faker
from test.factory.models.media import IdentifierFactory, MediaFactory
from test.factory.models.media import (
IdentifierFactory,
MediaFactory,
MediaReportFactory,
)


class SensitiveAudioFactory(DjangoModelFactory):
Expand All @@ -29,7 +33,7 @@ class Meta:
waveform_peaks = Faker("waveform")


class AudioReportFactory(DjangoModelFactory):
class AudioReportFactory(MediaReportFactory):
class Meta:
model = AudioReport

Expand Down
6 changes: 6 additions & 0 deletions api/test/factory/models/oauth2.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from django.contrib.auth import get_user_model
from django.utils import timezone

import factory
Expand Down Expand Up @@ -67,3 +68,8 @@ class Meta:
tzinfo=timezone.get_current_timezone(),
)
application = factory.SubFactory(ThrottledApplicationFactory)


class UserFactory(DjangoModelFactory):
    """Test factory that builds instances of the project's active user model."""

    class Meta:
        # Resolved through ``get_user_model()`` rather than a hard-coded class.
        model = get_user_model()
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
from io import StringIO

from django.core.management import call_command

import pytest

from api.constants.moderation import DecisionAction
from api.models import (
DEINDEXED,
DMCA,
MATURE,
MATURE_FILTERED,
NO_ACTION,
OTHER,
AudioDecision,
AudioDecisionThrough,
ImageDecision,
ImageDecisionThrough,
)
from test.factory.models.audio import AudioReportFactory
from test.factory.models.image import ImageReportFactory
from test.factory.models.oauth2 import UserFactory


def call_cmd(**options):
    """
    Run the ``backfillmoderationdecision`` command with ``options``.

    :param options: keyword options forwarded to ``call_command``
    :return: a ``(stdout, stderr)`` tuple with the text the command wrote
    """

    stdout_buffer, stderr_buffer = StringIO(), StringIO()
    call_command(
        "backfillmoderationdecision",
        stdout=stdout_buffer,
        stderr=stderr_buffer,
        **options,
    )
    captured = (stdout_buffer.getvalue(), stderr_buffer.getvalue())
    # Echo the captured streams so they show up in pytest's failure output.
    print(captured)
    return captured


def make_reports(media_type, reason: str, status: str, count: int = 1):
    """
    Create ``count`` reports with the given ``reason`` and ``status``.

    Audio reports are built when ``media_type`` is ``"audio"``; any other
    value produces image reports.

    :return: the list of created report instances
    """

    report_factory = (
        AudioReportFactory if media_type == "audio" else ImageReportFactory
    )
    return report_factory.create_batch(count, status=status, reason=reason)


@pytest.mark.parametrize(
    ("reason", "status", "expected_action"),
    (
        (MATURE, MATURE_FILTERED, DecisionAction.MARKED_SENSITIVE),
        (DMCA, MATURE_FILTERED, DecisionAction.MARKED_SENSITIVE),
        (OTHER, MATURE_FILTERED, DecisionAction.MARKED_SENSITIVE),
        (MATURE, NO_ACTION, DecisionAction.REJECTED_REPORTS),
        (DMCA, NO_ACTION, DecisionAction.REJECTED_REPORTS),
        (OTHER, NO_ACTION, DecisionAction.REJECTED_REPORTS),
        (MATURE, DEINDEXED, DecisionAction.DEINDEXED_SENSITIVE),
        (DMCA, DEINDEXED, DecisionAction.DEINDEXED_COPYRIGHT),
        (OTHER, DEINDEXED, DecisionAction.DEINDEXED_SENSITIVE),
    ),
)
@pytest.mark.parametrize(("media_type"), ("image", "audio"))
@pytest.mark.django_db
def test_create_moderation_decision_for_reports(
    media_type, reason, status, expected_action
):
    """
    A single resolved report yields exactly one decision with the expected
    action, attributed to the moderator and linked to the reported media.
    """

    # Pick the decision models that belong to the media type under test.
    if media_type == "image":
        decision_model, through_model = ImageDecision, ImageDecisionThrough
    else:
        decision_model, through_model = AudioDecision, AudioDecisionThrough

    username = "opener"
    UserFactory.create(username=username)

    (report,) = make_reports(media_type=media_type, reason=reason, status=status)[:1]

    out, _ = call_cmd(dry_run=False, media_type=media_type, moderator=username)

    assert decision_model.objects.count() == 1
    assert f"Created 1 {media_type} moderation decisions from existing reports." in out

    decision = decision_model.objects.first()
    assert decision.media_objs.count() == 1
    assert decision.action == expected_action
    assert decision.moderator.username == username

    link = through_model.objects.first()
    assert link.media_obj == report.media_obj
    assert link.decision == decision


@pytest.mark.django_db
def test_catch_user_exception():
    """An unknown moderator username is reported on stderr."""

    make_reports(media_type="image", reason=MATURE, status=MATURE_FILTERED)

    captured = call_cmd(dry_run=False, moderator="nonexistent")
    stderr_text = captured[1]

    assert "User 'nonexistent' not found." in stderr_text

0 comments on commit 0094a1d

Please sign in to comment.