Skip to content

Commit

Permalink
Add filter tests for data manager (#1279)
Browse files Browse the repository at this point in the history
* Update LSF, DM, react-app

* Add tests for data manager filters

* Old build of react app

* [fix] Show correct expected classes on import validation error

* [fix] Make "is empty" work for strings

* Fix tests and use KeyTextTransform from postgres

* Remove TextField from import

* [fix] is empty for all fields

* Revert String case
  • Loading branch information
makseq authored Aug 13, 2021
1 parent 92d0981 commit 95ba682
Show file tree
Hide file tree
Showing 7 changed files with 444 additions and 8 deletions.
20 changes: 17 additions & 3 deletions label_studio/data_manager/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from django.db import models
from django.db.models import Aggregate, Count, Exists, OuterRef, Subquery, Avg, Q, F, Value
from django.contrib.postgres.aggregates import ArrayAgg
from django.db.models.fields.json import KeyTransform
from django.contrib.postgres.fields.jsonb import KeyTextTransform
from django.db.models.functions import Coalesce
from django.conf import settings
from django.db.models.functions import Cast
Expand Down Expand Up @@ -94,7 +94,7 @@ def apply_ordering(queryset, ordering):

# annotate task with data field for float/int/bool ordering support
json_field = field_name.replace('data__', '')
queryset = queryset.annotate(ordering_field=KeyTransform(json_field, 'data'))
queryset = queryset.annotate(ordering_field=KeyTextTransform(json_field, 'data'))
f = F('ordering_field').asc(nulls_last=True) if ascending else F('ordering_field').desc(nulls_last=True)

else:
Expand Down Expand Up @@ -152,7 +152,7 @@ def apply_filters(queryset, filters):
json_field = field_name.replace('data__', '')
queryset = queryset.annotate(**{
f'filter_{json_field.replace("$undefined$", "undefined")}':
Cast(KeyTransform(json_field, 'data'), output_field=FloatField())
Cast(KeyTextTransform(json_field, 'data'), output_field=FloatField())
})
clean_field_name = f'filter_{json_field.replace("$undefined$", "undefined")}'
else:
Expand All @@ -164,6 +164,20 @@ def apply_filters(queryset, filters):
_filter.operator = 'equal' if cast_bool_from_str(_filter.value) else 'not_equal'
_filter.value = 0

# special case: a String/Unknown field counts as empty when it equals "", is None, or matches __isnull=True
if _filter.type in ('String', 'Unknown') and _filter.operator == 'empty':
value = cast_bool_from_str(_filter.value)
if value: # empty = true
q = Q(
Q(**{field_name: ''}) | Q(**{field_name: None}) | Q(**{field_name+'__isnull': True})
)
else: # empty = false
q = Q(
~Q(**{field_name: ''}) & ~Q(**{field_name: None}) & ~Q(**{field_name+'__isnull': True})
)
filter_expression.add(q, conjunction)
continue

# append operator
field_name = f"{clean_field_name}{operators.get(_filter.operator, '')}"

Expand Down
8 changes: 4 additions & 4 deletions label_studio/tasks/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,11 @@ def check_data(project, data):

expected_types = _DATA_TYPES.get(data_type, (str, ))
if not isinstance(data[data_key], tuple(expected_types)):
raise ValidationError('data["{data_key}"]={data_value} '
'is of type "{type}", '
'but types "{expected_types}" are expected'
raise ValidationError('data[\'{data_key}\']={data_value} is of type \'{type}\', '
"but the object tag {data_type} expects the following types: {expected_types}"
.format(data_key=data_key, data_value=data[data_key],
type=type(data[data_key]), expected_types=expected_types))
type=type(data[data_key]).__name__, data_type=data_type,
expected_types=[e.__name__ for e in expected_types]))

if data_type == 'List':
for item in data[data_key]:
Expand Down
267 changes: 267 additions & 0 deletions label_studio/tests/data_manager/filters/int.tavern.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,267 @@
---
# Number filter, "greater" operator: import the int_tasks.json fixture,
# create a view that keeps tasks with data.int_field > 5 (ordered by that
# field), and expect three tasks back: 20, 30 and the string value "50".
test_name: int-filter-greater
strict: false
marks:
- usefixtures:
- django_live_url
stages:

# Stages pulled in by id (type: ref); presumably defined in shared Tavern
# includes outside this file -- TODO confirm.
- id: signup
type: ref

- id: create_project
type: ref

# Upload the fixture as multipart form data; the import response must report
# 6 tasks, 1 prediction and no annotations.
- name: create_filter_tasks
request:
files:
json_file: tests/data_manager/filters/int_tasks.json
headers:
content-type: multipart/form-data
method: POST
url: '{django_live_url}/api/projects/{project_pk}/import'
response:
json:
annotation_count: 0
prediction_count: 1
task_count: 6
status_code: 201

# Create the data manager view carrying the filter under test; the whole
# response body is saved as `view` so the next stage can use `view.id`.
- name: setup_views
request:
method: POST
url: '{django_live_url}/api/dm/views/?project={project_pk}'
json: {
"data": {
"columnsDisplayType": {
"tasks:data.int_field": "Number"
},
"filters": {
"conjunction": "and",
"items": [
{
"filter": "filter:tasks:data.int_field",
"operator": "greater",
"type": "Number",
"value": 5
}
]
},
"ordering": [
"tasks:data.int_field"
]
},
"project": "{project_pk}"
}
response:
save:
json:
view: "@"
status_code: 201

# Fetch the tasks through the saved view and check the filtered result.
- name: get_tasks
request:
method: GET
url: '{django_live_url}/api/dm/views/{view.id}/tasks'
response:
json: {
"total_annotations": 0,
"total_predictions": 0,
"total": 3,
"tasks": [
{
"data": {
"int_field": 20,
"text": "x2"
}
},
{
"data": {
"int_field": 30,
"text": "x3"
}
},
{
"data": {
"int_field": "50",
"text": "x5"
}
}
]
}
status_code: 200



---
# Number filter, "in" (range) operator: import the int_tasks.json fixture,
# create a view that filters data.int_field with min=10 / max=40, and expect
# exactly two tasks back: 20 and 30.
test_name: int-filter-between-in
strict: false
marks:
- usefixtures:
- django_live_url
stages:

# Stages pulled in by id (type: ref); presumably defined in shared Tavern
# includes outside this file -- TODO confirm.
- id: signup
type: ref

- id: create_project
type: ref

# Upload the fixture as multipart form data; the import response must report
# 6 tasks, 1 prediction and no annotations.
- name: create_filter_tasks
request:
files:
json_file: tests/data_manager/filters/int_tasks.json
headers:
content-type: multipart/form-data
method: POST
url: '{django_live_url}/api/projects/{project_pk}/import'
response:
json:
annotation_count: 0
prediction_count: 1
task_count: 6
status_code: 201

# Create the data manager view carrying the range filter; the whole response
# body is saved as `view` so the next stage can use `view.id`.
- name: setup_views
request:
method: POST
url: '{django_live_url}/api/dm/views/?project={project_pk}'
json: {
"data": {
"columnsDisplayType": {
"tasks:data.int_field": "Number"
},
"filters": {
"conjunction": "and",
"items": [
{
"filter": "filter:tasks:data.int_field",
"operator": "in",
"type": "Number",
"value": {
"min": 10,
"max": 40
}
}
]
},
"ordering": [
"tasks:data.int_field"
]
},
"project": "{project_pk}"
}
response:
save:
json:
view: "@"
status_code: 201

# Fetch the tasks through the saved view and check the filtered result.
- name: get_tasks
request:
method: GET
url: '{django_live_url}/api/dm/views/{view.id}/tasks'
response:
json: {
"total_annotations": 0,
"total_predictions": 0,
"total": 2,
"tasks": [
{
"data": {
"int_field": 20,
"text": "x2"
}
},
{
"data": {
"int_field": 30,
"text": "x3"
}
}
]
}
status_code: 200


---
# Number filter, "empty" operator with value "true": import the
# int_tasks.json fixture, create a view that keeps tasks whose data.int_field
# is empty, and expect one task back -- the one whose data holds only
# "text": "x6".
test_name: int-is-empty
strict: false
marks:
- usefixtures:
- django_live_url
stages:

# Stages pulled in by id (type: ref); presumably defined in shared Tavern
# includes outside this file -- TODO confirm.
- id: signup
type: ref

- id: create_project
type: ref

# Upload the fixture as multipart form data; the import response must report
# 6 tasks, 1 prediction and no annotations.
- name: create_filter_tasks
request:
files:
json_file: tests/data_manager/filters/int_tasks.json
headers:
content-type: multipart/form-data
method: POST
url: '{django_live_url}/api/projects/{project_pk}/import'
response:
json:
annotation_count: 0
prediction_count: 1
task_count: 6
status_code: 201

# Create the data manager view carrying the "empty" filter; the whole
# response body is saved as `view` so the next stage can use `view.id`.
- name: setup_views
request:
method: POST
url: '{django_live_url}/api/dm/views/?project={project_pk}'
json: {
"data": {
"columnsDisplayType": {
"tasks:data.int_field": "Number"
},
"filters": {
"conjunction": "and",
"items": [
{
"filter": "filter:tasks:data.int_field",
"operator": "empty",
"type": "Number",
"value": "true"
}
]
},
"ordering": [
"tasks:data.int_field"
]
},
"project": "{project_pk}"
}
response:
save:
json:
view: "@"
status_code: 201

# Fetch the tasks through the saved view and check the filtered result.
- name: get_tasks
request:
method: GET
url: '{django_live_url}/api/dm/views/{view.id}/tasks'
response:
json: {
"total_annotations": 0,
"total_predictions": 0,
"total": 1,
"tasks": [
{
"data": {
"text": "x6"
}
}
]
}
status_code: 200


45 changes: 45 additions & 0 deletions label_studio/tests/data_manager/filters/int_tasks.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
[
{
"data": {
"text": "Test example phrase 1",
"int_field": 1
},
"predictions": [
{
"result": [
{
"from_name": "text_class",
"to_name": "text",
"type": "choices",
"value": {
"choices": [
"class_A"
]
}
}
],
"model_version": "model_version_A"
}
]
},
{
"int_field": 20,
"text": "x2"
},
{
"int_field": 30,
"text": "x3"
},
{
"int_field": 4,
"text": "x4"
},
{
"int_field": "50",
"text": "x5"
},

{
"text": "x6"
}
]
Loading

0 comments on commit 95ba682

Please sign in to comment.