Add filter tests for data manager (#1279)

* Update LSF, DM, react-app
* Add tests for data manager filters
* Old build of react app
* [fix] Show correct expected classes on import validation error
* [fix] Make "is empty" work for strings
* Fix tests and use KeyTextTransform from postgres
* Remove TextField from import
* [fix] is empty for all fields
* Revert String case
HumanSignal · Aug 13, 2021 · 95ba682 · 95ba682
1 parent 92d0981
commit 95ba682
Show file tree

Hide file tree

Showing 7 changed files with 444 additions and 8 deletions.
diff --git a/label_studio/data_manager/managers.py b/label_studio/data_manager/managers.py
@@ -6,7 +6,7 @@
 from django.db import models
 from django.db.models import Aggregate, Count, Exists, OuterRef, Subquery, Avg, Q, F, Value
 from django.contrib.postgres.aggregates import ArrayAgg
-from django.db.models.fields.json import KeyTransform
+from django.contrib.postgres.fields.jsonb import KeyTextTransform
 from django.db.models.functions import Coalesce
 from django.conf import settings
 from django.db.models.functions import Cast
@@ -94,7 +94,7 @@ def apply_ordering(queryset, ordering):
 
             # annotate task with data field for float/int/bool ordering support
             json_field = field_name.replace('data__', '')
-            queryset = queryset.annotate(ordering_field=KeyTransform(json_field, 'data'))
+            queryset = queryset.annotate(ordering_field=KeyTextTransform(json_field, 'data'))
             f = F('ordering_field').asc(nulls_last=True) if ascending else F('ordering_field').desc(nulls_last=True)
 
         else:
@@ -152,7 +152,7 @@ def apply_filters(queryset, filters):
             json_field = field_name.replace('data__', '')
             queryset = queryset.annotate(**{
                 f'filter_{json_field.replace("$undefined$", "undefined")}':
-                    Cast(KeyTransform(json_field, 'data'), output_field=FloatField())
+                    Cast(KeyTextTransform(json_field, 'data'), output_field=FloatField())
             })
             clean_field_name = f'filter_{json_field.replace("$undefined$", "undefined")}'
         else:
@@ -164,6 +164,20 @@ def apply_filters(queryset, filters):
             _filter.operator = 'equal' if cast_bool_from_str(_filter.value) else 'not_equal'
             _filter.value = 0
 
+        # special case: for strings empty is "" or null=True
+        if _filter.type in ('String', 'Unknown') and _filter.operator == 'empty':
+            value = cast_bool_from_str(_filter.value)
+            if value:  # empty = true
+                q = Q(
+                    Q(**{field_name: ''}) | Q(**{field_name: None}) | Q(**{field_name+'__isnull': True})
+                )
+            else:  # empty = false
+                q = Q(
+                    ~Q(**{field_name: ''}) & ~Q(**{field_name: None}) & ~Q(**{field_name+'__isnull': True})
+                )
+            filter_expression.add(q, conjunction)
+            continue
+
         # append operator
         field_name = f"{clean_field_name}{operators.get(_filter.operator, '')}"
 

diff --git a/label_studio/tasks/validation.py b/label_studio/tasks/validation.py
@@ -51,11 +51,11 @@ def check_data(project, data):
 
             expected_types = _DATA_TYPES.get(data_type, (str, ))
             if not isinstance(data[data_key], tuple(expected_types)):
-                raise ValidationError('data["{data_key}"]={data_value} '
-                                      'is of type "{type}", '
-                                      'but types "{expected_types}" are expected'
+                raise ValidationError('data[\'{data_key}\']={data_value} is of type \'{type}\', '
+                                      "but the object tag {data_type} expects the following types: {expected_types}"
                                       .format(data_key=data_key, data_value=data[data_key],
-                                              type=type(data[data_key]), expected_types=expected_types))
+                                              type=type(data[data_key]).__name__, data_type=data_type,
+                                              expected_types=[e.__name__ for e in expected_types]))
 
             if data_type == 'List':
                 for item in data[data_key]:

diff --git a/label_studio/tests/data_manager/filters/int.tavern.yml b/label_studio/tests/data_manager/filters/int.tavern.yml
@@ -0,0 +1,267 @@
+---
+test_name: int-filter-greater
+strict: false
+marks:
+  - usefixtures:
+      - django_live_url
+stages:
+
+  - id: signup
+    type: ref
+
+  - id: create_project
+    type: ref
+
+  - name: create_filter_tasks
+    request:
+      files:
+        json_file: tests/data_manager/filters/int_tasks.json
+      headers:
+        content-type: multipart/form-data
+      method: POST
+      url: '{django_live_url}/api/projects/{project_pk}/import'
+    response:
+      json:
+        annotation_count: 0
+        prediction_count: 1
+        task_count: 6
+      status_code: 201
+
+  - name: setup_views
+    request:
+      method: POST
+      url: '{django_live_url}/api/dm/views/?project={project_pk}'
+      json: {
+        "data": {
+          "columnsDisplayType": {
+            "tasks:data.int_field": "Number"
+          },
+          "filters": {
+            "conjunction": "and",
+            "items": [
+            {
+              "filter": "filter:tasks:data.int_field",
+              "operator": "greater",
+              "type": "Number",
+              "value": 5
+            }
+            ]
+          },
+          "ordering": [
+            "tasks:data.int_field"
+          ]
+        },
+        "project": "{project_pk}"
+      }
+    response:
+      save:
+        json:
+          view: "@"
+      status_code: 201
+
+  - name: get_tasks
+    request:
+      method: GET
+      url: '{django_live_url}/api/dm/views/{view.id}/tasks'
+    response:
+      json: {
+        "total_annotations": 0,
+        "total_predictions": 0,
+        "total": 3,
+        "tasks": [
+        {
+          "data": {
+            "int_field": 20,
+            "text": "x2"
+          }
+        },
+        {
+          "data": {
+            "int_field": 30,
+            "text": "x3"
+          }
+        },
+        {
+          "data": {
+            "int_field": "50",
+            "text": "x5"
+          }
+        }
+        ]
+      }
+      status_code: 200
+
+
+
+---
+test_name: int-filter-between-in
+strict: false
+marks:
+  - usefixtures:
+      - django_live_url
+stages:
+
+  - id: signup
+    type: ref
+
+  - id: create_project
+    type: ref
+
+  - name: create_filter_tasks
+    request:
+      files:
+        json_file: tests/data_manager/filters/int_tasks.json
+      headers:
+        content-type: multipart/form-data
+      method: POST
+      url: '{django_live_url}/api/projects/{project_pk}/import'
+    response:
+      json:
+        annotation_count: 0
+        prediction_count: 1
+        task_count: 6
+      status_code: 201
+
+  - name: setup_views
+    request:
+      method: POST
+      url: '{django_live_url}/api/dm/views/?project={project_pk}'
+      json: {
+        "data": {
+          "columnsDisplayType": {
+            "tasks:data.int_field": "Number"
+          },
+          "filters": {
+            "conjunction": "and",
+            "items": [
+            {
+              "filter": "filter:tasks:data.int_field",
+              "operator": "in",
+              "type": "Number",
+              "value": {
+                "min": 10,
+                "max": 40
+              }
+            }
+            ]
+          },
+          "ordering": [
+            "tasks:data.int_field"
+          ]
+        },
+        "project": "{project_pk}"
+      }
+    response:
+      save:
+        json:
+          view: "@"
+      status_code: 201
+
+  - name: get_tasks
+    request:
+      method: GET
+      url: '{django_live_url}/api/dm/views/{view.id}/tasks'
+    response:
+      json: {
+        "total_annotations": 0,
+        "total_predictions": 0,
+        "total": 2,
+        "tasks": [
+        {
+          "data": {
+            "int_field": 20,
+            "text": "x2"
+          }
+        },
+        {
+          "data": {
+            "int_field": 30,
+            "text": "x3"
+          }
+        }
+        ]
+      }
+      status_code: 200
+
+
+---
+test_name: int-is-empty
+strict: false
+marks:
+  - usefixtures:
+      - django_live_url
+stages:
+
+  - id: signup
+    type: ref
+
+  - id: create_project
+    type: ref
+
+  - name: create_filter_tasks
+    request:
+      files:
+        json_file: tests/data_manager/filters/int_tasks.json
+      headers:
+        content-type: multipart/form-data
+      method: POST
+      url: '{django_live_url}/api/projects/{project_pk}/import'
+    response:
+      json:
+        annotation_count: 0
+        prediction_count: 1
+        task_count: 6
+      status_code: 201
+
+  - name: setup_views
+    request:
+      method: POST
+      url: '{django_live_url}/api/dm/views/?project={project_pk}'
+      json: {
+        "data": {
+          "columnsDisplayType": {
+            "tasks:data.int_field": "Number"
+          },
+          "filters": {
+            "conjunction": "and",
+            "items": [
+            {
+              "filter": "filter:tasks:data.int_field",
+              "operator": "empty",
+              "type": "Number",
+              "value": "true"
+            }
+            ]
+          },
+          "ordering": [
+            "tasks:data.int_field"
+          ]
+        },
+        "project": "{project_pk}"
+      }
+    response:
+      save:
+        json:
+          view: "@"
+      status_code: 201
+
+  - name: get_tasks
+    request:
+      method: GET
+      url: '{django_live_url}/api/dm/views/{view.id}/tasks'
+    response:
+      json: {
+        "total_annotations": 0,
+        "total_predictions": 0,
+        "total": 1,
+        "tasks": [
+          {
+            "data": {
+              "text": "x6"
+            }
+          }
+        ]
+      }
+      status_code: 200
+
+
diff --git a/label_studio/tests/data_manager/filters/int_tasks.json b/label_studio/tests/data_manager/filters/int_tasks.json
@@ -0,0 +1,45 @@
+[
+  {
+    "data": {
+      "text": "Test example phrase 1",
+      "int_field": 1
+    },
+    "predictions": [
+      {
+        "result": [
+          {
+            "from_name": "text_class",
+            "to_name": "text",
+            "type": "choices",
+            "value": {
+              "choices": [
+                "class_A"
+              ]
+            }
+          }
+        ],
+        "model_version": "model_version_A"
+      }
+    ]
+  },
+  {
+    "int_field": 20,
+    "text": "x2"
+  },
+  {
+    "int_field": 30,
+    "text": "x3"
+  },
+  {
+    "int_field": 4,
+    "text": "x4"
+  },
+  {
+    "int_field": "50",
+    "text": "x5"
+  },
+
+  {
+   "text": "x6"
+  }
+]