From 7ba5846d96d22b42ffaf64ffaea222265c535f86 Mon Sep 17 00:00:00 2001
From: Ben Epstein <ben@rungalileo.io>
Date: Wed, 2 Nov 2022 10:44:33 -0400
Subject: [PATCH] refactor tests (#434)

* refactor tests

* import watch from hf
---
 dataquality/__init__.py                       |  2 +-
 dataquality/integrations/hf.py                |  3 ++
 dataquality/metrics.py                        |  2 +
 tests/{ => clients}/test_api.py               |  2 +-
 tests/conftest.py                             | 12 +++---
 tests/{ => core}/test_auth.py                 |  5 ++-
 tests/{ => core}/test_cloud.py                |  0
 tests/{ => core}/test_config.py               |  2 +-
 tests/{ => core}/test_finish.py               |  0
 tests/{ => core}/test_init.py                 |  2 +-
 tests/inference/conftest.py                   | 10 -----
 tests/inference/test_inference.py             | 16 ++++++--
 ...ion.py => test_text_classification_inf.py} |  0
 ...{test_text_ner.py => test_text_ner_inf.py} |  0
 .../hf}/test_hf_integration.py                |  2 +-
 .../hf}/test_text_classification_hf.py        |  7 +++-
 .../spacy}/test_spacy_integration.py          |  0
 .../spacy}/test_spacy_ner.py                  |  4 +-
 .../{ => integrations/torch}/test_pt_utils.py |  0
 .../torch}/test_text_classification_pt.py     |  0
 tests/{ => loggers}/test_multi_label.py       |  0
 tests/{ => loggers}/test_ner.py               |  2 +-
 .../{ => loggers}/test_text_classification.py |  0
 .../{test_schema => schemas}/test_metrics.py  |  0
 tests/{ => schemas}/test_predicates.py        |  0
 tests/test_dataquality.py                     | 37 +++++++++++++++++-
 tests/test_logger.py                          | 39 -------------------
 tests/{inference => test_utils}/__init__.py   |  0
 tests/{utils => test_utils}/data_utils.py     |  0
 .../{utils => test_utils}/hf_datasets_mock.py |  0
 .../hf_integration_constants.py               |  0
 .../{utils => test_utils}/lightning_model.py  |  0
 tests/{utils => test_utils}/mock_request.py   |  0
 tests/{utils => test_utils}/ner_constants.py  |  0
 .../spacy_integration.py                      |  0
 .../spacy_integration_constants.py            |  1 +
 tests/utils/__init__.py                       |  0
 tests/{ => utils}/test_dq_logger.py           |  0
 tests/{ => utils}/test_name.py                |  0
 tests/{ => utils}/test_tf_version.py          |  0
 tests/{ => utils}/test_vaex_utils.py          |  0
 tests/{ => utils}/test_version.py             |  2 +-
 42 files changed, 79 insertions(+), 71 deletions(-)
 rename tests/{ => clients}/test_api.py (99%)
 rename tests/{ => core}/test_auth.py (92%)
 rename tests/{ => core}/test_cloud.py (100%)
 rename tests/{ => core}/test_config.py (98%)
 rename tests/{ => core}/test_finish.py (100%)
 rename tests/{ => core}/test_init.py (99%)
 delete mode 100644 tests/inference/conftest.py
 rename tests/inference/{test_text_classification.py => test_text_classification_inf.py} (100%)
 rename tests/inference/{test_text_ner.py => test_text_ner_inf.py} (100%)
 rename tests/{ => integrations/hf}/test_hf_integration.py (99%)
 rename tests/{ => integrations/hf}/test_text_classification_hf.py (98%)
 rename tests/{ => integrations/spacy}/test_spacy_integration.py (100%)
 rename tests/{ => integrations/spacy}/test_spacy_ner.py (98%)
 rename tests/{ => integrations/torch}/test_pt_utils.py (100%)
 rename tests/{ => integrations/torch}/test_text_classification_pt.py (100%)
 rename tests/{ => loggers}/test_multi_label.py (100%)
 rename tests/{ => loggers}/test_ner.py (99%)
 rename tests/{ => loggers}/test_text_classification.py (100%)
 rename tests/{test_schema => schemas}/test_metrics.py (100%)
 rename tests/{ => schemas}/test_predicates.py (100%)
 delete mode 100644 tests/test_logger.py
 rename tests/{inference => test_utils}/__init__.py (100%)
 rename tests/{utils => test_utils}/data_utils.py (100%)
 rename tests/{utils => test_utils}/hf_datasets_mock.py (100%)
 rename tests/{utils => test_utils}/hf_integration_constants.py (100%)
 rename tests/{utils => test_utils}/lightning_model.py (100%)
 rename tests/{utils => test_utils}/mock_request.py (100%)
 rename tests/{utils => test_utils}/ner_constants.py (100%)
 rename tests/{utils => test_utils}/spacy_integration.py (100%)
 rename tests/{utils => test_utils}/spacy_integration_constants.py (99%)
 delete mode 100644 tests/utils/__init__.py
 rename tests/{ => utils}/test_dq_logger.py (100%)
 rename tests/{ => utils}/test_name.py (100%)
 rename tests/{ => utils}/test_tf_version.py (100%)
 rename tests/{ => utils}/test_vaex_utils.py (100%)
 rename tests/{ => utils}/test_version.py (96%)

diff --git a/dataquality/__init__.py b/dataquality/__init__.py
index eb10fbc7c..7772ed81c 100644
--- a/dataquality/__init__.py
+++ b/dataquality/__init__.py
@@ -1,6 +1,6 @@
 "dataquality"
 
-__version__ = "v0.7.2"
+__version__ = "v0.7.3"
 
 import os
 import resource
diff --git a/dataquality/integrations/hf.py b/dataquality/integrations/hf.py
index b71b52751..c17c5bfbb 100644
--- a/dataquality/integrations/hf.py
+++ b/dataquality/integrations/hf.py
@@ -11,6 +11,9 @@
 from dataquality.analytics import Analytics
 from dataquality.clients.api import ApiClient
 from dataquality.exceptions import GalileoException, GalileoWarning
+
+# We add this here so users can `from dataquality.integrations.hf import watch`
+from dataquality.integrations.transformers_trainer import watch  # noqa: F401
 from dataquality.schemas.hf import HFCol
 from dataquality.schemas.ner import TaggingSchema
 from dataquality.schemas.split import conform_split
diff --git a/dataquality/metrics.py b/dataquality/metrics.py
index b4843c6a5..eaab9cdca 100644
--- a/dataquality/metrics.py
+++ b/dataquality/metrics.py
@@ -493,6 +493,8 @@ def _process_exported_dataframe(
             if data_df[col].ndim > 1:
                 return data_df
         pdf = data_df.to_pandas_df()
+        # The spans come back as json.dumps-encoded strings, so we parse them back
+        # into JSON data here so that users get the actual span objects
         if task_type == TaskType.text_ner and "spans" in pdf.columns:
             pdf["spans"] = pdf["spans"].apply(json.loads)
         return pdf
diff --git a/tests/test_api.py b/tests/clients/test_api.py
similarity index 99%
rename from tests/test_api.py
rename to tests/clients/test_api.py
index 33431b5a6..d712fa9f2 100644
--- a/tests/test_api.py
+++ b/tests/clients/test_api.py
@@ -11,7 +11,7 @@
 from dataquality.clients.api import ApiClient
 from dataquality.exceptions import GalileoException
 from dataquality.schemas.task_type import TaskType
-from tests.utils.mock_request import (
+from tests.test_utils.mock_request import (
     EXISTING_PROJECT,
     EXISTING_RUN,
     FAKE_NEW_RUN,
diff --git a/tests/conftest.py b/tests/conftest.py
index e783cdc55..fbb373958 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -14,7 +14,7 @@
 from dataquality.loggers import BaseGalileoLogger
 from dataquality.schemas.task_type import TaskType
 from dataquality.utils.dq_logger import DQ_LOG_FILE_HOME
-from tests.utils.mock_request import MockResponse
+from tests.test_utils.mock_request import MockResponse
 
 DEFAULT_API_URL = "http://localhost:8088"
 DEFAULT_PROJECT_ID = UUID("399057bc-b276-4027-a5cf-48893ac45388")
@@ -52,7 +52,6 @@ def cleanup_after_use() -> Generator:
     for task_type in list(TaskType):
         dataquality.get_model_logger(task_type).logger_config.reset()
     try:
-        dataquality.get_model_logger().logger_config.reset()
         if os.path.isdir(BaseGalileoLogger.LOG_FILE_DIR):
             shutil.rmtree(BaseGalileoLogger.LOG_FILE_DIR)
         if not os.path.isdir(TEST_PATH):
@@ -63,9 +62,12 @@ def cleanup_after_use() -> Generator:
             os.makedirs(DQ_LOG_FILE_LOCATION)
         yield
     finally:
-        shutil.rmtree(BaseGalileoLogger.LOG_FILE_DIR)
-        shutil.rmtree(DQ_LOG_FILE_LOCATION)
-        dataquality.get_model_logger().logger_config.reset()
+        if os.path.exists(BaseGalileoLogger.LOG_FILE_DIR):
+            shutil.rmtree(BaseGalileoLogger.LOG_FILE_DIR)
+        if os.path.exists(DQ_LOG_FILE_LOCATION):
+            shutil.rmtree(DQ_LOG_FILE_LOCATION)
+        for task_type in list(TaskType):
+            dataquality.get_model_logger(task_type).logger_config.reset()
 
 
 @pytest.fixture()
diff --git a/tests/test_auth.py b/tests/core/test_auth.py
similarity index 92%
rename from tests/test_auth.py
rename to tests/core/test_auth.py
index 4e814553a..720de81fc 100644
--- a/tests/test_auth.py
+++ b/tests/core/test_auth.py
@@ -8,7 +8,10 @@
 import dataquality
 from dataquality.core.auth import GALILEO_AUTH_METHOD
 from dataquality.exceptions import GalileoException
-from tests.utils.mock_request import mocked_failed_login_requests, mocked_login_requests
+from tests.test_utils.mock_request import (
+    mocked_failed_login_requests,
+    mocked_login_requests,
+)
 
 config = dataquality.config
 
diff --git a/tests/test_cloud.py b/tests/core/test_cloud.py
similarity index 100%
rename from tests/test_cloud.py
rename to tests/core/test_cloud.py
diff --git a/tests/test_config.py b/tests/core/test_config.py
similarity index 98%
rename from tests/test_config.py
rename to tests/core/test_config.py
index 614ea2a77..1250b3d4f 100644
--- a/tests/test_config.py
+++ b/tests/core/test_config.py
@@ -16,7 +16,7 @@
     url_is_localhost,
 )
 from dataquality.exceptions import GalileoException
-from tests.utils.mock_request import MockResponse
+from tests.test_utils.mock_request import MockResponse
 
 
 def test_console_url(set_test_config: Callable) -> None:
diff --git a/tests/test_finish.py b/tests/core/test_finish.py
similarity index 100%
rename from tests/test_finish.py
rename to tests/core/test_finish.py
diff --git a/tests/test_init.py b/tests/core/test_init.py
similarity index 99%
rename from tests/test_init.py
rename to tests/core/test_init.py
index a556900e0..312bbf34e 100644
--- a/tests/test_init.py
+++ b/tests/core/test_init.py
@@ -12,7 +12,7 @@
 from dataquality.core.init import _Init
 from dataquality.exceptions import GalileoException
 from tests.exceptions import LoginInvoked
-from tests.utils.mock_request import (
+from tests.test_utils.mock_request import (
     EXISTING_PROJECT,
     EXISTING_RUN,
     MockResponse,
diff --git a/tests/inference/conftest.py b/tests/inference/conftest.py
deleted file mode 100644
index 2d5da588f..000000000
--- a/tests/inference/conftest.py
+++ /dev/null
@@ -1,10 +0,0 @@
-import pytest
-
-import dataquality
-
-
-@pytest.fixture(autouse=True)
-def reset_logger() -> None:
-    # Only need to set one of data and models loggers since
-    # they use the same logger config
-    dataquality.get_data_logger().logger_config.reset()
diff --git a/tests/inference/test_inference.py b/tests/inference/test_inference.py
index 37150f2cc..08caa1fa0 100644
--- a/tests/inference/test_inference.py
+++ b/tests/inference/test_inference.py
@@ -1,5 +1,5 @@
 from pathlib import Path
-from typing import List, Type
+from typing import Callable, List, Type
 from unittest import mock
 
 import pytest
@@ -13,7 +13,9 @@
 
 
 class TestSetSplitInference:
-    def test_set_split_inference(self) -> None:
+    def test_set_split_inference(
+        self, set_test_config: Callable, cleanup_after_use: Callable
+    ) -> None:
         assert not dataquality.get_data_logger().logger_config.inference_logged
         dataquality.set_split("inference", "all-customers")
         assert dataquality.get_data_logger().logger_config.cur_split == "inference"
@@ -22,7 +24,9 @@ def test_set_split_inference(self) -> None:
             == "all-customers"
         )
 
-    def test_set_split_inference_missing_inference_name(self) -> None:
+    def test_set_split_inference_missing_inference_name(
+        self, set_test_config: Callable, cleanup_after_use: Callable
+    ) -> None:
         with pytest.raises(ValidationError) as e:
             dataquality.set_split("inference")
 
@@ -89,7 +93,11 @@ def test_base_model_logger_validate_inference_missing_inference_name(self) -> No
         return_value="1234-abcd-5678",
     )
     def test_write_model_output_inference(
-        self, mock_uuid: mock.MagicMock, mock_save_file: mock.MagicMock
+        self,
+        mock_uuid: mock.MagicMock,
+        mock_save_file: mock.MagicMock,
+        set_test_config: Callable,
+        cleanup_after_use: Callable,
     ) -> None:
         inference_data = {
             "epoch": [None, None, None],
diff --git a/tests/inference/test_text_classification.py b/tests/inference/test_text_classification_inf.py
similarity index 100%
rename from tests/inference/test_text_classification.py
rename to tests/inference/test_text_classification_inf.py
diff --git a/tests/inference/test_text_ner.py b/tests/inference/test_text_ner_inf.py
similarity index 100%
rename from tests/inference/test_text_ner.py
rename to tests/inference/test_text_ner_inf.py
diff --git a/tests/test_hf_integration.py b/tests/integrations/hf/test_hf_integration.py
similarity index 99%
rename from tests/test_hf_integration.py
rename to tests/integrations/hf/test_hf_integration.py
index da87c9da2..4a67bed2e 100644
--- a/tests/test_hf_integration.py
+++ b/tests/integrations/hf/test_hf_integration.py
@@ -17,7 +17,7 @@
 )
 from dataquality.schemas.ner import TaggingSchema
 from dataquality.utils.hf_tokenizer import extract_gold_spans_at_word_level
-from tests.utils.hf_integration_constants import (
+from tests.test_utils.hf_integration_constants import (
     ADJUSTED_TOKEN_DATA,
     UNADJUSTED_TOKEN_DATA,
     BILOUSequence,
diff --git a/tests/test_text_classification_hf.py b/tests/integrations/hf/test_text_classification_hf.py
similarity index 98%
rename from tests/test_text_classification_hf.py
rename to tests/integrations/hf/test_text_classification_hf.py
index 5c9ebeabb..1ada2496c 100644
--- a/tests/test_text_classification_hf.py
+++ b/tests/integrations/hf/test_text_classification_hf.py
@@ -21,8 +21,11 @@
 from dataquality.schemas.task_type import TaskType
 from dataquality.utils.thread_pool import ThreadPoolManager
 from tests.conftest import LOCATION
-from tests.utils.hf_datasets_mock import mock_dataset, mock_dataset_repeat
-from tests.utils.mock_request import mocked_create_project_run, mocked_get_project_run
+from tests.test_utils.hf_datasets_mock import mock_dataset, mock_dataset_repeat
+from tests.test_utils.mock_request import (
+    mocked_create_project_run,
+    mocked_get_project_run,
+)
 
 # Load models locally
 try:
diff --git a/tests/test_spacy_integration.py b/tests/integrations/spacy/test_spacy_integration.py
similarity index 100%
rename from tests/test_spacy_integration.py
rename to tests/integrations/spacy/test_spacy_integration.py
diff --git a/tests/test_spacy_ner.py b/tests/integrations/spacy/test_spacy_ner.py
similarity index 98%
rename from tests/test_spacy_ner.py
rename to tests/integrations/spacy/test_spacy_ner.py
index 6d4ee6989..93595e085 100644
--- a/tests/test_spacy_ner.py
+++ b/tests/integrations/spacy/test_spacy_ner.py
@@ -24,8 +24,8 @@
 from dataquality.schemas.task_type import TaskType
 from dataquality.utils.thread_pool import ThreadPoolManager
 from tests.conftest import LOCATION
-from tests.utils.spacy_integration import load_ner_data_from_local, train_model
-from tests.utils.spacy_integration_constants import (
+from tests.test_utils.spacy_integration import load_ner_data_from_local, train_model
+from tests.test_utils.spacy_integration_constants import (
     LONG_SHORT_DATA,
     LONG_TRAIN_DATA,
     MISALIGNED_SPAN_DATA,
diff --git a/tests/test_pt_utils.py b/tests/integrations/torch/test_pt_utils.py
similarity index 100%
rename from tests/test_pt_utils.py
rename to tests/integrations/torch/test_pt_utils.py
diff --git a/tests/test_text_classification_pt.py b/tests/integrations/torch/test_text_classification_pt.py
similarity index 100%
rename from tests/test_text_classification_pt.py
rename to tests/integrations/torch/test_text_classification_pt.py
diff --git a/tests/test_multi_label.py b/tests/loggers/test_multi_label.py
similarity index 100%
rename from tests/test_multi_label.py
rename to tests/loggers/test_multi_label.py
diff --git a/tests/test_ner.py b/tests/loggers/test_ner.py
similarity index 99%
rename from tests/test_ner.py
rename to tests/loggers/test_ner.py
index 9cfe0aa67..2e4aa9e40 100644
--- a/tests/test_ner.py
+++ b/tests/loggers/test_ner.py
@@ -18,7 +18,7 @@
 from dataquality.schemas.task_type import TaskType
 from dataquality.utils.thread_pool import ThreadPoolManager
 from tests.conftest import TEST_PATH
-from tests.utils.ner_constants import (
+from tests.test_utils.ner_constants import (
     GOLD_SPANS,
     LABELS,
     NER_INPUT_DATA,
diff --git a/tests/test_text_classification.py b/tests/loggers/test_text_classification.py
similarity index 100%
rename from tests/test_text_classification.py
rename to tests/loggers/test_text_classification.py
diff --git a/tests/test_schema/test_metrics.py b/tests/schemas/test_metrics.py
similarity index 100%
rename from tests/test_schema/test_metrics.py
rename to tests/schemas/test_metrics.py
diff --git a/tests/test_predicates.py b/tests/schemas/test_predicates.py
similarity index 100%
rename from tests/test_predicates.py
rename to tests/schemas/test_predicates.py
diff --git a/tests/test_dataquality.py b/tests/test_dataquality.py
index 46ae5cb1a..849332d27 100644
--- a/tests/test_dataquality.py
+++ b/tests/test_dataquality.py
@@ -16,14 +16,16 @@
 import dataquality.core._config
 import dataquality.core.finish
 from dataquality.exceptions import GalileoException, GalileoWarning, LogBatchError
+from dataquality.loggers import BaseGalileoLogger
 from dataquality.loggers.data_logger import BaseGalileoDataLogger
+from dataquality.loggers.model_logger import BaseGalileoModelLogger
 from dataquality.loggers.model_logger.text_classification import (
     TextClassificationModelLogger,
 )
 from dataquality.schemas.task_type import TaskType
 from dataquality.utils.thread_pool import ThreadPoolManager
 from tests.conftest import TEST_PATH
-from tests.utils.data_utils import (
+from tests.test_utils.data_utils import (
     NUM_LOGS,
     NUM_RECORDS,
     _log_text_classification_data,
@@ -751,3 +753,36 @@ def test_cloud_restricts_inference_mode(mock_cloud: MagicMock) -> None:
         "accounts can access this feature. Please email us at team@rungalileo.io for "
         "more information."
     )
+
+
+def test_attribute_subsets() -> None:
+    """All potential logging fields used by all subclass loggers should be encapsulated
+
+    Any new logger that is created has a set of attributes that it expects from users.
+    The `BaseLoggerAttributes` from the BaseGalileoLogger should be the superset of
+    all child loggers.
+    """
+    all_attrs = set(BaseGalileoLogger.get_valid_attributes())
+    sub_data_loggers = BaseGalileoDataLogger.__subclasses__()
+    data_logger_attrs = set(
+        [j for i in sub_data_loggers for j in i.get_valid_attributes()]
+    )
+    sub_model_loggers = BaseGalileoModelLogger.__subclasses__()
+    model_logger_attrs = set(
+        [j for i in sub_model_loggers for j in i.get_valid_attributes()]
+    )
+    all_sub_attrs = data_logger_attrs.union(model_logger_attrs)
+    assert all_attrs.issuperset(
+        all_sub_attrs
+    ), f"Missing attrs: {all_sub_attrs - all_attrs}"
+
+
+def test_int_labels(set_test_config: Callable) -> None:
+    dataquality.set_labels_for_run(labels=[1, 2, 3, 4, 5])  # type: ignore
+    assert dataquality.get_data_logger().logger_config.labels == [
+        "1",
+        "2",
+        "3",
+        "4",
+        "5",
+    ]
diff --git a/tests/test_logger.py b/tests/test_logger.py
deleted file mode 100644
index 91dc5a3d1..000000000
--- a/tests/test_logger.py
+++ /dev/null
@@ -1,39 +0,0 @@
-from typing import Callable
-
-import dataquality
-from dataquality.loggers import BaseGalileoLogger
-from dataquality.loggers.data_logger import BaseGalileoDataLogger
-from dataquality.loggers.model_logger import BaseGalileoModelLogger
-
-
-def test_attribute_subsets() -> None:
-    """All potential logging fields used by all subclass loggers should be encapsulated
-
-    Any new logger that is created has a set of attributes that it expects from users.
-    The `BaseLoggerAttributes` from the BaseGalileoLogger should be the superset of
-    all child loggers.
-    """
-    all_attrs = set(BaseGalileoLogger.get_valid_attributes())
-    sub_data_loggers = BaseGalileoDataLogger.__subclasses__()
-    data_logger_attrs = set(
-        [j for i in sub_data_loggers for j in i.get_valid_attributes()]
-    )
-    sub_model_loggers = BaseGalileoModelLogger.__subclasses__()
-    model_logger_attrs = set(
-        [j for i in sub_model_loggers for j in i.get_valid_attributes()]
-    )
-    all_sub_attrs = data_logger_attrs.union(model_logger_attrs)
-    assert all_attrs.issuperset(
-        all_sub_attrs
-    ), f"Missing attrs: {all_sub_attrs - all_attrs}"
-
-
-def test_int_labels(set_test_config: Callable) -> None:
-    dataquality.set_labels_for_run(labels=[1, 2, 3, 4, 5])  # type: ignore
-    assert dataquality.get_data_logger().logger_config.labels == [
-        "1",
-        "2",
-        "3",
-        "4",
-        "5",
-    ]
diff --git a/tests/inference/__init__.py b/tests/test_utils/__init__.py
similarity index 100%
rename from tests/inference/__init__.py
rename to tests/test_utils/__init__.py
diff --git a/tests/utils/data_utils.py b/tests/test_utils/data_utils.py
similarity index 100%
rename from tests/utils/data_utils.py
rename to tests/test_utils/data_utils.py
diff --git a/tests/utils/hf_datasets_mock.py b/tests/test_utils/hf_datasets_mock.py
similarity index 100%
rename from tests/utils/hf_datasets_mock.py
rename to tests/test_utils/hf_datasets_mock.py
diff --git a/tests/utils/hf_integration_constants.py b/tests/test_utils/hf_integration_constants.py
similarity index 100%
rename from tests/utils/hf_integration_constants.py
rename to tests/test_utils/hf_integration_constants.py
diff --git a/tests/utils/lightning_model.py b/tests/test_utils/lightning_model.py
similarity index 100%
rename from tests/utils/lightning_model.py
rename to tests/test_utils/lightning_model.py
diff --git a/tests/utils/mock_request.py b/tests/test_utils/mock_request.py
similarity index 100%
rename from tests/utils/mock_request.py
rename to tests/test_utils/mock_request.py
diff --git a/tests/utils/ner_constants.py b/tests/test_utils/ner_constants.py
similarity index 100%
rename from tests/utils/ner_constants.py
rename to tests/test_utils/ner_constants.py
diff --git a/tests/utils/spacy_integration.py b/tests/test_utils/spacy_integration.py
similarity index 100%
rename from tests/utils/spacy_integration.py
rename to tests/test_utils/spacy_integration.py
diff --git a/tests/utils/spacy_integration_constants.py b/tests/test_utils/spacy_integration_constants.py
similarity index 99%
rename from tests/utils/spacy_integration_constants.py
rename to tests/test_utils/spacy_integration_constants.py
index ddf672477..8f7f42c02 100644
--- a/tests/utils/spacy_integration_constants.py
+++ b/tests/test_utils/spacy_integration_constants.py
@@ -1,3 +1,4 @@
+# flake8: noqa
 import numpy as np
 import pandas as pd
 
diff --git a/tests/utils/__init__.py b/tests/utils/__init__.py
deleted file mode 100644
index e69de29bb..000000000
diff --git a/tests/test_dq_logger.py b/tests/utils/test_dq_logger.py
similarity index 100%
rename from tests/test_dq_logger.py
rename to tests/utils/test_dq_logger.py
diff --git a/tests/test_name.py b/tests/utils/test_name.py
similarity index 100%
rename from tests/test_name.py
rename to tests/utils/test_name.py
diff --git a/tests/test_tf_version.py b/tests/utils/test_tf_version.py
similarity index 100%
rename from tests/test_tf_version.py
rename to tests/utils/test_tf_version.py
diff --git a/tests/test_vaex_utils.py b/tests/utils/test_vaex_utils.py
similarity index 100%
rename from tests/test_vaex_utils.py
rename to tests/utils/test_vaex_utils.py
diff --git a/tests/test_version.py b/tests/utils/test_version.py
similarity index 96%
rename from tests/test_version.py
rename to tests/utils/test_version.py
index 683a5df05..e7d62ba32 100644
--- a/tests/test_version.py
+++ b/tests/utils/test_version.py
@@ -6,7 +6,7 @@
 from dataquality import __version__
 from dataquality.exceptions import GalileoException
 from dataquality.utils import version
-from tests.utils.mock_request import (
+from tests.test_utils.mock_request import (
     mocked_healthcheck_request,
     mocked_healthcheck_request_new_api_version,
 )