diff --git a/.github/workflows/mr_ci_text_spotting.yml b/.github/workflows/mr_ci_text_spotting.yml new file mode 100644 index 00000000..7e40217c --- /dev/null +++ b/.github/workflows/mr_ci_text_spotting.yml @@ -0,0 +1,89 @@ +--- +name: Units Tests - Text Spotting + +on: [push] + +# Run linter with github actions for quick feedbacks. +jobs: + + macos_tests: + runs-on: macos-latest + # run on PRs, or commits to facebookresearch (not internal) + strategy: + fail-fast: false + matrix: + torch: ["1.13.1", "2.2.2"] + include: + - torch: "1.13.1" + torchvision: "0.14.1" + - torch: "2.2.2" + torchvision: "0.17.2" + + env: + # point datasets to ~/.torch so it's cached by CI + DETECTRON2_DATASETS: ~/.torch/datasets + steps: + - name: Checkout + uses: actions/checkout@v2 + + - name: Set up Python 3.9 + uses: actions/setup-python@v2 + with: + python-version: 3.9 + + - name: Update pip + run: | + python -m ensurepip + python -m pip install --upgrade pip + + - name: Install dependencies + run: | + python -m pip install -U pip + python -m pip install wheel ninja opencv-python-headless onnx pytest-xdist + python -m pip install numpy==1.26.4 + python -m pip install torch==${{matrix.torch}} torchvision==${{matrix.torchvision}} -f https://download.pytorch.org/whl/torch_stable.html + # install from github to get latest; install iopath first since fvcore depends on it + python -m pip install -U 'git+https://github.com/facebookresearch/iopath' + python -m pip install -U 'git+https://github.com/facebookresearch/fvcore' + wget https://raw.githubusercontent.com/pytorch/pytorch/master/torch/utils/collect_env.py + python collect_env.py + + - name: Build and install + run: | + CC=clang CXX=clang++ python -m pip install 'git+https://github.com/facebookresearch/detectron2.git' + python -m detectron2.utils.collect_env + python -m pip install ".[dev]" + + - name: Install DPText-DETR + run: | + git clone https://github.com/maps-as-data/DPText-DETR.git + python -m pip install 
'git+https://github.com/maps-as-data/DPText-DETR.git' # Install DPText-DETR + python -m pip install numpy==1.26.4 + wget https://huggingface.co/rwood-97/DPText_DETR_ArT_R_50_poly/resolve/main/art_final.pth + + - name: Run DPText-DETR unittests + run: | + python -m pytest test_text_spotting/test_dptext_runner.py + + + - name: Install DeepSolo + run: | + git clone https://github.com/maps-as-data/DeepSolo.git + python -m pip install 'git+https://github.com/maps-as-data/DeepSolo.git' --force-reinstall --no-deps # Install DeepSolo + python -m pip install numpy==1.26.4 + wget https://huggingface.co/rwood-97/DeepSolo_ic15_res50/resolve/main/ic15_res50_finetune_synth-tt-mlt-13-15-textocr.pth + + - name: Run DeepSolo unittests + run: | + python -m pytest test_text_spotting/test_deepsolo_runner.py + + - name: Install MapTextPipeline + run: | + git clone https://github.com/maps-as-data/MapTextPipeline.git + python -m pip install 'git+https://github.com/maps-as-data/MapTextPipeline.git' --force-reinstall --no-deps # Install MapTextPipeline + python -m pip install "numpy<2.0.0" + wget https://huggingface.co/rwood-97/MapTextPipeline_rumsey/resolve/main/rumsey-finetune.pth + + - name: Run MapTextPipeline unittests + run: | + python -m pytest test_text_spotting/test_maptext_runner.py diff --git a/CHANGELOG.md b/CHANGELOG.md index e72137e0..c828444e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ _ADD NEW CHANGES HERE_ - All file loading methods now support `pathlib.Path` and `gpd.GeoDataFrame` objects as input ([#495](https://github.com/maps-as-data/MapReader/pull/495)) - Loading of dataframes from GeoJSON files now supported in many file loading methods (e.g. `add_metadata`, `Annotator.__init__`, `AnnotationsLoader.load`, etc.) ([#495](https://github.com/maps-as-data/MapReader/pull/495)) - `load_frames.py` added to `mapreader.utils`. This has functions for loading from various file formats (e.g. CSV, Excel, GeoJSON, etc.) 
and converting to GeoDataFrames ([#495](https://github.com/maps-as-data/MapReader/pull/495)) +- Added tests for text spotting code ([#500](https://github.com/maps-as-data/MapReader/pull/500)) ### Changed diff --git a/docs/source/community-and-contributions/contribution-guide/developers-corner/running-tests.rst b/docs/source/community-and-contributions/contribution-guide/developers-corner/running-tests.rst index db20c10a..4b885ef4 100644 --- a/docs/source/community-and-contributions/contribution-guide/developers-corner/running-tests.rst +++ b/docs/source/community-and-contributions/contribution-guide/developers-corner/running-tests.rst @@ -1,9 +1,9 @@ Running tests ============= -To run the tests for MapReader, you will need to have installed the **dev dependencies** as described above. +To run the tests for MapReader, you will need to have installed the **dev dependencies** (as described in the installation instructions). -Also, if you have followed the "Install from PyPI" instructions, you will need to clone the MapReader repository to access the tests. i.e.: +.. note:: If you have followed the "Install from PyPI" instructions, you will also need to clone the MapReader repository to access the tests. i.e.: .. code-block:: bash @@ -18,3 +18,44 @@ You can then run the tests using from the root of the MapReader directory using python -m pytest -v If all tests pass, this means that MapReader has been installed and is working as expected. + +Testing text spotting +--------------------- + +The tests for the text spotting code are separated from the main tests due to dependency conflicts. + +You will only be able to run the text spotting tests for the text spotting framework (DPTextDETR, DeepSolo or MapTextPipeline) you have installed. + +For DPTextDETR, use the following commands: + +.. code-block:: bash + + cd path/to/MapReader # change this to your path, e.g. 
cd ~/MapReader + conda activate mapreader + export ADET_PATH=path/to/DPTextDETR # change this to the path where you have saved the DPTextDETR repository + wget https://huggingface.co/rwood-97/DPText_DETR_ArT_R_50_poly/resolve/main/art_final.pth # download the model weights + python -m pytest -v test_text_spotting/test_dptext_runner.py + + +For DeepSolo: + +.. code-block:: bash + + cd path/to/MapReader # change this to your path, e.g. cd ~/MapReader + conda activate mapreader + export ADET_PATH=path/to/DeepSolo # change this to the path where you have saved the DeepSolo repository + wget https://huggingface.co/rwood-97/DeepSolo_ic15_res50/resolve/main/ic15_res50_finetune_synth-tt-mlt-13-15-textocr.pth # download the model weights + python -m pytest -v test_text_spotting/test_deepsolo_runner.py + +For MapTextPipeline: + +.. code-block:: bash + + cd path/to/MapReader # change this to your path, e.g. cd ~/MapReader + conda activate mapreader + export ADET_PATH=path/to/MapTextPipeline # change this to the path where you have saved the MapTextPipeline repository + wget https://huggingface.co/rwood-97/MapTextPipeline_rumsey/resolve/main/rumsey-finetune.pth # download the model weights + python -m pytest -v test_text_spotting/test_maptext_runner.py + + +If all tests pass, this means that the text spotting framework has been installed and is working as expected. 
diff --git a/test_text_spotting/test_deepsolo_runner.py b/test_text_spotting/test_deepsolo_runner.py new file mode 100644 index 00000000..c62a79f6 --- /dev/null +++ b/test_text_spotting/test_deepsolo_runner.py @@ -0,0 +1,228 @@ +from __future__ import annotations + +import os +import pathlib +import pickle + +import adet +import geopandas as gpd +import pandas as pd +import pytest +from detectron2.engine import DefaultPredictor +from detectron2.structures.instances import Instances + +from mapreader import DeepSoloRunner +from mapreader.load import MapImages + +print(adet.__version__) + +# use cloned DeepSolo path if running in github actions +ADET_PATH = ( + pathlib.Path("./DeepSolo/").resolve() + if os.getenv("GITHUB_ACTIONS") == "true" + else pathlib.Path(os.getenv("ADET_PATH")).resolve() +) + + +@pytest.fixture +def sample_dir(): + return pathlib.Path(__file__).resolve().parent.parent / "tests" / "sample_files" + + +@pytest.fixture +def init_dataframes(sample_dir, tmp_path): + """Initializes MapImages object (with metadata from csv and patches) and creates parent and patch dataframes. 
+ Returns + ------- + tuple + path to parent and patch dataframes + """ + maps = MapImages(f"{sample_dir}/mapreader_text.png") + maps.add_metadata(f"{sample_dir}/mapreader_text_metadata.csv") + maps.patchify_all(patch_size=800, path_save=tmp_path) + maps.check_georeferencing() + parent_df, patch_df = maps.convert_images() + return parent_df, patch_df + + +@pytest.fixture(scope="function") +def mock_response(monkeypatch, sample_dir): + def mock_pred(self, *args, **kwargs): + with open(f"{sample_dir}/patch-0-0-800-40-deepsolo-pred.pkl", "rb") as f: + outputs = pickle.load(f) + return outputs + + monkeypatch.setattr(DefaultPredictor, "__call__", mock_pred) + + +@pytest.fixture +def init_runner(init_dataframes): + parent_df, patch_df = init_dataframes + runner = DeepSoloRunner( + patch_df, + parent_df=parent_df, + cfg_file=f"{ADET_PATH}/configs/R_50/IC15/finetune_150k_tt_mlt_13_15_textocr.yaml", + ) + return runner + + +@pytest.fixture +def runner_run_all(init_runner, mock_response): + runner = init_runner + _ = runner.run_all() + return runner + + +def test_deepsolo_init(init_dataframes): + parent_df, patch_df = init_dataframes + runner = DeepSoloRunner( + patch_df, + parent_df=parent_df, + cfg_file=f"{ADET_PATH}/configs/R_50/IC15/finetune_150k_tt_mlt_13_15_textocr.yaml", + ) + assert isinstance(runner, DeepSoloRunner) + assert isinstance(runner.predictor, DefaultPredictor) + assert isinstance(runner.parent_df.iloc[0]["coordinates"], tuple) + assert isinstance(runner.patch_df.iloc[0]["coordinates"], tuple) + + +def test_deepsolo_init_str(init_dataframes, tmp_path): + parent_df, patch_df = init_dataframes + parent_df = parent_df.to_csv(f"{tmp_path}/parent_df.csv") + patch_df = patch_df.to_csv(f"{tmp_path}/patch_df.csv") + runner = DeepSoloRunner( + f"{tmp_path}/patch_df.csv", + parent_df=f"{tmp_path}/parent_df.csv", + cfg_file=f"{ADET_PATH}/configs/R_50/IC15/finetune_150k_tt_mlt_13_15_textocr.yaml", + ) + assert isinstance(runner, DeepSoloRunner) + assert 
isinstance(runner.predictor, DefaultPredictor) + assert isinstance(runner.parent_df.iloc[0]["coordinates"], tuple) + assert isinstance(runner.patch_df.iloc[0]["coordinates"], tuple) + + +def test_deepsolo_init_pathlib(init_dataframes, tmp_path): + parent_df, patch_df = init_dataframes + parent_df = parent_df.to_csv(f"{tmp_path}/parent_df.csv") + patch_df = patch_df.to_csv(f"{tmp_path}/patch_df.csv") + runner = DeepSoloRunner( + pathlib.Path(f"{tmp_path}/patch_df.csv"), + parent_df=pathlib.Path(f"{tmp_path}/parent_df.csv"), + cfg_file=f"{ADET_PATH}/configs/R_50/IC15/finetune_150k_tt_mlt_13_15_textocr.yaml", + ) + assert isinstance(runner, DeepSoloRunner) + assert isinstance(runner.predictor, DefaultPredictor) + assert isinstance(runner.parent_df.iloc[0]["coordinates"], tuple) + assert isinstance(runner.patch_df.iloc[0]["coordinates"], tuple) + + +def test_deepsolo_init_tsv(init_dataframes, tmp_path): + parent_df, patch_df = init_dataframes + parent_df = parent_df.to_csv(f"{tmp_path}/parent_df.tsv", sep="\t") + patch_df = patch_df.to_csv(f"{tmp_path}/patch_df.tsv", sep="\t") + runner = DeepSoloRunner( + f"{tmp_path}/patch_df.tsv", + parent_df=f"{tmp_path}/parent_df.tsv", + delimiter="\t", + cfg_file=f"{ADET_PATH}/configs/R_50/IC15/finetune_150k_tt_mlt_13_15_textocr.yaml", + ) + assert isinstance(runner, DeepSoloRunner) + assert isinstance(runner.predictor, DefaultPredictor) + assert isinstance(runner.parent_df.iloc[0]["coordinates"], tuple) + assert isinstance(runner.patch_df.iloc[0]["coordinates"], tuple) + + +def test_deepsolo_run_all(init_runner, mock_response): + runner = init_runner + # dict + out = runner.run_all() + assert isinstance(out, dict) + assert "patch-0-0-800-40-#mapreader_text.png#.png" in out.keys() + assert isinstance(out["patch-0-0-800-40-#mapreader_text.png#.png"], list) + # dataframe + out = runner._dict_to_dataframe(runner.patch_predictions, geo=False, parent=False) + assert isinstance(out, pd.DataFrame) + assert set(out.columns) == 
set(["image_id", "geometry", "text", "score"]) + assert "patch-0-0-800-40-#mapreader_text.png#.png" in out["image_id"].values + + +def test_deepsolo_convert_to_parent(runner_run_all, mock_response): + runner = runner_run_all + # dict + out = runner.convert_to_parent_pixel_bounds() + assert isinstance(out, dict) + assert "mapreader_text.png" in out.keys() + assert isinstance(out["mapreader_text.png"], list) + # dataframe + out = runner._dict_to_dataframe(runner.parent_predictions, geo=False, parent=True) + assert isinstance(out, pd.DataFrame) + assert set(out.columns) == set( + ["image_id", "patch_id", "geometry", "text", "score"] + ) + assert "mapreader_text.png" in out["image_id"].values + + +def test_deepsolo_convert_to_parent_coords(runner_run_all, mock_response): + runner = runner_run_all + # dict + out = runner.convert_to_coords() + assert isinstance(out, dict) + assert "mapreader_text.png" in out.keys() + assert isinstance(out["mapreader_text.png"], list) + # dataframe + out = runner._dict_to_dataframe(runner.geo_predictions, geo=True, parent=True) + assert isinstance(out, gpd.GeoDataFrame) + assert set(out.columns) == set( + ["image_id", "patch_id", "geometry", "crs", "text", "score"] + ) + assert "mapreader_text.png" in out["image_id"].values + assert out.crs == runner.parent_df.crs + + +def test_deepsolo_deduplicate(sample_dir, tmp_path, mock_response): + maps = MapImages(f"{sample_dir}/mapreader_text.png") + maps.add_metadata(f"{sample_dir}/mapreader_text_metadata.csv") + maps.patchify_all(patch_size=800, path_save=tmp_path, overlap=0.5) + maps.check_georeferencing() + parent_df, patch_df = maps.convert_images() + runner = DeepSoloRunner( + patch_df, + parent_df=parent_df, + cfg_file=f"{ADET_PATH}/configs/R_50/IC15/finetune_150k_tt_mlt_13_15_textocr.yaml", + ) + _ = runner.run_all() + out = runner.convert_to_parent_pixel_bounds(deduplicate=False) + len_before = len(out["mapreader_text.png"]) + runner.parent_predictions = {} + out_07 = 
runner.convert_to_parent_pixel_bounds(deduplicate=True) + len_07 = len(out_07["mapreader_text.png"]) + print(len_before, len_07) + assert len_before >= len_07 + runner.parent_predictions = {} + out_05 = runner.convert_to_parent_pixel_bounds(deduplicate=True, min_ioa=0.5) + len_05 = len(out_05["mapreader_text.png"]) + print(len_before, len_05) + assert len_before >= len_05 + assert len_07 >= len_05 + + +def test_deepsolo_run_on_image(init_runner, mock_response): + runner = init_runner + out = runner.run_on_image( + runner.patch_df.iloc[0]["image_path"], return_outputs=True + ) + assert isinstance(out, dict) + assert "instances" in out.keys() + assert isinstance(out["instances"], Instances) + + +def test_deepsolo_save_to_geojson(runner_run_all, tmp_path, mock_response): + runner = runner_run_all + _ = runner.convert_to_coords() + runner.save_to_geojson(f"{tmp_path}/text.geojson") + assert os.path.exists(f"{tmp_path}/text.geojson") + gdf = gpd.read_file(f"{tmp_path}/text.geojson") + assert isinstance(gdf, gpd.GeoDataFrame) + assert set(gdf.columns) == set( + ["image_id", "patch_id", "geometry", "crs", "text", "score"] + ) diff --git a/test_text_spotting/test_dptext_runner.py b/test_text_spotting/test_dptext_runner.py new file mode 100644 index 00000000..5006a129 --- /dev/null +++ b/test_text_spotting/test_dptext_runner.py @@ -0,0 +1,222 @@ +from __future__ import annotations + +import os +import pathlib +import pickle + +import adet +import geopandas as gpd +import pandas as pd +import pytest +from detectron2.engine import DefaultPredictor +from detectron2.structures.instances import Instances + +from mapreader import DPTextDETRRunner +from mapreader.load import MapImages + +print(adet.__version__) + +# use cloned DPText-DETR path if running in github actions +ADET_PATH = ( + pathlib.Path("./DPText-DETR/").resolve() + if os.getenv("GITHUB_ACTIONS") == "true" + else pathlib.Path(os.getenv("ADET_PATH")).resolve() +) + + +@pytest.fixture +def sample_dir(): + return 
pathlib.Path(__file__).resolve().parent.parent / "tests" / "sample_files" + + +@pytest.fixture +def init_dataframes(sample_dir, tmp_path): + """Initializes MapImages object (with metadata from csv and patches) and creates parent and patch dataframes. + Returns + ------- + tuple + path to parent and patch dataframes + """ + maps = MapImages(f"{sample_dir}/mapreader_text.png") + maps.add_metadata(f"{sample_dir}/mapreader_text_metadata.csv") + maps.patchify_all(patch_size=800, path_save=tmp_path) + maps.check_georeferencing() + parent_df, patch_df = maps.convert_images() + return parent_df, patch_df + + +@pytest.fixture(scope="function") +def mock_response(monkeypatch, sample_dir): + def mock_pred(self, *args, **kwargs): + with open(f"{sample_dir}/patch-0-0-800-40-dptext-detr-pred.pkl", "rb") as f: + outputs = pickle.load(f) + return outputs + + monkeypatch.setattr(DefaultPredictor, "__call__", mock_pred) + + +@pytest.fixture +def init_runner(init_dataframes): + parent_df, patch_df = init_dataframes + runner = DPTextDETRRunner( + patch_df, + parent_df=parent_df, + cfg_file=f"{ADET_PATH}/configs/DPText_DETR/ArT/R_50_poly.yaml", + ) + return runner + + +@pytest.fixture +def runner_run_all(init_runner, mock_response): + runner = init_runner + _ = runner.run_all() + return runner + + +def test_dptext_init(init_dataframes): + parent_df, patch_df = init_dataframes + runner = DPTextDETRRunner( + patch_df, + parent_df=parent_df, + cfg_file=f"{ADET_PATH}/configs/DPText_DETR/ArT/R_50_poly.yaml", + ) + assert isinstance(runner, DPTextDETRRunner) + assert isinstance(runner.predictor, DefaultPredictor) + assert isinstance(runner.parent_df.iloc[0]["coordinates"], tuple) + assert isinstance(runner.patch_df.iloc[0]["coordinates"], tuple) + + +def test_dptext_init_str(init_dataframes, tmp_path): + parent_df, patch_df = init_dataframes + parent_df = parent_df.to_csv(f"{tmp_path}/parent_df.csv") + patch_df = patch_df.to_csv(f"{tmp_path}/patch_df.csv") + runner = DPTextDETRRunner( + 
f"{tmp_path}/patch_df.csv", + parent_df=f"{tmp_path}/parent_df.csv", + cfg_file=f"{ADET_PATH}/configs/DPText_DETR/ArT/R_50_poly.yaml", + ) + assert isinstance(runner, DPTextDETRRunner) + assert isinstance(runner.predictor, DefaultPredictor) + assert isinstance(runner.parent_df.iloc[0]["coordinates"], tuple) + assert isinstance(runner.patch_df.iloc[0]["coordinates"], tuple) + + +def test_dptext_init_pathlib(init_dataframes, tmp_path): + parent_df, patch_df = init_dataframes + parent_df = parent_df.to_csv(f"{tmp_path}/parent_df.csv") + patch_df = patch_df.to_csv(f"{tmp_path}/patch_df.csv") + runner = DPTextDETRRunner( + pathlib.Path(f"{tmp_path}/patch_df.csv"), + parent_df=pathlib.Path(f"{tmp_path}/parent_df.csv"), + cfg_file=f"{ADET_PATH}/configs/DPText_DETR/ArT/R_50_poly.yaml", + ) + assert isinstance(runner, DPTextDETRRunner) + assert isinstance(runner.predictor, DefaultPredictor) + assert isinstance(runner.parent_df.iloc[0]["coordinates"], tuple) + assert isinstance(runner.patch_df.iloc[0]["coordinates"], tuple) + + +def test_dptext_init_tsv(init_dataframes, tmp_path): + parent_df, patch_df = init_dataframes + parent_df = parent_df.to_csv(f"{tmp_path}/parent_df.tsv", sep="\t") + patch_df = patch_df.to_csv(f"{tmp_path}/patch_df.tsv", sep="\t") + runner = DPTextDETRRunner( + f"{tmp_path}/patch_df.tsv", + parent_df=f"{tmp_path}/parent_df.tsv", + delimiter="\t", + cfg_file=f"{ADET_PATH}/configs/DPText_DETR/ArT/R_50_poly.yaml", + ) + assert isinstance(runner, DPTextDETRRunner) + assert isinstance(runner.predictor, DefaultPredictor) + assert isinstance(runner.parent_df.iloc[0]["coordinates"], tuple) + assert isinstance(runner.patch_df.iloc[0]["coordinates"], tuple) + + +def test_dptext_run_all(init_runner, mock_response): + runner = init_runner + # dict + out = runner.run_all() + assert isinstance(out, dict) + assert "patch-0-0-800-40-#mapreader_text.png#.png" in out.keys() + assert isinstance(out["patch-0-0-800-40-#mapreader_text.png#.png"], list) + # dataframe + out 
= runner._dict_to_dataframe(runner.patch_predictions, geo=False, parent=False) + assert isinstance(out, pd.DataFrame) + assert set(out.columns) == set(["image_id", "geometry", "score"]) + assert "patch-0-0-800-40-#mapreader_text.png#.png" in out["image_id"].values + + +def test_dptext_convert_to_parent(runner_run_all, mock_response): + runner = runner_run_all + # dict + out = runner.convert_to_parent_pixel_bounds() + assert isinstance(out, dict) + assert "mapreader_text.png" in out.keys() + assert isinstance(out["mapreader_text.png"], list) + # dataframe + out = runner._dict_to_dataframe(runner.parent_predictions, geo=False, parent=True) + assert isinstance(out, pd.DataFrame) + assert set(out.columns) == set(["image_id", "patch_id", "geometry", "score"]) + assert "mapreader_text.png" in out["image_id"].values + + +def test_dptext_convert_to_parent_coords(runner_run_all, mock_response): + runner = runner_run_all + # dict + out = runner.convert_to_coords() + assert isinstance(out, dict) + assert "mapreader_text.png" in out.keys() + assert isinstance(out["mapreader_text.png"], list) + # dataframe + out = runner._dict_to_dataframe(runner.geo_predictions, geo=True, parent=True) + assert isinstance(out, gpd.GeoDataFrame) + assert set(out.columns) == set(["image_id", "patch_id", "geometry", "crs", "score"]) + assert "mapreader_text.png" in out["image_id"].values + assert out.crs == runner.parent_df.crs + + +def test_dptext_deduplicate(sample_dir, tmp_path, mock_response): + maps = MapImages(f"{sample_dir}/mapreader_text.png") + maps.add_metadata(f"{sample_dir}/mapreader_text_metadata.csv") + maps.patchify_all(patch_size=800, path_save=tmp_path, overlap=0.5) + maps.check_georeferencing() + parent_df, patch_df = maps.convert_images() + runner = DPTextDETRRunner( + patch_df, + parent_df=parent_df, + cfg_file=f"{ADET_PATH}/configs/DPText_DETR/ArT/R_50_poly.yaml", + ) + _ = runner.run_all() + out = runner.convert_to_parent_pixel_bounds(deduplicate=False) + len_before = 
len(out["mapreader_text.png"]) + runner.parent_predictions = {} + out_07 = runner.convert_to_parent_pixel_bounds(deduplicate=True) + len_07 = len(out_07["mapreader_text.png"]) + print(len_before, len_07) + assert len_before >= len_07 + runner.parent_predictions = {} + out_05 = runner.convert_to_parent_pixel_bounds(deduplicate=True, min_ioa=0.5) + len_05 = len(out_05["mapreader_text.png"]) + print(len_before, len_05) + assert len_before >= len_05 + assert len_07 >= len_05 + + +def test_dptext_run_on_image(init_runner, mock_response): + runner = init_runner + out = runner.run_on_image( + runner.patch_df.iloc[0]["image_path"], return_outputs=True + ) + assert isinstance(out, dict) + assert "instances" in out.keys() + assert isinstance(out["instances"], Instances) + + +def test_dptext_save_to_geojson(runner_run_all, tmp_path, mock_response): + runner = runner_run_all + _ = runner.convert_to_coords() + runner.save_to_geojson(f"{tmp_path}/text.geojson") + assert os.path.exists(f"{tmp_path}/text.geojson") + gdf = gpd.read_file(f"{tmp_path}/text.geojson") + assert isinstance(gdf, gpd.GeoDataFrame) + assert set(gdf.columns) == set(["image_id", "patch_id", "geometry", "crs", "score"]) diff --git a/test_text_spotting/test_maptext_runner.py b/test_text_spotting/test_maptext_runner.py new file mode 100644 index 00000000..9690bcc9 --- /dev/null +++ b/test_text_spotting/test_maptext_runner.py @@ -0,0 +1,228 @@ +from __future__ import annotations + +import os +import pathlib +import pickle + +import adet +import geopandas as gpd +import pandas as pd +import pytest +from detectron2.engine import DefaultPredictor +from detectron2.structures.instances import Instances + +from mapreader import MapTextRunner +from mapreader.load import MapImages + +print(adet.__version__) + +# use cloned MapTextPipeline path if running in github actions +ADET_PATH = ( + pathlib.Path("./MapTextPipeline/").resolve() + if os.getenv("GITHUB_ACTIONS") == "true" + else 
pathlib.Path(os.getenv("ADET_PATH")).resolve() +) + + +@pytest.fixture +def sample_dir(): + return pathlib.Path(__file__).resolve().parent.parent / "tests" / "sample_files" + + +@pytest.fixture +def init_dataframes(sample_dir, tmp_path): + """Initializes MapImages object (with metadata from csv and patches) and creates parent and patch dataframes. + Returns + ------- + tuple + path to parent and patch dataframes + """ + maps = MapImages(f"{sample_dir}/mapreader_text.png") + maps.add_metadata(f"{sample_dir}/mapreader_text_metadata.csv") + maps.patchify_all(patch_size=800, path_save=tmp_path) + maps.check_georeferencing() + parent_df, patch_df = maps.convert_images() + return parent_df, patch_df + + +@pytest.fixture(scope="function") +def mock_response(monkeypatch, sample_dir): + def mock_pred(self, *args, **kwargs): + with open(f"{sample_dir}/patch-0-0-800-40-maptext-pred.pkl", "rb") as f: + outputs = pickle.load(f) + return outputs + + monkeypatch.setattr(DefaultPredictor, "__call__", mock_pred) + + +@pytest.fixture +def init_runner(init_dataframes): + parent_df, patch_df = init_dataframes + runner = MapTextRunner( + patch_df, + parent_df=parent_df, + cfg_file=f"{ADET_PATH}/configs/ViTAEv2_S/rumsey/final_rumsey.yaml", + ) + return runner + + +@pytest.fixture +def runner_run_all(init_runner, mock_response): + runner = init_runner + _ = runner.run_all() + return runner + + +def test_maptext_init(init_dataframes): + parent_df, patch_df = init_dataframes + runner = MapTextRunner( + patch_df, + parent_df=parent_df, + cfg_file=f"{ADET_PATH}/configs/ViTAEv2_S/rumsey/final_rumsey.yaml", + ) + assert isinstance(runner, MapTextRunner) + assert isinstance(runner.predictor, DefaultPredictor) + assert isinstance(runner.parent_df.iloc[0]["coordinates"], tuple) + assert isinstance(runner.patch_df.iloc[0]["coordinates"], tuple) + + +def test_maptext_init_str(init_dataframes, tmp_path): + parent_df, patch_df = init_dataframes + parent_df = 
parent_df.to_csv(f"{tmp_path}/parent_df.csv") + patch_df = patch_df.to_csv(f"{tmp_path}/patch_df.csv") + runner = MapTextRunner( + f"{tmp_path}/patch_df.csv", + parent_df=f"{tmp_path}/parent_df.csv", + cfg_file=f"{ADET_PATH}/configs/ViTAEv2_S/rumsey/final_rumsey.yaml", + ) + assert isinstance(runner, MapTextRunner) + assert isinstance(runner.predictor, DefaultPredictor) + assert isinstance(runner.parent_df.iloc[0]["coordinates"], tuple) + assert isinstance(runner.patch_df.iloc[0]["coordinates"], tuple) + + +def test_maptext_init_pathlib(init_dataframes, tmp_path): + parent_df, patch_df = init_dataframes + parent_df = parent_df.to_csv(f"{tmp_path}/parent_df.csv") + patch_df = patch_df.to_csv(f"{tmp_path}/patch_df.csv") + runner = MapTextRunner( + pathlib.Path(f"{tmp_path}/patch_df.csv"), + parent_df=pathlib.Path(f"{tmp_path}/parent_df.csv"), + cfg_file=f"{ADET_PATH}/configs/ViTAEv2_S/rumsey/final_rumsey.yaml", + ) + assert isinstance(runner, MapTextRunner) + assert isinstance(runner.predictor, DefaultPredictor) + assert isinstance(runner.parent_df.iloc[0]["coordinates"], tuple) + assert isinstance(runner.patch_df.iloc[0]["coordinates"], tuple) + + +def test_maptext_init_tsv(init_dataframes, tmp_path): + parent_df, patch_df = init_dataframes + parent_df = parent_df.to_csv(f"{tmp_path}/parent_df.tsv", sep="\t") + patch_df = patch_df.to_csv(f"{tmp_path}/patch_df.tsv", sep="\t") + runner = MapTextRunner( + f"{tmp_path}/patch_df.tsv", + parent_df=f"{tmp_path}/parent_df.tsv", + delimiter="\t", + cfg_file=f"{ADET_PATH}/configs/ViTAEv2_S/rumsey/final_rumsey.yaml", + ) + assert isinstance(runner, MapTextRunner) + assert isinstance(runner.predictor, DefaultPredictor) + assert isinstance(runner.parent_df.iloc[0]["coordinates"], tuple) + assert isinstance(runner.patch_df.iloc[0]["coordinates"], tuple) + + +def test_maptext_run_all(init_runner, mock_response): + runner = init_runner + # dict + out = runner.run_all() + assert isinstance(out, dict) + assert 
"patch-0-0-800-40-#mapreader_text.png#.png" in out.keys() + assert isinstance(out["patch-0-0-800-40-#mapreader_text.png#.png"], list) + # dataframe + out = runner._dict_to_dataframe(runner.patch_predictions, geo=False, parent=False) + assert isinstance(out, pd.DataFrame) + assert set(out.columns) == set(["image_id", "geometry", "text", "score"]) + assert "patch-0-0-800-40-#mapreader_text.png#.png" in out["image_id"].values + + +def test_maptext_convert_to_parent(runner_run_all, mock_response): + runner = runner_run_all + # dict + out = runner.convert_to_parent_pixel_bounds() + assert isinstance(out, dict) + assert "mapreader_text.png" in out.keys() + assert isinstance(out["mapreader_text.png"], list) + # dataframe + out = runner._dict_to_dataframe(runner.parent_predictions, geo=False, parent=True) + assert isinstance(out, pd.DataFrame) + assert set(out.columns) == set( + ["image_id", "patch_id", "geometry", "text", "score"] + ) + assert "mapreader_text.png" in out["image_id"].values + + +def test_maptext_convert_to_parent_coords(runner_run_all, mock_response): + runner = runner_run_all + # dict + out = runner.convert_to_coords() + assert isinstance(out, dict) + assert "mapreader_text.png" in out.keys() + assert isinstance(out["mapreader_text.png"], list) + # dataframe + out = runner._dict_to_dataframe(runner.geo_predictions, geo=True, parent=True) + assert isinstance(out, gpd.GeoDataFrame) + assert set(out.columns) == set( + ["image_id", "patch_id", "geometry", "crs", "text", "score"] + ) + assert "mapreader_text.png" in out["image_id"].values + assert out.crs == runner.parent_df.crs + + +def test_maptext_deduplicate(sample_dir, tmp_path, mock_response): + maps = MapImages(f"{sample_dir}/mapreader_text.png") + maps.add_metadata(f"{sample_dir}/mapreader_text_metadata.csv") + maps.patchify_all(patch_size=800, path_save=tmp_path, overlap=0.5) + maps.check_georeferencing() + parent_df, patch_df = maps.convert_images() + runner = MapTextRunner( + patch_df, + 
parent_df=parent_df, + cfg_file=f"{ADET_PATH}/configs/ViTAEv2_S/rumsey/final_rumsey.yaml", + ) + _ = runner.run_all() + out = runner.convert_to_parent_pixel_bounds(deduplicate=False) + len_before = len(out["mapreader_text.png"]) + runner.parent_predictions = {} + out_07 = runner.convert_to_parent_pixel_bounds(deduplicate=True) + len_07 = len(out_07["mapreader_text.png"]) + print(len_before, len_07) + assert len_before >= len_07 + runner.parent_predictions = {} + out_05 = runner.convert_to_parent_pixel_bounds(deduplicate=True, min_ioa=0.5) + len_05 = len(out_05["mapreader_text.png"]) + print(len_before, len_05) + assert len_before >= len_05 + assert len_07 >= len_05 + + +def test_maptext_run_on_image(init_runner, mock_response): + runner = init_runner + out = runner.run_on_image( + runner.patch_df.iloc[0]["image_path"], return_outputs=True + ) + assert isinstance(out, dict) + assert "instances" in out.keys() + assert isinstance(out["instances"], Instances) + + +def test_maptext_save_to_geojson(runner_run_all, tmp_path, mock_response): + runner = runner_run_all + _ = runner.convert_to_coords() + runner.save_to_geojson(f"{tmp_path}/text.geojson") + assert os.path.exists(f"{tmp_path}/text.geojson") + gdf = gpd.read_file(f"{tmp_path}/text.geojson") + assert isinstance(gdf, gpd.GeoDataFrame) + assert set(gdf.columns) == set( + ["image_id", "patch_id", "geometry", "crs", "text", "score"] + ) diff --git a/tests/sample_files/mapreader_text.png b/tests/sample_files/mapreader_text.png new file mode 100644 index 00000000..402b53b7 Binary files /dev/null and b/tests/sample_files/mapreader_text.png differ diff --git a/tests/sample_files/mapreader_text_metadata.csv b/tests/sample_files/mapreader_text_metadata.csv new file mode 100644 index 00000000..7aa8481a --- /dev/null +++ b/tests/sample_files/mapreader_text_metadata.csv @@ -0,0 +1,2 @@ +,name,url,coordinates,crs,published_date,grid_bb +0,mapreader_text.png,https://maps.nls.uk/view/74488689,"(-4.833984375, 55.78892895389263, 
-4.19677734375, 56.05976947910656)",EPSG:4326,1898,"[(14, 7972, 5097)x(14, 8000, 5118)]" diff --git a/tests/sample_files/patch-0-0-800-40-deepsolo-pred.pkl b/tests/sample_files/patch-0-0-800-40-deepsolo-pred.pkl new file mode 100644 index 00000000..97aadcd8 Binary files /dev/null and b/tests/sample_files/patch-0-0-800-40-deepsolo-pred.pkl differ diff --git a/tests/sample_files/patch-0-0-800-40-dptext-detr-pred.pkl b/tests/sample_files/patch-0-0-800-40-dptext-detr-pred.pkl new file mode 100644 index 00000000..c4e19624 Binary files /dev/null and b/tests/sample_files/patch-0-0-800-40-dptext-detr-pred.pkl differ diff --git a/tests/sample_files/patch-0-0-800-40-maptext-pred.pkl b/tests/sample_files/patch-0-0-800-40-maptext-pred.pkl new file mode 100644 index 00000000..f8ec12e2 Binary files /dev/null and b/tests/sample_files/patch-0-0-800-40-maptext-pred.pkl differ