-
Notifications
You must be signed in to change notification settings - Fork 12
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #500 from maps-as-data/test_text_spotting
Add tests for text spotting code
- Loading branch information
Showing
11 changed files
with
813 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
--- | ||
name: Units Tests - Text Spotting | ||
|
||
on: [push] | ||
|
||
# Run the text spotting unit tests with GitHub Actions for quick feedback. | ||
jobs: | ||
|
||
macos_tests: | ||
runs-on: macos-latest | ||
# runs on every push (see `on: [push]` above) | ||
strategy: | ||
fail-fast: false | ||
matrix: | ||
torch: ["1.13.1", "2.2.2"] | ||
include: | ||
- torch: "1.13.1" | ||
torchvision: "0.14.1" | ||
- torch: "2.2.2" | ||
torchvision: "0.17.2" | ||
|
||
env: | ||
# point datasets to ~/.torch so it's cached by CI | ||
DETECTRON2_DATASETS: ~/.torch/datasets | ||
steps: | ||
- name: Checkout | ||
uses: actions/checkout@v2 | ||
|
||
- name: Set up Python 3.9 | ||
uses: actions/setup-python@v2 | ||
with: | ||
python-version: 3.9 | ||
|
||
- name: Update pip | ||
run: | | ||
python -m ensurepip | ||
python -m pip install --upgrade pip | ||
- name: Install dependencies | ||
run: | | ||
python -m pip install -U pip | ||
python -m pip install wheel ninja opencv-python-headless onnx pytest-xdist | ||
python -m pip install numpy==1.26.4 | ||
python -m pip install torch==${{matrix.torch}} torchvision==${{matrix.torchvision}} -f https://download.pytorch.org/whl/torch_stable.html | ||
# install from github to get latest; install iopath first since fvcore depends on it | ||
python -m pip install -U 'git+https://github.com/facebookresearch/iopath' | ||
python -m pip install -U 'git+https://github.com/facebookresearch/fvcore' | ||
wget https://raw.githubusercontent.com/pytorch/pytorch/master/torch/utils/collect_env.py | ||
python collect_env.py | ||
- name: Build and install | ||
run: | | ||
CC=clang CXX=clang++ python -m pip install 'git+https://github.com/facebookresearch/detectron2.git' | ||
python -m detectron2.utils.collect_env | ||
python -m pip install ".[dev]" | ||
- name: Install DPText-DETR | ||
run: | | ||
git clone https://github.com/maps-as-data/DPText-DETR.git | ||
python -m pip install 'git+https://github.com/maps-as-data/DPText-DETR.git' # Install DPText-DETR | ||
python -m pip install numpy==1.26.4 | ||
wget https://huggingface.co/rwood-97/DPText_DETR_ArT_R_50_poly/resolve/main/art_final.pth | ||
- name: Run DPText-DETR unittests | ||
run: | | ||
python -m pytest test_text_spotting/test_dptext_runner.py | ||
- name: Install DeepSolo | ||
run: | | ||
git clone https://github.com/maps-as-data/DeepSolo.git | ||
python -m pip install 'git+https://github.com/maps-as-data/DeepSolo.git' --force-reinstall --no-deps # Install DeepSolo | ||
python -m pip install numpy==1.26.4 | ||
wget https://huggingface.co/rwood-97/DeepSolo_ic15_res50/resolve/main/ic15_res50_finetune_synth-tt-mlt-13-15-textocr.pth | ||
- name: Run DeepSolo unittests | ||
run: | | ||
python -m pytest test_text_spotting/test_deepsolo_runner.py | ||
- name: Install MapTextPipeline | ||
run: | | ||
git clone https://github.com/maps-as-data/MapTextPipeline.git | ||
python -m pip install 'git+https://github.com/maps-as-data/MapTextPipeline.git' --force-reinstall --no-deps # Install MapTextPipeline | ||
python -m pip install "numpy<2.0.0" | ||
wget https://huggingface.co/rwood-97/MapTextPipeline_rumsey/resolve/main/rumsey-finetune.pth | ||
- name: Run MapTextPipeline unittests | ||
run: | | ||
python -m pytest test_text_spotting/test_maptext_runner.py |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,228 @@ | ||
from __future__ import annotations | ||
|
||
import os | ||
import pathlib | ||
import pickle | ||
|
||
import adet | ||
import geopandas as gpd | ||
import pandas as pd | ||
import pytest | ||
from detectron2.engine import DefaultPredictor | ||
from detectron2.structures.instances import Instances | ||
|
||
from mapreader import DeepSoloRunner | ||
from mapreader.load import MapImages | ||
|
||
print(adet.__version__) | ||
|
||
# Root of the DeepSolo checkout that holds the config files used below.
# When running in GitHub Actions the cloned ./DeepSolo/ directory is used;
# otherwise the ADET_PATH environment variable is read. If ADET_PATH is not
# set we fall back to ./DeepSolo/ as well, because pathlib.Path(None) would
# otherwise raise a TypeError while this module is being imported.
ADET_PATH = (
    pathlib.Path("./DeepSolo/").resolve()
    if os.getenv("GITHUB_ACTIONS") == "true"
    else pathlib.Path(os.getenv("ADET_PATH", "./DeepSolo/")).resolve()
)
|
||
|
||
@pytest.fixture
def sample_dir():
    """Return the ``tests/sample_files`` directory.

    The directory is resolved relative to this file: two levels up from the
    file itself, then ``tests/sample_files``.
    """
    two_levels_up = pathlib.Path(__file__).resolve().parents[1]
    return two_levels_up.joinpath("tests", "sample_files")
|
||
|
||
@pytest.fixture
def init_dataframes(sample_dir, tmp_path):
    """Create parent and patch dataframes from the sample text map.

    A MapImages object is built from ``mapreader_text.png``, metadata is added
    from ``mapreader_text_metadata.csv`` and the image is patchified with a
    patch size of 800 (patches are saved under ``tmp_path``) before the images
    are converted.

    Returns
    -------
    tuple
        The parent and patch objects returned by ``MapImages.convert_images``,
        in that order.
    """
    image_file = f"{sample_dir}/mapreader_text.png"
    metadata_file = f"{sample_dir}/mapreader_text_metadata.csv"

    map_images = MapImages(image_file)
    map_images.add_metadata(metadata_file)
    map_images.patchify_all(patch_size=800, path_save=tmp_path)
    map_images.check_georeferencing()

    parents, patches = map_images.convert_images()
    return parents, patches
|
||
|
||
@pytest.fixture(scope="function")
def mock_response(monkeypatch, sample_dir):
    """Replace ``DefaultPredictor.__call__`` with a stub.

    The stub ignores its arguments and returns the object unpickled from
    ``patch-0-0-800-40-deepsolo-pred.pkl`` in the sample directory.
    """
    pickled_prediction = f"{sample_dir}/patch-0-0-800-40-deepsolo-pred.pkl"

    def _load_saved_prediction(self, *args, **kwargs):
        # The pickle is read from the test sample directory on every call.
        with open(pickled_prediction, "rb") as handle:
            return pickle.load(handle)

    monkeypatch.setattr(DefaultPredictor, "__call__", _load_saved_prediction)
|
||
|
||
@pytest.fixture
def init_runner(init_dataframes):
    """Return a DeepSoloRunner built from the ``init_dataframes`` fixture.

    The runner is configured with the IC15 R_50 finetune config file found
    under ``ADET_PATH``.
    """
    parents, patches = init_dataframes
    config = f"{ADET_PATH}/configs/R_50/IC15/finetune_150k_tt_mlt_13_15_textocr.yaml"
    return DeepSoloRunner(patches, parent_df=parents, cfg_file=config)
|
||
|
||
@pytest.fixture
def runner_run_all(init_runner, mock_response):
    """Return the ``init_runner`` runner after ``run_all`` has been called.

    ``mock_response`` is requested so the predictor is stubbed before running.
    """
    init_runner.run_all()  # return value is intentionally discarded
    return init_runner
|
||
|
||
def test_deepsolo_init(init_dataframes):
    """Check that DeepSoloRunner can be initialised from in-memory dataframes."""
    parents, patches = init_dataframes
    config = f"{ADET_PATH}/configs/R_50/IC15/finetune_150k_tt_mlt_13_15_textocr.yaml"
    runner = DeepSoloRunner(patches, parent_df=parents, cfg_file=config)

    assert isinstance(runner, DeepSoloRunner)
    assert isinstance(runner.predictor, DefaultPredictor)
    # The first row of both dataframes must hold its coordinates as a tuple.
    for frame in (runner.parent_df, runner.patch_df):
        assert isinstance(frame.iloc[0]["coordinates"], tuple)
|
||
|
||
def test_deepsolo_init_str(init_dataframes, tmp_path):
    """Check that DeepSoloRunner can be initialised from CSV paths given as strings."""
    parent_df, patch_df = init_dataframes
    parent_csv = f"{tmp_path}/parent_df.csv"
    patch_csv = f"{tmp_path}/patch_df.csv"
    # DataFrame.to_csv returns None when given a path, so the results are not
    # assigned back to the dataframe names.
    parent_df.to_csv(parent_csv)
    patch_df.to_csv(patch_csv)
    runner = DeepSoloRunner(
        patch_csv,
        parent_df=parent_csv,
        cfg_file=f"{ADET_PATH}/configs/R_50/IC15/finetune_150k_tt_mlt_13_15_textocr.yaml",
    )
    assert isinstance(runner, DeepSoloRunner)
    assert isinstance(runner.predictor, DefaultPredictor)
    assert isinstance(runner.parent_df.iloc[0]["coordinates"], tuple)
    assert isinstance(runner.patch_df.iloc[0]["coordinates"], tuple)
|
||
|
||
def test_deepsolo_init_pathlib(init_dataframes, tmp_path):
    """Check that DeepSoloRunner can be initialised from CSV paths given as ``pathlib.Path``."""
    parent_df, patch_df = init_dataframes
    parent_csv = pathlib.Path(f"{tmp_path}/parent_df.csv")
    patch_csv = pathlib.Path(f"{tmp_path}/patch_df.csv")
    # DataFrame.to_csv returns None when given a path, so the results are not
    # assigned back to the dataframe names.
    parent_df.to_csv(parent_csv)
    patch_df.to_csv(patch_csv)
    runner = DeepSoloRunner(
        patch_csv,
        parent_df=parent_csv,
        cfg_file=f"{ADET_PATH}/configs/R_50/IC15/finetune_150k_tt_mlt_13_15_textocr.yaml",
    )
    assert isinstance(runner, DeepSoloRunner)
    assert isinstance(runner.predictor, DefaultPredictor)
    assert isinstance(runner.parent_df.iloc[0]["coordinates"], tuple)
    assert isinstance(runner.patch_df.iloc[0]["coordinates"], tuple)
|
||
|
||
def test_deepsolo_init_tsv(init_dataframes, tmp_path):
    """Check that DeepSoloRunner can be initialised from tab-separated files."""
    parent_df, patch_df = init_dataframes
    parent_tsv = f"{tmp_path}/parent_df.tsv"
    patch_tsv = f"{tmp_path}/patch_df.tsv"
    # DataFrame.to_csv returns None when given a path, so the results are not
    # assigned back to the dataframe names.
    parent_df.to_csv(parent_tsv, sep="\t")
    patch_df.to_csv(patch_tsv, sep="\t")
    runner = DeepSoloRunner(
        patch_tsv,
        parent_df=parent_tsv,
        delimiter="\t",
        cfg_file=f"{ADET_PATH}/configs/R_50/IC15/finetune_150k_tt_mlt_13_15_textocr.yaml",
    )
    assert isinstance(runner, DeepSoloRunner)
    assert isinstance(runner.predictor, DefaultPredictor)
    assert isinstance(runner.parent_df.iloc[0]["coordinates"], tuple)
    assert isinstance(runner.patch_df.iloc[0]["coordinates"], tuple)
|
||
|
||
def test_deepsolo_run_all(init_runner, mock_response):
    """Check the dict returned by ``run_all`` and its dataframe form."""
    patch_id = "patch-0-0-800-40-#mapreader_text.png#.png"

    # Dictionary output
    predictions = init_runner.run_all()
    assert isinstance(predictions, dict)
    assert patch_id in predictions
    assert isinstance(predictions[patch_id], list)

    # Dataframe output
    frame = init_runner._dict_to_dataframe(
        init_runner.patch_predictions, geo=False, parent=False
    )
    assert isinstance(frame, pd.DataFrame)
    assert set(frame.columns) == {"image_id", "geometry", "text", "score"}
    assert patch_id in frame["image_id"].values
|
||
|
||
def test_deepsolo_convert_to_parent(runner_run_all, mock_response):
    """Check the output of ``convert_to_parent_pixel_bounds`` and its dataframe form."""
    parent_id = "mapreader_text.png"

    # Dictionary output
    parent_preds = runner_run_all.convert_to_parent_pixel_bounds()
    assert isinstance(parent_preds, dict)
    assert parent_id in parent_preds
    assert isinstance(parent_preds[parent_id], list)

    # Dataframe output
    frame = runner_run_all._dict_to_dataframe(
        runner_run_all.parent_predictions, geo=False, parent=True
    )
    assert isinstance(frame, pd.DataFrame)
    expected_columns = {"image_id", "patch_id", "geometry", "text", "score"}
    assert set(frame.columns) == expected_columns
    assert parent_id in frame["image_id"].values
|
||
|
||
def test_deepsolo_convert_to_parent_coords(runner_run_all, mock_response):
    """Check the output of ``convert_to_coords`` and its GeoDataFrame form."""
    parent_id = "mapreader_text.png"

    # Dictionary output
    geo_preds = runner_run_all.convert_to_coords()
    assert isinstance(geo_preds, dict)
    assert parent_id in geo_preds
    assert isinstance(geo_preds[parent_id], list)

    # GeoDataFrame output
    geo_frame = runner_run_all._dict_to_dataframe(
        runner_run_all.geo_predictions, geo=True, parent=True
    )
    assert isinstance(geo_frame, gpd.GeoDataFrame)
    expected_columns = {"image_id", "patch_id", "geometry", "crs", "text", "score"}
    assert set(geo_frame.columns) == expected_columns
    assert parent_id in geo_frame["image_id"].values
    assert geo_frame.crs == runner_run_all.parent_df.crs
|
||
|
||
def test_deepsolo_deduplicate(sample_dir, tmp_path, mock_response):
    """Check that deduplication never increases the number of parent predictions.

    The map is patchified with ``overlap=0.5``. Counts are compared for no
    deduplication, deduplication with the default ``min_ioa`` and
    deduplication with ``min_ioa=0.5``.
    """
    map_images = MapImages(f"{sample_dir}/mapreader_text.png")
    map_images.add_metadata(f"{sample_dir}/mapreader_text_metadata.csv")
    map_images.patchify_all(patch_size=800, path_save=tmp_path, overlap=0.5)
    map_images.check_georeferencing()
    parents, patches = map_images.convert_images()

    config = f"{ADET_PATH}/configs/R_50/IC15/finetune_150k_tt_mlt_13_15_textocr.yaml"
    runner = DeepSoloRunner(patches, parent_df=parents, cfg_file=config)
    runner.run_all()

    # Baseline: keep every prediction.
    raw = runner.convert_to_parent_pixel_bounds(deduplicate=False)
    count_raw = len(raw["mapreader_text.png"])

    # Reset the stored parent predictions before each new conversion.
    runner.parent_predictions = {}
    deduped_default = runner.convert_to_parent_pixel_bounds(deduplicate=True)
    count_default = len(deduped_default["mapreader_text.png"])
    print(count_raw, count_default)
    assert count_raw >= count_default

    runner.parent_predictions = {}
    deduped_half = runner.convert_to_parent_pixel_bounds(
        deduplicate=True, min_ioa=0.5
    )
    count_half = len(deduped_half["mapreader_text.png"])
    print(count_raw, count_half)
    assert count_raw >= count_half
    assert count_default >= count_half
|
||
|
||
def test_deepsolo_run_on_image(init_runner, mock_response):
    """Check that ``run_on_image`` with ``return_outputs=True`` returns a dict holding Instances."""
    first_patch_path = init_runner.patch_df.iloc[0]["image_path"]
    outputs = init_runner.run_on_image(first_patch_path, return_outputs=True)

    assert isinstance(outputs, dict)
    assert "instances" in outputs
    assert isinstance(outputs["instances"], Instances)
|
||
|
||
def test_deepsolo_save_to_geojson(runner_run_all, tmp_path, mock_response):
    """Check that ``save_to_geojson`` writes a file readable as a GeoDataFrame."""
    geojson_path = f"{tmp_path}/text.geojson"

    runner_run_all.convert_to_coords()  # return value is intentionally discarded
    runner_run_all.save_to_geojson(geojson_path)
    assert os.path.exists(geojson_path)

    loaded = gpd.read_file(geojson_path)
    assert isinstance(loaded, gpd.GeoDataFrame)
    expected_columns = {"image_id", "patch_id", "geometry", "crs", "text", "score"}
    assert set(loaded.columns) == expected_columns
Oops, something went wrong.