Skip to content

Commit

Permalink
TLDR-850 fixes after review
Browse files Browse the repository at this point in the history
  • Loading branch information
oksidgy committed Nov 14, 2024
1 parent e5f829e commit d2fdbb3
Showing 1 changed file with 44 additions and 0 deletions.
44 changes: 44 additions & 0 deletions tests/api_tests/test_api_module_table_recognizer.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import json
import os
import unittest
from typing import List
Expand All @@ -11,6 +12,12 @@ class TestRecognizedTable(AbstractTestApiDocReader):
def _get_abs_path(self, file_name: str) -> str:
    """
    Build the path of a test file located in the "tables" subdirectory of the test data directory.

    :param file_name: name of the file inside the "tables" subdirectory
    :return: ``self.data_directory_path`` joined with "tables" and ``file_name``
    """
    tables_directory = os.path.join(self.data_directory_path, "tables")
    return os.path.join(tables_directory, file_name)

def _test_bbox_annotations(self, node: dict, target_dict: dict, delta: float = 0.05) -> None:
    """
    Check the "bounding box" annotation of a node against the expected values.

    The first annotation of ``node`` named "bounding box" is taken, its value is parsed as JSON
    and each key of ``target_dict`` is compared with the parsed value.

    :param node: dict with an "annotations" list, each annotation is a dict with "name" and "value" items
    :param target_dict: expected numeric value for every bounding box field that should be checked
    :param delta: maximal allowed absolute difference between the actual and the expected value
    """
    bbox_value = next((annotation["value"] for annotation in node["annotations"] if annotation["name"] == "bounding box"), None)
    # report a missing annotation as a readable test failure rather than an IndexError
    self.assertIsNotNone(bbox_value, 'node has no "bounding box" annotation')
    bbox = json.loads(bbox_value)

    # NOTE: delta is an absolute tolerance and it is applied to every checked field alike
    for key, expected in target_dict.items():
        self.assertIn(key, bbox, f'bounding box annotation has no "{key}" field')
        self.assertAlmostEqual(float(bbox[key]), expected, delta=delta, msg=f'bounding box field "{key}" differs from the expected value')

def test_api_table_recognition_3(self) -> None:
file_name = "example_with_table16.jpg"
res = self._send_request(file_name)
Expand Down Expand Up @@ -218,7 +225,26 @@ def test_multipage_gost_table_image(self) -> None:
file_name = "gost_multipage_table.pdf"
result = self._send_request(file_name, data={"need_gost_frame_analysis": "True"}) # don't pass pdf_with_text_layer to check condition in PDFBaseReader
self.assertTrue(len(result["content"]["tables"][0]["cells"]) > 35)
target_bbox_dict = {
"x_top_left": 0.14,
"y_top_left": 0.11,
"width": 0.07,
"height": 0.01,
"page_width": 1653,
"page_height": 2339
}
self._test_bbox_annotations(result["content"]["structure"]["subparagraphs"][0], target_bbox_dict)
self.assertTrue("Состав квалификационных испытаний" in result["content"]["structure"]["subparagraphs"][0]["text"])
self.assertTrue("KR13" in result["content"]["tables"][0]["cells"][-1][0]["lines"][0]["text"]) # check the last row of multipage table
target_bbox_dict_1 = {
"x_top_left": 0.15,
"y_top_left": 0.58,
"width": 0.04,
"height": 0.009,
"page_width": 1653,
"page_height": 2339
}
self._test_bbox_annotations(result["content"]["tables"][0]["cells"][-1][0]["lines"][0], target_bbox_dict_1)
self.assertTrue("R13.1" in result["content"]["tables"][0]["cells"][-1][1]["lines"][0]["text"]) # check that it belongs to first and only table
self.assertTrue("Испытание по проверке" in result["content"]["tables"][0]["cells"][-1][2]["lines"][0]["text"])
self.assertTrue("3.6" in result["content"]["tables"][0]["cells"][-1][3]["lines"][0]["text"])
Expand All @@ -244,4 +270,22 @@ def test_multipage_gost_table_with_text_layer_and_pages_param(self) -> None:
self.assertEqual(len(result["content"]["tables"]), 1)
self.assertEqual(len(result["content"]["tables"][0]["cells"]), 5)
self.assertTrue("SAMPLE TEXT" in result["content"]["tables"][0]["cells"][0][0]["lines"][0]["text"])
target_bbox_dict_1 = {
"x_top_left": 0.13,
"y_top_left": 0.07,
"width": 0.06,
"height": 0.007,
"page_width": 595,
"page_height": 841
}
self._test_bbox_annotations(result["content"]["tables"][0]["cells"][0][0]["lines"][0], target_bbox_dict_1)
self.assertTrue("2" in result["content"]["tables"][0]["cells"][-1][0]["lines"][0]["text"])
target_bbox_dict_2 = {
"x_top_left": 0.13,
"y_top_left": 0.15,
"width": 0.005,
"height": 0.007,
"page_width": 595,
"page_height": 841
}
self._test_bbox_annotations(result["content"]["tables"][0]["cells"][-1][0]["lines"][0], target_bbox_dict_2)

0 comments on commit d2fdbb3

Please sign in to comment.