From 1dd9d61b0927fa1cb866dd9c0d3a4328d4a12608 Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Wed, 11 Sep 2024 21:17:56 -0400 Subject: [PATCH 01/25] spelling: a Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com> --- unstructured_inference/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unstructured_inference/utils.py b/unstructured_inference/utils.py index 696a2e8a..683d5e20 100644 --- a/unstructured_inference/utils.py +++ b/unstructured_inference/utils.py @@ -50,7 +50,7 @@ def __len__(self) -> int: def tag(elements: Iterable[LayoutElement]): - """Asign an numeric id to the elements in the list. + """Asign a numeric id to the elements in the list. Useful for debugging""" colors = ["red", "blue", "green", "magenta", "brown"] for i, e in enumerate(elements): From 9ff23c28a42c984555f141c328348c455558f3ca Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Wed, 11 Sep 2024 20:09:20 -0400 Subject: [PATCH 02/25] spelling: assign Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com> --- unstructured_inference/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unstructured_inference/utils.py b/unstructured_inference/utils.py index 683d5e20..95ef7e08 100644 --- a/unstructured_inference/utils.py +++ b/unstructured_inference/utils.py @@ -50,7 +50,7 @@ def __len__(self) -> int: def tag(elements: Iterable[LayoutElement]): - """Asign a numeric id to the elements in the list. + """Assign a numeric id to the elements in the list. Useful for debugging""" colors = ["red", "blue", "green", "magenta", "brown"] for i, e in enumerate(elements): From d6c0be684e8b5df41f23ecd1ea422dbf53d23cb7 Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Wed, 11 Sep 2024 18:11:21 -0400 Subject: [PATCH 03/25] spelling: assigned Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com> --- unstructured_inference/models/yolox.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unstructured_inference/models/yolox.py b/unstructured_inference/models/yolox.py index 0acd93f3..89647630 100644 --- a/unstructured_inference/models/yolox.py +++ b/unstructured_inference/models/yolox.py @@ -99,7 +99,7 @@ def image_processing( origin_img If specified, an Image object for process with YoloX model page_number - Number asigned to the PageLayout returned + Number assigned to the PageLayout returned output_directory Boolean indicating if result will be stored """ From 44207a1dbcaac326b1e35df0e7234171c40d841a Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Wed, 11 Sep 2024 20:09:28 -0400 Subject: [PATCH 04/25] spelling: assume Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com> --- unstructured_inference/inference/layoutelement.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unstructured_inference/inference/layoutelement.py b/unstructured_inference/inference/layoutelement.py index 37a9ef24..2b2c6a1d 100644 --- a/unstructured_inference/inference/layoutelement.py +++ b/unstructured_inference/inference/layoutelement.py @@ -178,7 +178,7 @@ def separate(region_a: Rectangle, region_b: Rectangle): """Reduce leftmost rectangle to don't overlap with the other""" def reduce(keep: Rectangle, reduce: Rectangle): - # Asume intersection + # Assume intersection # Other is down if reduce.y2 > keep.y2 and reduce.x1 < keep.x2: From e35d018ab3535bd70197d749f64d05f3d2500fdb Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Wed, 11 Sep 2024 18:11:07 -0400 Subject: [PATCH 05/25] spelling: between Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com> --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1d77591d..c6eda26a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -72,7 +72,7 @@ Fix syntax for generated HTML tables ## 0.7.22 -* fix: add logic to handle computation of intersections betwen 2 `Rectangle`s when a `Rectangle` has `None` value in its coordinates +* fix: add logic to handle computation of intersections between 2 `Rectangle`s when a `Rectangle` has `None` value in its coordinates ## 0.7.21 From 6a61937096529c34f0fd5453de4567d56beee666 Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Wed, 11 Sep 2024 18:11:34 -0400 Subject: [PATCH 06/25] spelling: bounding Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com> --- unstructured_inference/models/chipper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unstructured_inference/models/chipper.py b/unstructured_inference/models/chipper.py index 857c83e9..12761bac 100644 --- a/unstructured_inference/models/chipper.py +++ b/unstructured_inference/models/chipper.py @@ -658,7 +658,7 @@ def reduce_bbox_overlap( input_bbox: List[float], ) -> List[float]: """ - If an element does overlap with other elements, reduce bouding box by selecting the largest + If an element does overlap with other elements, reduce bounding box by selecting the largest bbox after blurring existing text """ input_bbox = [int(b) for b in input_bbox] From fedef7f480391e5a96f36c4a9c116c475355fc9e Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Wed, 11 Sep 2024 20:08:56 -0400 Subject: [PATCH 07/25] spelling: cannot Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com> --- test_unstructured_inference/test_utils.py | 2 +- unstructured_inference/utils.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test_unstructured_inference/test_utils.py b/test_unstructured_inference/test_utils.py index 399ca739..874a13e3 100644 --- a/test_unstructured_inference/test_utils.py +++ b/test_unstructured_inference/test_utils.py @@ -77,7 +77,7 @@ def test_pad_image_with_background_color(mock_pil_image): def test_pad_image_with_invalid_input(mock_pil_image): - with pytest.raises(ValueError, match="Can not pad an image with negative space!"): + with pytest.raises(ValueError, match="Cannot pad an image with negative space!"): pad_image_with_background_color(mock_pil_image, -1) diff --git a/unstructured_inference/utils.py b/unstructured_inference/utils.py index 95ef7e08..46affad1 100644 --- a/unstructured_inference/utils.py +++ b/unstructured_inference/utils.py @@ -72,7 +72,7 @@ def pad_image_with_background_color( width, height = image.size if pad < 0: raise ValueError( - "Can not pad an image with negative space! Please use a positive value for `pad`.", + "Cannot pad an image with negative space! Please use a positive value for `pad`.", ) new = Image.new(image.mode, (width + pad * 2, height + pad * 2), background_color) new.paste(image, (pad, pad)) From fd076d70c728d9aac15e3c9c7e186ee4dbad34fb Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Wed, 11 Sep 2024 18:26:07 -0400 Subject: [PATCH 08/25] spelling: containing Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com> --- unstructured_inference/models/tables.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unstructured_inference/models/tables.py b/unstructured_inference/models/tables.py index c390378e..655746b7 100644 --- a/unstructured_inference/models/tables.py +++ b/unstructured_inference/models/tables.py @@ -84,7 +84,7 @@ def get_structure( x: PILImage.Image, pad_for_structure_detection: int = inference_config.TABLE_IMAGE_BACKGROUND_PAD, ) -> dict: - """get the table structure as a dictionary contaning different types of elements as + """get the table structure as a dictionary containing different types of elements as key-value pairs; check table-transformer documentation for more information""" with torch.no_grad(): encoding = self.feature_extractor( From af27b6139ff84b2855d5df01b5b1f0dccd3ca869 Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Wed, 11 Sep 2024 20:09:48 -0400 Subject: [PATCH 09/25] spelling: currently Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com> --- unstructured_inference/models/detectron2onnx.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unstructured_inference/models/detectron2onnx.py b/unstructured_inference/models/detectron2onnx.py index 79cd0a1a..bcf46baf 100644 --- a/unstructured_inference/models/detectron2onnx.py +++ b/unstructured_inference/models/detectron2onnx.py @@ -131,7 +131,7 @@ def preprocess(self, image: Image.Image) -> Dict[str, np.ndarray]: """ # TODO (benjamin): check other shapes for inference img = np.array(image) - # TODO (benjamin): We should use models.get_model() but currenly returns Detectron model + # TODO (benjamin): We should use models.get_model() but currently returns Detectron model session = self.model # onnx input expected # [3,1035,800] From 9e5aa442ea1af09522227a41c19525c5ca0cd81a Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Wed, 11 Sep 2024 18:26:15 -0400 Subject: [PATCH 10/25] spelling: debug Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com> --- unstructured_inference/logger.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unstructured_inference/logger.py b/unstructured_inference/logger.py index c9645ac1..c0b69384 100644 --- a/unstructured_inference/logger.py +++ b/unstructured_inference/logger.py @@ -2,7 +2,7 @@ def translate_log_level(level: int) -> int: - """Translate Python debugg level to ONNX runtime error level + """Translate Python debug level to ONNX runtime error level since blank pages error are shown at level 3 that should be the exception, and 4 the normal behavior""" level_name = logging.getLevelName(level) From 25bf94fa98228fc32768ae1796737dab918ffb1e Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Wed, 11 Sep 2024 18:26:24 -0400 Subject: [PATCH 11/25] spelling: detected Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com> --- test_unstructured_inference/models/test_tables.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_unstructured_inference/models/test_tables.py b/test_unstructured_inference/models/test_tables.py index 15c467cd..4eb0c665 100644 --- a/test_unstructured_inference/models/test_tables.py +++ b/test_unstructured_inference/models/test_tables.py @@ -1763,7 +1763,7 @@ def test_padded_results_has_right_dimensions(table_transformer, example_image): pad = int(min(example_image.size) / 10) structure = table_transformer.get_structure(example_image, pad_for_structure_detection=pad) - # boxes deteced OUTSIDE of the original image; this shouldn't happen but we want to make sure + # boxes detected OUTSIDE of the original image; this shouldn't happen but we want to make sure # the code handles it as expected structure["pred_boxes"][0][0, :2] = 0.5 structure["pred_boxes"][0][0, 2:] = 1.0 From 87f529273edcbe94bcdd415f14d32b729a8f0a50 Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Wed, 11 Sep 2024 18:26:48 -0400 Subject: [PATCH 12/25] spelling: detectron Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com> --- unstructured_inference/models/detectron2onnx.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unstructured_inference/models/detectron2onnx.py b/unstructured_inference/models/detectron2onnx.py index bcf46baf..f9d87b6f 100644 --- a/unstructured_inference/models/detectron2onnx.py +++ b/unstructured_inference/models/detectron2onnx.py @@ -48,7 +48,7 @@ "model_path": os.path.join( HUGGINGFACE_HUB_CACHE, "detectron2_quantized", - "detectrin2_quantized.onnx", + "detectron2_quantized.onnx", ), "label_map": DEFAULT_LABEL_MAP, "confidence_threshold": 0.8, From 314069302e6a96c3624df2915ecd9f30d83730b1 Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Wed, 11 Sep 2024 18:27:06 -0400 Subject: [PATCH 13/25] spelling: distinct Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com> --- unstructured_inference/models/yolox.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unstructured_inference/models/yolox.py b/unstructured_inference/models/yolox.py index 89647630..48e0b9c7 100644 --- a/unstructured_inference/models/yolox.py +++ b/unstructured_inference/models/yolox.py @@ -125,7 +125,7 @@ def image_processing( boxes_xyxy[:, 3] = boxes[:, 1] + boxes[:, 3] / 2.0 boxes_xyxy /= ratio - # Note (Benjamin): Distinct models (quantized and original) requires distincts + # Note (Benjamin): Distinct models (quantized and original) requires distinct # levels of thresholds if "quantized" in self.model_path: dets = multiclass_nms(boxes_xyxy, scores, nms_thr=0.0, score_thr=0.07) From 485136e3dba9fab782122c648de85d6d9fbdfa7f Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Wed, 11 Sep 2024 18:27:16 -0400 Subject: [PATCH 14/25] spelling: environment Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com> --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c6eda26a..951e902a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -215,7 +215,7 @@ we have the mapping from standard language code to paddle language code. ## 0.6.0 -* add a config class to handle parameter configurations for inference tasks; parameters in the config class can be set via environement variables +* add a config class to handle parameter configurations for inference tasks; parameters in the config class can be set via environment variables * update behavior of `pad_image_with_background_color` so that input `pad` is applied to all sides ## 0.5.31 From 9be896167a79faa2f2d223d2890298008c62d92c Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Wed, 11 Sep 2024 20:09:05 -0400 Subject: [PATCH 15/25] spelling: github Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com> --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index ac759757..044706c0 100644 --- a/README.md +++ b/README.md @@ -89,6 +89,6 @@ information on how to report security vulnerabilities. | Section | Description | |-|-| -| [Unstructured Community Github](https://github.com/Unstructured-IO/community) | Information about Unstructured.io community projects | -| [Unstructured Github](https://github.com/Unstructured-IO) | Unstructured.io open source repositories | +| [Unstructured Community GitHub](https://github.com/Unstructured-IO/community) | Information about Unstructured.io community projects | +| [Unstructured GitHub](https://github.com/Unstructured-IO) | Unstructured.io open source repositories | | [Company Website](https://unstructured.io) | Unstructured.io product and company info | From 5dc848d21271e9683c806f9a6921c8790182711c Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Wed, 11 Sep 2024 18:27:33 -0400 Subject: [PATCH 16/25] spelling: initialization Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com> --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 951e902a..fee5d536 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -111,7 +111,7 @@ Fix syntax for generated HTML tables * refactor: add a class `ElementType` for the element type constants and use the constants to replace element type strings * enhancement: support extracting elements with types `Picture` and `Figure` -* fix: update logger in table initalization where the logger info was not showing +* fix: update logger in table initialization where the logger info was not showing * chore: supress UserWarning about specified model providers ## 0.7.12 From 53564081411b4b1ce9796ad2a2407accab3631e5 Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Wed, 11 Sep 2024 20:08:44 -0400 Subject: [PATCH 17/25] spelling: macos Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com> --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 044706c0..fdb502c1 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,7 @@ Run `pip install unstructured-inference`. [Detectron2](https://github.com/facebookresearch/detectron2) is required for using models from the [layoutparser model zoo](#using-models-from-the-layoutparser-model-zoo) but is not automatically installed with this package. -For MacOS and Linux, build from source with: +For macOS and Linux, build from source with: ```shell pip install 'git+https://github.com/facebookresearch/detectron2.git@57bdb21249d5418c130d54e2ebdc94dda7a4c01a' ``` From a2f6a1fc96978651eeadae83305d4d2880bfe5b5 Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Wed, 11 Sep 2024 18:28:08 -0400 Subject: [PATCH 18/25] spelling: package Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com> --- test_unstructured_inference/models/test_tables.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_unstructured_inference/models/test_tables.py b/test_unstructured_inference/models/test_tables.py index 4eb0c665..67ad8ab6 100644 --- a/test_unstructured_inference/models/test_tables.py +++ b/test_unstructured_inference/models/test_tables.py @@ -927,7 +927,7 @@ def test_table_prediction_output_format( assert expectation in result.values elif output_format == "cells": # other output like bbox are flakey to test since they depend on OCR and it may change - # slightly when OCR pacakge changes or even on different machines + # slightly when OCR package changes or even on different machines validation_fields = ("column_nums", "row_nums", "column header", "cell text") assert expectation in [{key: cell[key] for key in validation_fields} for cell in result] else: From 9a2f15c04329b5b714175af8df52f9a4820d0956 Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Wed, 11 Sep 2024 18:28:26 -0400 Subject: [PATCH 19/25] spelling: received Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com> --- unstructured_inference/models/chipper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unstructured_inference/models/chipper.py b/unstructured_inference/models/chipper.py index 12761bac..b363490f 100644 --- a/unstructured_inference/models/chipper.py +++ b/unstructured_inference/models/chipper.py @@ -137,7 +137,7 @@ def initialize( else: if swap_head_hidden_layer_size is not None: logger.warning( - f"swap_head is False but recieved value {swap_head_hidden_layer_size} for " + f"swap_head is False but received value {swap_head_hidden_layer_size} for " "swap_head_hidden_layer_size, which will be ignored.", ) From 9b3a070dec55af1159250b9eb7fb18c1fd32165f Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Wed, 11 Sep 2024 18:28:49 -0400 Subject: [PATCH 20/25] spelling: repetition Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com> --- test_unstructured_inference/models/test_chippermodel.py | 8 ++++---- unstructured_inference/models/chipper.py | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/test_unstructured_inference/models/test_chippermodel.py b/test_unstructured_inference/models/test_chippermodel.py index c68aa6bc..ad51dc53 100644 --- a/test_unstructured_inference/models/test_chippermodel.py +++ b/test_unstructured_inference/models/test_chippermodel.py @@ -190,11 +190,11 @@ def test_no_repeat_ngram_logits(): ) -def test_ngram_repetiton_stopping_criteria(): +def test_ngram_repetition_stopping_criteria(): input_ids = torch.tensor([[1, 2, 3, 4, 0, 1, 2, 3, 4]]) logits = torch.tensor([[0.1, -0.3, -0.5, 0, 1.0, -0.9]]) - stoppingCriteria = chipper.NGramRepetitonStoppingCriteria( + stoppingCriteria = chipper.NGramRepetitionStoppingCriteria( repetition_window=2, skip_tokens={0, 1, 2, 3, 4} ) @@ -202,7 +202,7 @@ def test_ngram_repetiton_stopping_criteria(): assert output is False - stoppingCriteria = chipper.NGramRepetitonStoppingCriteria( + stoppingCriteria = chipper.NGramRepetitionStoppingCriteria( repetition_window=2, skip_tokens={1, 2, 3, 4} ) output = stoppingCriteria(input_ids=input_ids, scores=logits) @@ -259,7 +259,7 @@ def test_postprocess_bbox(decoded_str, expected_classes): def test_predict_tokens_beam_indices(): model = get_model("chipper") model.stopping_criteria = [ - chipper.NGramRepetitonStoppingCriteria( + chipper.NGramRepetitionStoppingCriteria( repetition_window=1, skip_tokens={}, ), diff --git a/unstructured_inference/models/chipper.py b/unstructured_inference/models/chipper.py index b363490f..4f9305e8 100644 --- a/unstructured_inference/models/chipper.py +++ b/unstructured_inference/models/chipper.py @@ -102,7 +102,7 @@ def initialize( ] self.stopping_criteria = [ - NGramRepetitonStoppingCriteria( + NGramRepetitionStoppingCriteria( repetition_window=30, skip_tokens=get_table_token_ids(self.processor), ), @@ -1027,7 +1027,7 @@ def __call__( ) -class NGramRepetitonStoppingCriteria(StoppingCriteria): +class NGramRepetitionStoppingCriteria(StoppingCriteria): def __init__(self, repetition_window: int, skip_tokens: set = set()): self.repetition_window = repetition_window self.skip_tokens = skip_tokens From 31333cc8c98ae109ad428d225ecd7e90df791df2 Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Wed, 11 Sep 2024 18:28:53 -0400 Subject: [PATCH 21/25] spelling: replace Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com> --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fee5d536..f3282c2a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -256,7 +256,7 @@ we have the mapping from standard language code to paddle language code. ## 0.5.21 -* adds `safe_division` to replae 0 with machine epsilon for `float` to avoid division by 0 +* adds `safe_division` to replace 0 with machine epsilon for `float` to avoid division by 0 * apply `safe_division` to area overlap calculations in `unstructured_inference/inference/elements.py` ## 0.5.20 From 12f2b4444662d48a90ef49cc1488c8e271a32f71 Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Wed, 11 Sep 2024 18:29:02 -0400 Subject: [PATCH 22/25] spelling: running Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com> --- unstructured_inference/models/yolox.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unstructured_inference/models/yolox.py b/unstructured_inference/models/yolox.py index 48e0b9c7..5c6041a1 100644 --- a/unstructured_inference/models/yolox.py +++ b/unstructured_inference/models/yolox.py @@ -91,7 +91,7 @@ def image_processing( self, image: PILImage.Image, ) -> List[LayoutElement]: - """Method runing YoloX for layout detection, returns a PageLayout + """Method running YoloX for layout detection, returns a PageLayout parameters ---------- page From c633443e66c6ab6df7e1144b94511023bb3fe6b2 Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Wed, 11 Sep 2024 20:12:03 -0400 Subject: [PATCH 23/25] spelling: safely Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com> --- test_unstructured_inference/models/test_tables.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_unstructured_inference/models/test_tables.py b/test_unstructured_inference/models/test_tables.py index 67ad8ab6..4c0d8155 100644 --- a/test_unstructured_inference/models/test_tables.py +++ b/test_unstructured_inference/models/test_tables.py @@ -1767,7 +1767,7 @@ def test_padded_results_has_right_dimensions(table_transformer, example_image): # the code handles it as expected structure["pred_boxes"][0][0, :2] = 0.5 structure["pred_boxes"][0][0, 2:] = 1.0 - # mock a box we know are safly inside the original image with known positions + # mock a box we know are safely inside the original image with known positions width, height = example_image.size padded_width = width + pad * 2 padded_height = height + pad * 2 From 07da66a893227c3109cce146e8d311c9d3de34d8 Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Wed, 11 Sep 2024 18:29:20 -0400 Subject: [PATCH 24/25] spelling: suppress Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com> --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f3282c2a..ae0ccb0b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -112,7 +112,7 @@ Fix syntax for generated HTML tables * refactor: add a class `ElementType` for the element type constants and use the constants to replace element type strings * enhancement: support extracting elements with types `Picture` and `Figure` * fix: update logger in table initialization where the logger info was not showing -* chore: supress UserWarning about specified model providers +* chore: suppress UserWarning about specified model providers ## 0.7.12 From d2fcce2fa76a677f61b03612b934e5e60170bcbe Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Wed, 11 Sep 2024 18:29:15 -0400 Subject: [PATCH 25/25] spelling: suppressed Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com> --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ae0ccb0b..18b42cf4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -346,7 +346,7 @@ we have the mapping from standard language code to paddle language code. * Added functionality to convert a PDF in small chunks of pages at a time for `pdf2image.convert_from_path` * Table processing check for the area of the package to fix division by zero bug * Added CUDA and TensorRT execution providers for yolox and detectron2onnx model. -* Warning for onnx version of detectron2 for empty pages suppresed. +* Warning for onnx version of detectron2 for empty pages suppressed. ## 0.5.4