Merge branch 'maint/check-spelling' into 'main'

MAINT: Fix spelling mistakes detected by codespell. See merge request heka/medkit!249. changelog: MAINT: Fix spelling mistakes detected by codespell
medkit-lib · Dec 4, 2023 · 2e8d39e · 2e8d39e
2 parents 0bb1a9d + 119e38e
commit 2e8d39e
Show file tree

Hide file tree

Showing 20 changed files with 62 additions and 59 deletions.
diff --git a/docs/api/text.md b/docs/api/text.md
@@ -553,7 +553,7 @@ Medkit provides some modules to facilitate post-processing operations.
 
 For the moment, you can use this module to:
 - align source and target {class}`~.core.text.Segment`s from the same {class}`~.core.text.TextDocument`
-- duplicate attributes bewteen segments. For example, you can duplicate an attribute from a sentence to its entities.
+- duplicate attributes between segments. For example, you can duplicate an attribute from a sentence to its entities.
 
 - filter overlapping entities: useful when creating named entity recognition (NER) datasets
 - create mini-documents from a {class}`~.core.text.TextDocument`. 
@@ -581,7 +581,7 @@ For more details about public APIs, refer to {mod}`~.text.metrics`
 
 Medkit provides {class}`~.metrics.classification.TextClassificationEvaluator`, an evaluator for document attributes. You can compute the following metrics depending on your use-case:
 
-### Classification repport
+### Classification report
 -  `compute_classification_report`: To compare a list of reference and predicted documents. This method uses [sklearn](https://scikit-learn.org/stable/index.html) as backend to compute precision, recall, and F1-score.
 
 ### Inter-rater agreement

diff --git a/docs/api/training.md b/docs/api/training.md
@@ -18,13 +18,13 @@ A component can implement the {class}`~.training.TrainableComponent` protocol to
 
 The following table explains who makes the calls and where they make them:  
 
-| Who                | Where             | A TrainableComponent                                                                                     |
-| ------------------ | ----------------- | -------------------------------------------------------------------------------------------------------- |
-| TrainableComponent | Initialization    | **load** : load/initialize modules to be trained                                                         |
-| Trainer            | Initialization    | **create_optimizer** : define an optimizer for the training/evalution loop                               |
-|                    | Data loading      | **preproces**: transform medkit anns to input data <br>**collate**: creates a BatchData using input data |
-|                    | Forward step      | **forward**: call internal model, return loss and model output                                           |
-|                    | Saving checkpoint | **save**: save trained modules                                                                           |
+| Who                | Where             | A TrainableComponent                                                                                      |
+|--------------------|-------------------|-----------------------------------------------------------------------------------------------------------|
+| TrainableComponent | Initialization    | **load** : load/initialize modules to be trained                                                          |
+| Trainer            | Initialization    | **create_optimizer** : define an optimizer for the training / evaluation loop                             |
+|                    | Data loading      | **preprocess**: transform medkit anns to input data <br>**collate**: creates a BatchData using input data |
+|                    | Forward step      | **forward**: call internal model, return loss and model output                                            |
+|                    | Saving checkpoint | **save**: save trained modules                                                                            |
 
 ### A trainable component to detect entities
 

diff --git a/docs/examples/edsnlp.md b/docs/examples/edsnlp.md
@@ -138,7 +138,7 @@ Here are the supported EDS-NLP attributes values and the corresponding medkit cl
 - `Duration` (created by `eds.dates`): {class}`medkit.text.ner.DurationAttribute`
 
 ```{note}
-The transformations performed by {class}`~.EDSNLPDocPipeline` can be overriden
+The transformations performed by {class}`~.EDSNLPDocPipeline` can be overridden
 or extended with the `medkit_attribute_factories` init parameter. For a list of
 all the default transformations, see
 {const}`~medkit.text.spacy.edsnlp.DEFAULT_ATTRIBUTE_FACTORIES` and corresponding

diff --git a/docs/user_guide/first_steps.md b/docs/user_guide/first_steps.md
@@ -183,7 +183,7 @@ this case, determined by the rule that was used to match it).
 
 ## Detecting negation
 
-So far we have detected several entities with `"problem"` or `"treatement"`
+So far we have detected several entities with `"problem"` or `"treatment"`
 labels in our document. We might be tempted to use them directly to build a list
 of problems that the patient faces and treatments that were given, but if we
 look at how these entities are used in the document, we will see that some of

diff --git a/medkit/core/audio/audio_buffer.py b/medkit/core/audio/audio_buffer.py
@@ -238,7 +238,7 @@ def __eq__(self, other: object) -> bool:
 
 
 class MemoryAudioBuffer(AudioBuffer):
-    """Audio buffer giving acces to signals stored in memory
+    """Audio buffer giving access to signals stored in memory
     (to use when reading/writing a modified audio signal)."""
 
     def __init__(self, signal: np.ndarray, sample_rate: int):

diff --git a/medkit/core/text/span_utils.py b/medkit/core/text/span_utils.py
@@ -121,9 +121,9 @@ def _replace_in_spans(spans, ranges, replacement_lengths):
     while span_index < len(spans) or range_index < len(ranges):
         # iterate to next range if current range has been fully handled
         if range_index < len(ranges) and range_end <= span_start:
-            # we have encountered all spans overlaping with the range to replace,
-            # and we have stored the overlaping parts in replaced_spans.
-            # create new ModifiedSpan referrencing all the replaced_spans
+            # we have encountered all spans overlapping with the range to replace,
+            # and we have stored the overlapping parts in replaced_spans.
+            # create new ModifiedSpan referencing all the replaced_spans
             # and add it to output
             # (except if replacement_length is 0, in which case the spans were
             # just removed)
@@ -211,7 +211,7 @@ def _replace_in_spans(spans, ranges, replacement_lengths):
                 span = ModifiedSpan(
                     length=length_after_range, replaced_spans=span.replaced_spans
                 )
-        # update span_start to point to the begining of the remainder
+        # update span_start to point to the beginning of the remainder
         span_start = span_end - length_after_range
 
     return output_spans

diff --git a/medkit/text/ner/adicap_norm_attribute.py b/medkit/text/ner/adicap_norm_attribute.py
@@ -15,7 +15,7 @@ class ADICAPNormAttribute(EntityNormAttribute):
     Attribute describing tissue sample using the ADICAP (Association pour le
     Développement de l'Informatique en Cytologie et Anatomo-Pathologie) coding.
 
-    Cf https://smt.esante.gouv.fr/wp-json/ans/terminologies/document?terminologyId=terminologie-adicap&fileName=cgts_sem_adicap_fiche-detaillee.pdf for a complete description of the coding.
+    See <https://smt.esante.gouv.fr/wp-json/ans/terminologies/document?terminologyId=terminologie-adicap&fileName=cgts_sem_adicap_fiche-detaillee.pdf> for a complete description of the coding.
 
     This class is replicating EDS-NLP's `Adicap` class, making it a medkit
     `Attribute`.

diff --git a/medkit/text/ner/hf_tokenization_utils.py b/medkit/text/ner/hf_tokenization_utils.py
@@ -210,12 +210,12 @@ def align_and_map_tokens_with_tags(
     >>> print(text_encoding.tokens)
     ['[CLS]', 'med', '##kit', '[SEP]']
 
-    Maping all tags to tags_ids
+    Mapping all tags to tags_ids
 
     >>> tags_ids = align_and_map_tokens_with_tags(text_encoding, tags,tag_to_id)
     >>> assert tags_ids == [-100, 1, 2, -100]
 
-    Maping only first tag in tokens
+    Mapping only first tag in tokens
 
     >>> tags_ids = align_and_map_tokens_with_tags(text_encoding, tags, tag_to_id,False)
     >>> assert tags_ids == [-100, 1, -100, -100]

diff --git a/medkit/text/spacy/spacy_utils.py b/medkit/text/spacy/spacy_utils.py
@@ -280,7 +280,7 @@ def build_spacy_doc_from_medkit_segment(
     # include annotations in the Doc object
     # define custom attributes in spacy from selected annotations
     if attrs is None:
-        # include all atributes
+        # include all attributes
         attrs = set(attr.label for ann in annotations for attr in ann.attrs)
     _define_attrs_extensions(attrs)
 
@@ -371,7 +371,7 @@ def _get_defined_spacy_attrs(include_medkit_attrs: bool = False) -> List[str]:
     Parameters
     ----------
     include_medkit_attrs:
-        If True, medkit attrs (attrs transfered from medkit) are included
+        If True, medkit attrs (attrs transferred from medkit) are included
 
     Returns
     -------

diff --git a/medkit/text/translation/hf_translator.py b/medkit/text/translation/hf_translator.py
@@ -286,7 +286,7 @@ def align(
             target_texts
         ), "Must have same number of source and target texts"
 
-        aligments = []
+        alignments = []
         source_text_batches_iter = medkit.core.utils.batch_list(
             source_texts, self._batch_size
         )
@@ -296,8 +296,8 @@ def align(
         for source_text_batch, target_text_batch in zip(
             source_text_batches_iter, target_text_batches_iter
         ):
-            aligments += self._align_batch(source_text_batch, target_text_batch)
-        return aligments
+            alignments += self._align_batch(source_text_batch, target_text_batch)
+        return alignments
 
     def _align_batch(self, source_texts, target_texts):
         # preprocess

diff --git a/tests/unit/core/audio/test_document.py b/tests/unit/core/audio/test_document.py
@@ -70,7 +70,7 @@ def test_raw_segment():
     raw_seg = doc_with_raw_audio.raw_segment
     assert raw_seg.audio == audio
 
-    # also available trough get() and get_by_id()
+    # also available through get() and get_by_id()
     assert doc_with_raw_audio.anns.get(label=AudioDocument.RAW_LABEL) == [raw_seg]
     assert doc_with_raw_audio.anns.get_by_id(raw_seg.uid) == raw_seg
     # but not included in full annotations list

diff --git a/tests/unit/core/prov_tracer/test_prov_tracer.py b/tests/unit/core/prov_tracer/test_prov_tracer.py
@@ -65,7 +65,10 @@ def test_multiple_items_with_sources():
 
 
 def test_intermediate_operation():
-    """Input items passed to an intermediate operation, then intermediate items passed to another operatio"""
+    """
+    Input items passed to an intermediate operation,
+    then intermediate items passed to another operation.
+    """
     tracer = ProvTracer()
     # generate 2 items and prefix them twice with 2 different operations
     generator = Generator(tracer)
@@ -93,7 +96,7 @@ def test_intermediate_operation():
         assert prefixed_prov_1.op_desc == prefixer_1.description
         # 1st prefixed item was derived from input item
         assert prefixed_prov_1.source_data_items == [input_item]
-        # 1st prefixed item was used to derive 2st prefixed item
+        # 1st prefixed item was used to derive 2nd prefixed item
         assert prefixed_prov_1.derived_data_items == [prefixed_item_2]
 
         prefixed_prov_2 = tracer.get_prov(prefixed_item_2.uid)

diff --git a/tests/unit/core/text/test_span_utils.py b/tests/unit/core/text/test_span_utils.py
@@ -83,7 +83,7 @@ def test_move_after():
 def test_replace_in_spans():
     # only one span, starting at 0
     spans = [Span(0, 10)]
-    # replace begining
+    # replace beginning
     assert _replace_in_spans(spans, [(0, 6)], [6]) == [
         ModifiedSpan(6, [Span(0, 6)]),
         Span(6, 10),
@@ -114,7 +114,7 @@ def test_replace_in_spans():
 
     # only one span with non-zero start
     spans = [Span(10, 20)]
-    # replace begining (same length)
+    # replace beginning (same length)
     assert _replace_in_spans(spans, [(0, 6)], [6]) == [
         ModifiedSpan(6, [Span(10, 16)]),
         Span(16, 20),
@@ -164,14 +164,14 @@ def test_replace_in_spans():
         Span(30, 40),
         Span(50, 60),
     ]
-    # replace across several spans (end of 1st span and begining of 2d span)
+    # replace across several spans (end of 1st span and beginning of 2d span)
     assert _replace_in_spans(spans, [(4, 14)], [10]) == [
         Span(10, 14),
         ModifiedSpan(10, [Span(14, 20), Span(30, 34)]),
         Span(34, 40),
         Span(50, 60),
     ]
-    # replace across several spans (end of 1st span, entire 2d span, begining of 3d span)
+    # replace across several spans (end of 1st span, entire 2d span, beginning of 3d span)
     assert _replace_in_spans(spans, [(4, 24)], [10]) == [
         Span(10, 14),
         ModifiedSpan(10, [Span(14, 20), Span(30, 40), Span(50, 54)]),
@@ -192,14 +192,14 @@ def test_replace_in_spans():
         Span(30, 40),
         Span(50, 60),
     ]
-    # replace across several spans (end of 1st span and begining of 2d span)
+    # replace across several spans (end of 1st span and beginning of 2d span)
     assert _replace_in_spans(spans, [(4, 14)], [5]) == [
         ModifiedSpan(4, [Span(10, 30)]),
         ModifiedSpan(5, [Span(10, 30), Span(30, 39)]),
         Span(39, 40),
         Span(50, 60),
     ]
-    # replace accross several spans (remove end of 1st span, remove 2d span fully, remove begining of last span)
+    # replace across several spans (remove end of 1st span, remove 2d span fully, remove beginning of last span)
     assert _replace_in_spans(spans, [(4, 24)], [5]) == [
         ModifiedSpan(4, [Span(10, 30)]),
         ModifiedSpan(5, [Span(10, 30), Span(30, 40), Span(50, 59)]),
@@ -219,7 +219,7 @@ def test_replace_in_spans():
 def test_remove_in_spans():
     # only one span
     spans = [Span(10, 20)]
-    # remove at begining
+    # remove at beginning
     assert _remove_in_spans(spans, [(0, 6)]) == [Span(16, 20)]
     # remove at end
     assert _remove_in_spans(spans, [(4, 10)]) == [Span(10, 14)]
@@ -245,13 +245,13 @@ def test_remove_in_spans():
         Span(30, 40),
         Span(50, 60),
     ]
-    # remove accross several spans (end of 1st span and begining of 2d span)
+    # remove across several spans (end of 1st span and beginning of 2d span)
     assert _remove_in_spans(spans, [(4, 14)]) == [
         Span(10, 14),
         Span(34, 40),
         Span(50, 60),
     ]
-    # remove accross several spans (remove end of 1st span, remove 2d span fully, remove begining of last span)
+    # remove across several spans (remove end of 1st span, remove 2d span fully, remove beginning of last span)
     assert _remove_in_spans(spans, [(4, 24)]) == [Span(10, 14), Span(54, 60)]
     # remove several ranges
     assert _remove_in_spans(spans, [(4, 14), (16, 24)]) == [
@@ -262,7 +262,7 @@ def test_remove_in_spans():
 
     # additional span
     spans = [ModifiedSpan(length=10, replaced_spans=[Span(10, 30)])]
-    # remove at begining
+    # remove at beginning
     assert _remove_in_spans(spans, [(0, 6)]) == [ModifiedSpan(4, [Span(10, 30)])]
     # remove at end
     assert _remove_in_spans(spans, [(4, 10)]) == [ModifiedSpan(4, [Span(10, 30)])]
@@ -286,7 +286,7 @@ def test_remove_in_spans():
 
     # mix of additional spans and normal spans
     spans = [ModifiedSpan(length=10, replaced_spans=[Span(10, 30)]), Span(30, 40)]
-    # remove accross both (end of 1st pan and begining of 2d span)
+    # remove across both (end of 1st span and beginning of 2d span)
     assert _remove_in_spans(spans, [(4, 14)]) == [
         ModifiedSpan(4, [Span(10, 30)]),
         Span(34, 40),
@@ -302,7 +302,7 @@ def test_remove_in_spans():
 def test_extract_in_spans():
     # only one span
     spans = [Span(10, 20)]
-    # extract begining
+    # extract beginning
     assert _extract_in_spans(spans, [(0, 6)]) == [Span(10, 16)]
     # extract end
     assert _extract_in_spans(spans, [(4, 10)]) == [Span(14, 20)]
@@ -318,12 +318,12 @@ def test_extract_in_spans():
     spans = [Span(10, 20), Span(30, 40), Span(50, 60)]
     # extract end of 1st span
     assert _extract_in_spans(spans, [(4, 10)]) == [Span(14, 20)]
-    # extract in several spans (end of 1st span and begining of 2d span)
+    # extract in several spans (end of 1st span and beginning of 2d span)
     assert _extract_in_spans(spans, [(4, 14)]) == [
         Span(14, 20),
         Span(30, 34),
     ]
-    # extract in several spans (end of 1st span, entire 2d span, begining of 3d span)
+    # extract in several spans (end of 1st span, entire 2d span, beginning of 3d span)
     assert _extract_in_spans(spans, [(4, 24)]) == [
         Span(14, 20),
         Span(30, 40),
@@ -339,7 +339,7 @@ def test_extract_in_spans():
 
     # additional span
     spans = [ModifiedSpan(length=10, replaced_spans=[Span(10, 30)])]
-    # extract begining
+    # extract beginning
     assert _extract_in_spans(spans, [(0, 6)]) == [ModifiedSpan(6, [Span(10, 30)])]
     # extract end
     assert _extract_in_spans(spans, [(4, 10)]) == [ModifiedSpan(6, [Span(10, 30)])]
@@ -350,7 +350,7 @@ def test_extract_in_spans():
 
     # mix of additional spans and normal spans
     spans = [ModifiedSpan(length=10, replaced_spans=[Span(10, 30)]), Span(30, 40)]
-    # extract in both (end of 1st pan and begining of 2d span)
+    # extract in both (end of 1st span and beginning of 2d span)
     assert _extract_in_spans(spans, [(4, 14)]) == [
         ModifiedSpan(6, [Span(10, 30)]),
         Span(30, 34),
@@ -360,7 +360,7 @@ def test_extract_in_spans():
 def test_insert_in_spans():
     # only one span
     spans = [Span(10, 20)]
-    # insert at begining
+    # insert at beginning
     assert _insert_in_spans(spans, [0], [5]) == [ModifiedSpan(5, []), Span(10, 20)]
     # insert at end
     assert _insert_in_spans(spans, [10], [5]) == [Span(10, 20), ModifiedSpan(5, [])]
@@ -381,7 +381,7 @@ def test_insert_in_spans():
 
     # additional span
     spans = [ModifiedSpan(length=10, replaced_spans=[Span(20, 40)])]
-    # insert at begining
+    # insert at beginning
     assert _insert_in_spans(spans, [0], [5]) == [
         ModifiedSpan(5, []),
         ModifiedSpan(10, [Span(20, 40)]),
@@ -402,17 +402,17 @@ def test_insert_in_spans():
 def test_move_in_spans():
     # only one span
     spans = [Span(10, 30)]
-    # move from begining to end
+    # move from beginning to end
     assert _move_in_spans(spans, (0, 5), 20) == [Span(15, 30), Span(10, 15)]
-    # move from end to begining
+    # move from end to beginning
     assert _move_in_spans(spans, (15, 20), 0) == [Span(25, 30), Span(10, 25)]
     # move from inside to end
     assert _move_in_spans(spans, (5, 10), 20) == [
         Span(10, 15),
         Span(20, 30),
         Span(15, 20),
     ]
-    # move from inside to begining
+    # move from inside to beginning
     assert _move_in_spans(spans, (5, 10), 0) == [
         Span(15, 20),
         Span(10, 15),
@@ -428,7 +428,7 @@ def test_move_in_spans():
 
     # several spans
     spans = [Span(10, 30), Span(40, 60), Span(70, 90)]
-    # move from accross several spans
+    # move from across several spans
     assert _move_in_spans(spans, (5, 45), 50) == [
         Span(10, 15),
         Span(75, 80),

diff --git a/tests/unit/io/test__brat_utils.py b/tests/unit/io/test__brat_utils.py
@@ -159,7 +159,7 @@ def test_attribute_conf_file():
     attr_conf = AttributeConf(from_entity=True, type="severity", value="low")
     conf_file.add_attribute_type(attr_conf)
 
-    # finally a brat relation has an attribure 'severity' value 'inter'
+    # finally a brat relation has an attribute 'severity' value 'inter'
     attr_conf = AttributeConf(from_entity=False, type="severity", value="inter")
     conf_file.add_attribute_type(attr_conf)
 

diff --git a/tests/unit/io/test_brat_output_converter.py b/tests/unit/io/test_brat_output_converter.py
@@ -211,7 +211,7 @@ def test_annotation_conf_file():
     )
     config_file = BratAnnConfiguration()
 
-    # simulate expected annotations relations + entitites
+    # simulate expected annotations relations + entities
     annotations = get_anns_by_type(medkit_doc, anns_labels=None)
     _ = brat_converter._convert_medkit_anns_to_brat(
         segments=annotations["entities"],
@@ -452,7 +452,7 @@ def test_convert_cuis_to_notes(tmp_path: Path):
     entity_1.attrs.add(UMLSNormAttribute(cui="C0004096", umls_version="2021AB"))
     doc.anns.add(entity_1)
 
-    # 2st entity with multiple norm attributes
+    # 2nd entity with multiple norm attributes
     entity_2 = Entity(label="maladie", text="asthme", spans=[Span(21, 27)])
     entity_2.attrs.add(UMLSNormAttribute(cui="C2631234", umls_version="2021AB"))
     entity_2.attrs.add(UMLSNormAttribute(cui="C2631237", umls_version="2021AB"))