From eb257dac273761b9696d72ffd0d95ff6353490ef Mon Sep 17 00:00:00 2001
From: jarbasal <jarbasai@mailfence.com>
Date: Mon, 10 May 2021 19:05:28 +0100
Subject: [PATCH 1/2] feat/number_spans

feat/normalize_decimals

port lingua_nostra/pull/20 - support decimal markers

rebase of https://github.com/MycroftAI/lingua-franca/pull/69

Co-authored-by: jarbasal <jarbasai@mailfence.com>
---
 lingua_franca/lang/common_data_en.py |   2 +-
 lingua_franca/lang/parse_common.py   |  12 +
 lingua_franca/lang/parse_cs.py       |  31 ++-
 lingua_franca/lang/parse_da.py       |  48 ++--
 lingua_franca/lang/parse_de.py       |  59 +++--
 lingua_franca/lang/parse_en.py       | 248 ++++++++++++++++++-
 lingua_franca/lang/parse_es.py       |  44 +++-
 lingua_franca/lang/parse_fa.py       |  32 ++-
 lingua_franca/lang/parse_fr.py       |  47 +++-
 lingua_franca/lang/parse_it.py       |  32 ++-
 lingua_franca/lang/parse_nl.py       |  37 ++-
 lingua_franca/lang/parse_pl.py       |  36 ++-
 lingua_franca/lang/parse_pt.py       |  22 +-
 lingua_franca/lang/parse_sv.py       |  11 +-
 lingua_franca/parse.py               | 135 +++++++++--
 requirements/requirements.txt        |   3 +-
 test/unittests/test_format_pt.py     |  11 +
 test/unittests/test_parse_en.py      | 344 ++++++++++++++++++++++++++-
 18 files changed, 1001 insertions(+), 153 deletions(-)

diff --git a/lingua_franca/lang/common_data_en.py b/lingua_franca/lang/common_data_en.py
index f2f8de1a..25de4cd9 100644
--- a/lingua_franca/lang/common_data_en.py
+++ b/lingua_franca/lang/common_data_en.py
@@ -247,7 +247,7 @@
 
 
 # negate next number (-2 = 0 - 2)
-_NEGATIVES_EN = {"negative", "minus"}
+_NEGATIVES_EN = {"negative", "minus", "-"}
 
 # sum the next number (twenty two = 20 + 2)
 _SUMS_EN = {'twenty', '20', 'thirty', '30', 'forty', '40', 'fifty', '50',
diff --git a/lingua_franca/lang/parse_common.py b/lingua_franca/lang/parse_common.py
index 97cf5be7..aef9e199 100644
--- a/lingua_franca/lang/parse_common.py
+++ b/lingua_franca/lang/parse_common.py
@@ -192,6 +192,18 @@ def normalize(self, utterance="", remove_articles=None):
         return utterance
 
 
+def normalize_decimals(text, decimal):
+    """
+        Replace 'decimal' between digits with a period so Python can floatify
+        the number, eg. normalize_decimals("1,5", ",") returns "1.5"
+    """
+    # escape the marker so regex metacharacters such as "|" or "+" stay
+    # literal; substituting in place (rather than str.replace on the matched
+    # text) leaves identical substrings inside other words untouched
+    regex = r"\b(\d+)" + re.escape(decimal) + r"(\d+)\b"
+    return re.sub(regex, r"\1.\2", text)
+
+
 def match_yes_or_no(text, lang):
     resource_file = resolve_resource_file(f"text/{lang}/yesno.json")
     if not resource_file:
diff --git a/lingua_franca/lang/parse_cs.py b/lingua_franca/lang/parse_cs.py
index e0144b02..0590dc12 100644
--- a/lingua_franca/lang/parse_cs.py
+++ b/lingua_franca/lang/parse_cs.py
@@ -23,7 +23,7 @@
     _LONG_ORDINAL_CS, _LONG_SCALE_CS, _SHORT_SCALE_CS, _SHORT_ORDINAL_CS, \
     _FRACTION_STRING_CS, _MONTHS_CONVERSION, _MONTHS_CZECH, _TIME_UNITS_CONVERSION, \
     _ORDINAL_BASE_CS  # _ARTICLES_CS
-
+from lingua_franca.lang.parse_common import normalize_decimals
 import re
 import json
 from lingua_franca import resolve_resource_file
@@ -579,7 +579,7 @@ def _initialize_number_data(short_scale):
     return multiplies, string_num_ordinal_cs, string_num_scale_cs
 
 
-def extract_number_cs(text, short_scale=True, ordinals=False):
+def extract_number_cs(text, short_scale=True, ordinals=False, decimal='.'):
     """
     This function extracts a number from a text string,
     handles pronunciations in long scale and short scale
@@ -590,11 +590,17 @@ def extract_number_cs(text, short_scale=True, ordinals=False):
         text (str): the string to normalize
         short_scale (bool): use short scale if True, long scale if False
         ordinals (bool): consider ordinal numbers, third=3 instead of 1/3
+        decimal (str): character to use as decimal point. defaults to '.'
     Returns:
         (int) or (float) or False: The extracted number or False if no number
                                    was found
+    Note:
+        will always extract numbers formatted with a decimal dot/full stop,
+        such as '3.5', even if 'decimal' is specified.
 
     """
+    if decimal != '.':
+        text = normalize_decimals(text, decimal)
     return _extract_number_with_text_cs(tokenize(text.lower()),
                                         short_scale, ordinals).value
 
@@ -1560,20 +1566,25 @@ def isFractional_cs(input_str, short_scale=True):
     return False
 
 
-def extract_numbers_cs(text, short_scale=True, ordinals=False):
+def extract_numbers_cs(text, short_scale=True, ordinals=False, decimal='.'):
     """
         Takes in a string and extracts a list of numbers.
 
     Args:
-        text (str): the string to extract a number from
-        short_scale (bool): Use "short scale" or "long scale" for large
-            numbers -- over a million.  The default is short scale, which
-            is now common in most English speaking countries.
-            See https://en.wikipedia.org/wiki/Names_of_large_numbers
-        ordinals (bool): consider ordinal numbers, e.g. third=3 instead of 1/3
+        text (str): the string to normalize
+        short_scale (bool): use short scale if True, long scale if False
+        ordinals (bool): consider ordinal numbers, third=3 instead of 1/3
+        decimal (str): character to use as decimal point. defaults to '.'
     Returns:
-        list: list of extracted numbers as floats
+        list: list of extracted numbers as floats, one entry per number
+              found in the text
+    Note:
+        will always extract numbers formatted with a decimal dot/full stop,
+        such as '3.5', even if 'decimal' is specified.
+
     """
+    if decimal != '.':
+        text = normalize_decimals(text, decimal)
     results = _extract_numbers_with_text_cs(tokenize(text),
                                             short_scale, ordinals)
     return [float(result.value) for result in results]
diff --git a/lingua_franca/lang/parse_da.py b/lingua_franca/lang/parse_da.py
index 14b18132..13f0eff1 100644
--- a/lingua_franca/lang/parse_da.py
+++ b/lingua_franca/lang/parse_da.py
@@ -20,22 +20,31 @@
 from lingua_franca.lang.common_data_da import _DA_NUMBERS
 from lingua_franca.lang.format_da import pronounce_number_da
 from lingua_franca.time import now_local
+from lingua_franca.lang.parse_common import normalize_decimals
 
 
-def extract_number_da(text, short_scale=True, ordinals=False):
+def extract_number_da(text, short_scale=True, ordinals=False, decimal='.'):
     """
-    This function prepares the given text for parsing by making
-    numbers consistent, getting rid of contractions, etc.
+    This function extracts a number from a text string,
+    handles pronunciations in long scale and short scale
+
+    https://en.wikipedia.org/wiki/Names_of_large_numbers
+
     Args:
         text (str): the string to normalize
+        short_scale (bool): use short scale if True, long scale if False
+        ordinals (bool): consider ordinal numbers, third=3 instead of 1/3
+        decimal (str): character to use as decimal point. defaults to '.'
     Returns:
-        (int) or (float): The value of extracted number
-
-
-    undefined articles cannot be suppressed in German:
-    'ein Pferd' means 'one horse' and 'a horse'
+        (int) or (float) or False: The extracted number or False if no number
+                                   was found
+    Note:
+        will always extract numbers formatted with a decimal dot/full stop,
+        such as '3.5', even if 'decimal' is specified.
 
     """
+    if decimal != '.':
+        text = normalize_decimals(text, decimal)
     # TODO: short_scale and ordinals don't do anything here.
     # The parameters are present in the function signature for API compatibility
     # reasons.
@@ -869,20 +878,25 @@ def normalize_da(text, remove_articles=True):
     return normalized[1:]  # strip the initial space
 
 
-def extract_numbers_da(text, short_scale=True, ordinals=False):
+def extract_numbers_da(text, short_scale=True, ordinals=False, decimal='.'):
     """
         Takes in a string and extracts a list of numbers.
 
-    Args:
-        text (str): the string to extract a number from
-        short_scale (bool): Use "short scale" or "long scale" for large
-            numbers -- over a million.  The default is short scale, which
-            is now common in most English speaking countries.
-            See https://en.wikipedia.org/wiki/Names_of_large_numbers
-        ordinals (bool): consider ordinal numbers, e.g. third=3 instead of 1/3
+     Args:
+        text (str): the string to normalize
+        short_scale (bool): use short scale if True, long scale if False
+        ordinals (bool): consider ordinal numbers, third=3 instead of 1/3
+        decimal (str): character to use as decimal point. defaults to '.'
     Returns:
-        list: list of extracted numbers as floats
+        list: list of extracted numbers as floats
+              (see extract_number_da for single-number extraction)
+    Note:
+        will always extract numbers formatted with a decimal dot/full stop,
+        such as '3.5', even if 'decimal' is specified.
+
     """
+    if decimal != '.':
+        text = normalize_decimals(text, decimal)
     return extract_numbers_generic(text, pronounce_number_da, extract_number_da,
                                    short_scale=short_scale, ordinals=ordinals)
 
diff --git a/lingua_franca/lang/parse_de.py b/lingua_franca/lang/parse_de.py
index 95fda48e..81528e68 100644
--- a/lingua_franca/lang/parse_de.py
+++ b/lingua_franca/lang/parse_de.py
@@ -21,6 +21,7 @@
 from lingua_franca.lang.common_data_de import _DE_NUMBERS
 from lingua_franca.lang.format_de import pronounce_number_de
 from lingua_franca.time import now_local
+from lingua_franca.lang.parse_common import normalize_decimals
 
 
 de_numbers = {
@@ -143,20 +144,28 @@ def repl(match):
     return (duration, text)
 
 
-def extract_number_de(text, short_scale=True, ordinals=False):
+def extract_number_de(text, short_scale=True, ordinals=False, decimal='.'):
     """
-    This function prepares the given text for parsing by making
-    numbers consistent, getting rid of contractions, etc.
+    This function extracts a number from a text string,
+    handles pronunciations in long scale and short scale
+
+    https://en.wikipedia.org/wiki/Names_of_large_numbers
+
     Args:
         text (str): the string to normalize
+        short_scale (bool): use short scale if True, long scale if False
+        ordinals (bool): consider ordinal numbers, third=3 instead of 1/3
+        decimal (str): character to use as decimal point. defaults to '.'
     Returns:
-        (int) or (float): The value of extracted number
-
-
-    undefined articles cannot be suppressed in German:
-    'ein Pferd' means 'one horse' and 'a horse'
+        (int) or (float) or False: The extracted number or False if no number
+                                   was found
+    Note:
+        will always extract numbers formatted with a decimal dot/full stop,
+        such as '3.5', even if 'decimal' is specified.
 
     """
+    if decimal != '.':
+        text = normalize_decimals(text, decimal)
     # TODO: short_scale and ordinals don't do anything here.
     # The parameters are present in the function signature for API compatibility
     # reasons.
@@ -1003,20 +1012,28 @@ def normalize_de(text, remove_articles=True):
     return normalized[1:]  # strip the initial space
 
 
-def extract_numbers_de(text, short_scale=True, ordinals=False):
-    """
-        Takes in a string and extracts a list of numbers.
-
-    Args:
-        text (str): the string to extract a number from
-        short_scale (bool): Use "short scale" or "long scale" for large
-            numbers -- over a million.  The default is short scale, which
-            is now common in most English speaking countries.
-            See https://en.wikipedia.org/wiki/Names_of_large_numbers
-        ordinals (bool): consider ordinal numbers, e.g. third=3 instead of 1/3
-    Returns:
-        list: list of extracted numbers as floats
+def extract_numbers_de(text, short_scale=True, ordinals=False, decimal='.'):
     """
+       Takes in a string and extracts a list of numbers,
+       handles pronunciations in long scale and short scale
+
+       https://en.wikipedia.org/wiki/Names_of_large_numbers
+
+       Args:
+           text (str): the string to normalize
+           short_scale (bool): use short scale if True, long scale if False
+           ordinals (bool): consider ordinal numbers, third=3 instead of 1/3
+           decimal (str): character to use as decimal point. defaults to '.'
+       Returns:
+           list: list of extracted numbers as floats
+                 (see extract_number_de for single-number extraction)
+       Note:
+           will always extract numbers formatted with a decimal dot/full stop,
+           such as '3.5', even if 'decimal' is specified.
+
+       """
+    if decimal != '.':
+        text = normalize_decimals(text, decimal)
     return extract_numbers_generic(text, pronounce_number_de, extract_number_de,
                                    short_scale=short_scale, ordinals=ordinals)
 
diff --git a/lingua_franca/lang/parse_en.py b/lingua_franca/lang/parse_en.py
index a51ee02c..761c14aa 100644
--- a/lingua_franca/lang/parse_en.py
+++ b/lingua_franca/lang/parse_en.py
@@ -18,6 +18,7 @@
 from datetime import datetime, timedelta, time
 
 from dateutil.relativedelta import relativedelta
+from quebra_frases import span_indexed_word_tokenize
 
 from lingua_franca.internal import resolve_resource_file
 from lingua_franca.lang.common_data_en import _ARTICLES_EN, _LONG_ORDINAL_EN, _LONG_SCALE_EN, _SHORT_SCALE_EN, \
@@ -29,6 +30,7 @@
 from lingua_franca.lang.parse_common import is_numeric, look_for_fractions, \
     invert_dict, ReplaceableNumber, partition_list, tokenize, Token, Normalizer
 from lingua_franca.time import now_local
+from lingua_franca.lang.parse_common import normalize_decimals
 
 
 def _convert_words_to_numbers_en(text, short_scale=True, ordinals=False):
@@ -368,9 +370,9 @@ def _extract_whole_number_with_text_en(tokens, short_scale, ordinals):
 
         # is the prev word a number and should we sum it?
         # twenty two, fifty six
-        if (prev_word in _SUMS_EN and val and val < 10) or all([prev_word in
-                                                                multiplies,
-                                                                val < prev_val if prev_val else False]):
+        if (prev_word in _SUMS_EN and val and val < 10) or \
+                all([prev_word in multiplies,
+                     val < prev_val if prev_val else False]):
             val = prev_val + val
 
         # is the prev word a number and should we multiply it?
@@ -529,7 +531,231 @@ def _initialize_number_data_en(short_scale, speech=True):
     return multiplies, string_num_ordinal_en, string_num_scale_en
 
 
-def extract_number_en(text, short_scale=True, ordinals=False):
+def extract_number_spans_en(utterance, short_scale=True, ordinals=False,
+                            fractional_numbers=True, decimal="."):
+    """
+        Tag the numbers in an utterance with their character spans.
+
+        Args:
+            utterance (str or list): text to scan, or an already tokenized
+                list of (start_idx, end_idx, word) tuples
+            short_scale (bool): use short scale if True, long scale if False
+            ordinals (bool): consider ordinal numbers, third=3 instead of 1/3
+            fractional_numbers (bool): True to join "<number> <marker>
+                <fraction>" into one number, eg. "2 and 3/4"
+            decimal (str): decimal marker, defaults to "."
+        Returns:
+            (list): list of tuples with detected number and span of the
+                    number in parent utterance [(number, (start_idx, end_idx))]
+        """
+    number_spans = []
+    if isinstance(utterance, str):
+        spans = span_indexed_word_tokenize(utterance)
+    else:
+        spans = utterance
+
+    # load language number data
+    multiplies, string_num_ordinal, string_num_scale = \
+        _initialize_number_data_en(short_scale, speech=ordinals is not None)
+
+    num_ended = False  # number string ended, save prev number
+    num = None
+    num2 = None
+
+    num_start = -1
+    num_end = -1
+    for idx, (start, end, word) in enumerate(spans):
+        if end <= num_end:
+            # token consumed already
+            continue
+
+        prev_span = spans[idx - 1] if idx > 0 else (-1, -1, "")
+        next_span = spans[idx + 1] if idx + 1 < len(spans) else (-1, -1, "")
+        next_next_span = spans[idx + 2] if idx + 2 < len(spans) else (-1, -1, "")
+
+        word = word.lower()
+        prev_word = prev_span[-1].lower()
+        next_word = next_span[-1].lower()
+
+        def found_number():
+            nonlocal num, number_spans, num_end, num_start
+            # found a number!
+            number_spans.append((num, (num_start, num_end)))
+            num = None
+
+        # is the word a number already ?
+        if not num and is_numeric(word):
+            num = int(word)
+            num_start = start
+            num_end = end
+
+        # spoken/text number?
+        elif not is_numeric(word):
+            # let's see if this continuation or end of a previous number
+            if num is not None:
+                # is this word the name of a number ?
+                if word in _STRING_NUM_EN:
+                    num2 = _STRING_NUM_EN.get(word)
+                elif word in string_num_scale:
+                    num2 = string_num_scale.get(word)
+                elif ordinals and word in string_num_ordinal:
+                    num2 = string_num_ordinal[word]
+
+                # NOTE(review): num2 is looked up but not combined with num yet
+                if num is not None and num_ended:
+                    # found a number!
+                    found_number()
+                    continue
+
+            # let's see if this word is the start of a number
+            else:
+                # explicit ordinals, 1st, 2nd, 3rd, 4th.... Nth
+                if is_numeric(word[:-2]) and \
+                        (word.endswith("st") or word.endswith("nd") or
+                         word.endswith("rd") or word.endswith("th")):
+                    num = int(word[:-2])
+                    num_start = start
+                    num_end = end
+
+                    # handle nth one
+                    if next_word == "one":
+                        # consume next span
+                        # would return 1 instead otherwise
+                        spans[idx + 1] = (-1, -1, "")
+
+                    # found a number!
+                    found_number()
+                    continue
+
+                # is this word the name of a number ?
+                if word in _STRING_NUM_EN:
+                    num = _STRING_NUM_EN.get(word)
+                elif word in string_num_scale:
+                    num = string_num_scale.get(word)
+                elif ordinals and word in string_num_ordinal:
+                    num = string_num_ordinal[word]
+
+                # is this a spoken fraction?
+                # half cup
+                #elif not (ordinals is None and word in string_num_ordinal):
+                #    num = is_fractional_en(word, short_scale=short_scale,
+                #                            spoken=ordinals is not None)
+
+                # process the number we found
+                if num is not None:
+                    # take note of span
+                    num_start = start
+                    num_end = end
+
+                    # negative number marker
+                    if prev_word in _NEGATIVES_EN:
+                        num = 0 - num
+                        num_start = prev_span[0]
+
+                    # is this a final number?
+                    num_ended = False
+
+                    # explicit ordinals, 1st, 2nd, 3rd, 4th.... Nth
+                    if is_numeric(next_word[:-2]) and \
+                            (next_word.endswith("st") or next_word.endswith("nd") or
+                             next_word.endswith("rd") or next_word.endswith("th")):
+                        # new number coming up, invalid continuation
+                        num_ended = True
+
+                    if next_word in _NEGATIVES_EN:
+                        # a new negative sign is an invalid number continuation
+                        num_ended = True
+
+                    # end of sentence
+                    if idx == len(spans) - 1:
+                        num_ended = True
+
+                    if num is not None and num_ended:
+                        found_number()
+
+                    continue
+
+        # handle # and fraction, eg. "2 and 3/4"
+        if fractional_numbers and num is not None and \
+                next_span[-1] in _FRACTION_MARKER_EN and \
+                prev_span[-1] not in [decimal, "/"]:
+            # slice by token index: start/end are character offsets into the text
+            fractional_piece = extract_number_spans_en(spans[idx + 1:],
+                                                       short_scale,
+                                                       ordinals,
+                                                       fractional_numbers,
+                                                       decimal)
+            if fractional_piece:
+                frac_num = fractional_piece[0][0]
+                # ensure first is not a fraction and second is a fraction
+                if num >= 1 and frac_num < 1:
+                    num += frac_num
+                    num_end = fractional_piece[0][1][1]
+                    number_spans.append((num, (num_start, num_end)))
+                    # return all parsed numbers after the marker
+                    # (do not reparse)
+                    return number_spans + fractional_piece[1:]
+
+        # handle # symbol #, eg. 1.5 or 3/4
+        elif word.isdigit() and \
+                next_next_span[-1].isdigit() and \
+                next_span[-1] in [decimal, "/"] and \
+                prev_span[-1] not in [decimal, "/"]:
+            num = int(word)
+            num_start = start
+            num_end = end
+            num2 = int(next_next_span[-1])
+
+            # negative number marker
+            if prev_word in _NEGATIVES_EN:
+                num = 0 - num
+                num_start = prev_span[0]
+
+            # handle #/#, eg. "1/5"
+            if next_span[-1] == "/":
+                num_start = start
+                num = num / num2
+                num_end = next_next_span[1]
+                # found a number!
+                found_number()
+                continue
+
+            # handle #.#, eg. "1.5"
+            elif next_span[-1] == decimal:
+                num2 = float(f"0.{num2}")
+                num = num + num2
+                num_end = next_next_span[1]
+                # found a number!
+                found_number()
+                continue
+
+        # handle #, eg. "123"
+        elif is_numeric(word):
+            if word.isdigit():  # doesn't work with decimals
+                num = int(word)
+            else:
+                num = float(word)
+            num_start = start
+            num_end = end
+            # negative number marker
+            if prev_word in _NEGATIVES_EN:
+                num = 0 - num
+                num_start = prev_span[0]
+            # found a number!
+            found_number()
+            continue
+
+    return number_spans
+
+
+def extract_number_en_v2(*args, **kwargs):
+    """Return the first number found in the utterance, or False if none."""
+    spans = extract_number_spans_en(*args, **kwargs)
+    # reuse the parsed spans rather than tokenizing and parsing a second time
+    return spans[0][0] if spans else False
+
+
+def extract_number_en(text, short_scale=True, ordinals=False, decimal='.'):
     """
     This function extracts a number from a text string,
     handles pronunciations in long scale and short scale
@@ -540,11 +766,17 @@ def extract_number_en(text, short_scale=True, ordinals=False):
         text (str): the string to normalize
         short_scale (bool): use short scale if True, long scale if False
         ordinals (bool): consider ordinal numbers, third=3 instead of 1/3
+        decimal (str): character to use as decimal point. defaults to '.'
     Returns:
         (int) or (float) or False: The extracted number or False if no number
                                    was found
+    Note:
+        will always extract numbers formatted with a decimal dot/full stop,
+        such as '3.5', even if 'decimal' is specified.
 
     """
+    if decimal != '.':
+        text = normalize_decimals(text, decimal)
     return _extract_number_with_text_en(tokenize(text.lower()),
                                         short_scale, ordinals).value
 
@@ -1655,7 +1887,7 @@ def is_fractional_en(input_str, short_scale=True, spoken=True):
     return False
 
 
-def extract_numbers_en(text, short_scale=True, ordinals=False):
+def extract_numbers_en(text, short_scale=True, ordinals=False, decimal='.'):
     """
         Takes in a string and extracts a list of numbers.
 
@@ -1666,9 +1898,15 @@ def extract_numbers_en(text, short_scale=True, ordinals=False):
             is now common in most English speaking countries.
             See https://en.wikipedia.org/wiki/Names_of_large_numbers
         ordinals (bool): consider ordinal numbers, e.g. third=3 instead of 1/3
+        decimal (str): character to use as decimal point. defaults to '.'
     Returns:
         list: list of extracted numbers as floats
+    Note:
+        will always extract numbers formatted with a decimal dot/full stop,
+        such as '3.5', even if 'decimal' is specified.
     """
+    if decimal != '.':
+        text = normalize_decimals(text, decimal)
     results = _extract_numbers_with_text_en(tokenize(text),
                                             short_scale, ordinals)
     return [float(result.value) for result in results]
diff --git a/lingua_franca/lang/parse_es.py b/lingua_franca/lang/parse_es.py
index 0a810cc4..f2f471f1 100644
--- a/lingua_franca/lang/parse_es.py
+++ b/lingua_franca/lang/parse_es.py
@@ -20,6 +20,7 @@
 from lingua_franca.lang.format_es import pronounce_number_es
 from lingua_franca.lang.parse_common import *
 from lingua_franca.lang.common_data_es import _ARTICLES_ES, _STRING_NUM_ES
+from lingua_franca.lang.parse_common import normalize_decimals
 
 
 def is_fractional_es(input_str, short_scale=True):
@@ -56,16 +57,28 @@ def is_fractional_es(input_str, short_scale=True):
     return False
 
 
-def extract_number_es(text, short_scale=True, ordinals=False):
+def extract_number_es(text, short_scale=True, ordinals=False, decimal='.'):
     """
-    This function prepares the given text for parsing by making
-    numbers consistent, getting rid of contractions, etc.
+    This function extracts a number from a text string,
+    handles pronunciations in long scale and short scale
+
+    https://en.wikipedia.org/wiki/Names_of_large_numbers
+
     Args:
         text (str): the string to normalize
+        short_scale (bool): use short scale if True, long scale if False
+        ordinals (bool): consider ordinal numbers, third=3 instead of 1/3
+        decimal (str): character to use as decimal point. defaults to '.'
     Returns:
-        (int) or (float): The value of extracted number
+        (int) or (float) or False: The extracted number or False if no number
+                                   was found
+    Note:
+        will always extract numbers formatted with a decimal dot/full stop,
+        such as '3.5', even if 'decimal' is specified.
 
     """
+    if decimal != '.':
+        text = normalize_decimals(text, decimal)
     # TODO: short_scale and ordinals don't do anything here.
     # The parameters are present in the function signature for API compatibility
     # reasons.
@@ -268,20 +281,25 @@ def es_number(i):
     return es_number(i)
 
 
-def extract_numbers_es(text, short_scale=True, ordinals=False):
+def extract_numbers_es(text, short_scale=True, ordinals=False, decimal='.'):
     """
         Takes in a string and extracts a list of numbers.
 
-    Args:
-        text (str): the string to extract a number from
-        short_scale (bool): Use "short scale" or "long scale" for large
-            numbers -- over a million.  The default is short scale, which
-            is now common in most English speaking countries.
-            See https://en.wikipedia.org/wiki/Names_of_large_numbers
-        ordinals (bool): consider ordinal numbers, e.g. third=3 instead of 1/3
+     Args:
+        text (str): the string to normalize
+        short_scale (bool): use short scale if True, long scale if False
+        ordinals (bool): consider ordinal numbers, third=3 instead of 1/3
+        decimal (str): character to use as decimal point. defaults to '.'
     Returns:
-        list: list of extracted numbers as floats
+        list: list of extracted numbers as floats
+              (see extract_number_es for single-number extraction)
+    Note:
+        will always extract numbers formatted with a decimal dot/full stop,
+        such as '3.5', even if 'decimal' is specified.
+
     """
+    if decimal != '.':
+        text = normalize_decimals(text, decimal)
     return extract_numbers_generic(text, pronounce_number_es,
                                    extract_number_es, short_scale=short_scale,
                                    ordinals=ordinals)
diff --git a/lingua_franca/lang/parse_fa.py b/lingua_franca/lang/parse_fa.py
index 753ac8eb..8d0be089 100644
--- a/lingua_franca/lang/parse_fa.py
+++ b/lingua_franca/lang/parse_fa.py
@@ -19,6 +19,7 @@
                                                _FARSI_ONES, _FARSI_TENS,
                                                _FORMAL_VARIANT)
 from lingua_franca.time import now_local
+from lingua_franca.lang.parse_common import normalize_decimals
 
 
 def _is_number(s):
@@ -307,20 +308,25 @@ def extract_datetime_fa(text, anchorDate=None, default_time=None):
     return (result, " ".join(remainder))
 
 
-def extract_numbers_fa(text, short_scale=True, ordinals=False):
+def extract_numbers_fa(text, short_scale=True, ordinals=False, decimal='.'):
     """
         Takes in a string and extracts a list of numbers.
 
-    Args:
-        text (str): the string to extract a number from
-        short_scale (bool): Use "short scale" or "long scale" for large
-            numbers -- over a million.  The default is short scale, which
-            is now common in most English speaking countries.
-            See https://en.wikipedia.org/wiki/Names_of_large_numbers
-        ordinals (bool): consider ordinal numbers, e.g. third=3 instead of 1/3
+     Args:
+        text (str): the string to normalize
+        short_scale (bool): use short scale if True, long scale if False
+        ordinals (bool): consider ordinal numbers, third=3 instead of 1/3
+        decimal (str): character to use as decimal point. defaults to '.'
     Returns:
-        list: list of extracted numbers as floats
+        list: list of extracted numbers
+              (see extract_number_fa for single-number extraction)
+    Note:
+        will always extract numbers formatted with a decimal dot/full stop,
+        such as '3.5', even if 'decimal' is specified.
+
     """
+    if decimal != '.':
+        text = normalize_decimals(text, decimal)
 
     ar = _parse_sentence(text)
     result = []
@@ -330,7 +336,7 @@ def extract_numbers_fa(text, short_scale=True, ordinals=False):
     return result
 
 
-def extract_number_fa(text, ordinals=False):
+def extract_number_fa(text, short_scale=True, ordinals=False, decimal='.'):
     """
     This function extracts a number from a text string,
     handles pronunciations in long scale and short scale
@@ -341,11 +347,17 @@ def extract_number_fa(text, ordinals=False):
         text (str): the string to normalize
         short_scale (bool): use short scale if True, long scale if False
         ordinals (bool): consider ordinal numbers, third=3 instead of 1/3
+        decimal (str): character to use as decimal point. defaults to '.'
     Returns:
         (int) or (float) or False: The extracted number or False if no number
                                    was found
+    Note:
+        will always extract numbers formatted with a decimal dot/full stop,
+        such as '3.5', even if 'decimal' is specified.
 
     """
+    if decimal != '.':
+        text = normalize_decimals(text, decimal)
     x = extract_numbers_fa(text, ordinals=ordinals)
     if (len(x) == 0):
         return False
diff --git a/lingua_franca/lang/parse_fr.py b/lingua_franca/lang/parse_fr.py
index 9728653f..87579c22 100644
--- a/lingua_franca/lang/parse_fr.py
+++ b/lingua_franca/lang/parse_fr.py
@@ -23,6 +23,7 @@
 from lingua_franca.lang.common_data_fr import _ARTICLES_FR, _NUMBERS_FR, \
     _ORDINAL_ENDINGS_FR
 from lingua_franca.time import now_local
+from lingua_franca.lang.parse_common import normalize_decimals
 
 
 def extract_duration_fr(text):
@@ -369,13 +370,28 @@ def _number_ordinal_fr(words, i):
     return None
 
 
-def extract_number_fr(text, short_scale=True, ordinals=False):
-    """Takes in a string and extracts a number.
+def extract_number_fr(text, short_scale=True, ordinals=False, decimal='.'):
+    """
+    This function extracts a number from a text string,
+    handles pronunciations in long scale and short scale
+
+    https://en.wikipedia.org/wiki/Names_of_large_numbers
+
     Args:
-        text (str): the string to extract a number from
+        text (str): the string to normalize
+        short_scale (bool): use short scale if True, long scale if False
+        ordinals (bool): consider ordinal numbers, third=3 instead of 1/3
+        decimal (str): character to use as decimal point. defaults to '.'
     Returns:
-        (str): The number extracted or the original text.
+        (int) or (float) or False: The extracted number or False if no number
+                                   was found
+    Note:
+        will always extract numbers formatted with a decimal dot/full stop,
+        such as '3.5', even if 'decimal' is specified.
+
     """
+    if decimal != '.':
+        text = normalize_decimals(text, decimal)
     # TODO: short_scale and ordinals don't do anything here.
     # The parameters are present in the function signature for API compatibility
     # reasons.
@@ -1067,20 +1083,25 @@ def normalize_fr(text, remove_articles=True):
     return normalized[1:]  # strip the initial space
 
 
-def extract_numbers_fr(text, short_scale=True, ordinals=False):
+def extract_numbers_fr(text, short_scale=True, ordinals=False, decimal='.'):
     """
         Takes in a string and extracts a list of numbers.
 
-    Args:
-        text (str): the string to extract a number from
-        short_scale (bool): Use "short scale" or "long scale" for large
-            numbers -- over a million.  The default is short scale, which
-            is now common in most English speaking countries.
-            See https://en.wikipedia.org/wiki/Names_of_large_numbers
-        ordinals (bool): consider ordinal numbers, e.g. third=3 instead of 1/3
+    Args:
+        text (str): the string to extract numbers from
+        short_scale (bool): use short scale if True, long scale if False
+        ordinals (bool): consider ordinal numbers, third=3 instead of 1/3
+        decimal (str): character to use as decimal point. defaults to '.'
+            (text goes through normalize_decimals() for any other value)
     Returns:
-        list: list of extracted numbers as floats
+        list: list of the extracted numbers as floats
+    Note:
+        will always extract numbers formatted with a decimal dot/full stop,
+        such as '3.5', even if 'decimal' is specified.
+
     """
+    if decimal != '.':
+        text = normalize_decimals(text, decimal)
     return extract_numbers_generic(text, pronounce_number_fr, extract_number_fr,
                                    short_scale=short_scale, ordinals=ordinals)
 
diff --git a/lingua_franca/lang/parse_it.py b/lingua_franca/lang/parse_it.py
index 88c7455d..297445f3 100644
--- a/lingua_franca/lang/parse_it.py
+++ b/lingua_franca/lang/parse_it.py
@@ -28,6 +28,7 @@
     pronounce_number_it
 from lingua_franca.lang.common_data_it import _SHORT_ORDINAL_STRING_IT, \
     _ARTICLES_IT, _LONG_ORDINAL_STRING_IT, _STRING_NUM_IT
+from lingua_franca.lang.parse_common import normalize_decimals
 
 
 def is_fractional_it(input_str, short_scale=False):
@@ -224,7 +225,7 @@ def _extract_number_long_it(word):
     return value
 
 
-def extract_number_it(text, short_scale=False, ordinals=False):
+def extract_number_it(text, short_scale=False, ordinals=False, decimal='.'):
     """
     This function extracts a number from a text string,
     handles pronunciations in long scale and short scale
@@ -235,11 +236,17 @@ def extract_number_it(text, short_scale=False, ordinals=False):
         text (str): the string to normalize
         short_scale (bool): use short scale if True, long scale if False
         ordinals (bool): consider ordinal numbers, third=3 instead of 1/3
+        decimal (str): character to use as decimal point. defaults to '.'
     Returns:
         (int) or (float) or False: The extracted number or False if no number
                                    was found
+    Note:
+        will always extract numbers formatted with a decimal dot/full stop,
+        such as '3.5', even if 'decimal' is specified.
 
     """
+    if decimal != '.':
+        text = normalize_decimals(text, decimal)
 
     text = text.lower()
     string_num_ordinal_it = {}
@@ -1148,20 +1155,25 @@ def get_gender_it(word, context=""):
     return gender
 
 
-def extract_numbers_it(text, short_scale=False, ordinals=False):
+def extract_numbers_it(text, short_scale=False, ordinals=False, decimal='.'):
     """
         Takes in a string and extracts a list of numbers.
 
-    Args:
-        text (str): the string to extract a number from
-        short_scale (bool): Use "short scale" or "long scale" for large
-            numbers -- over a million.  The default is short scale, which
-            is now common in most English speaking countries.
-            See https://en.wikipedia.org/wiki/Names_of_large_numbers
-        ordinals (bool): consider ordinal numbers, e.g. third=3 instead of 1/3
+    Args:
+        text (str): the string to extract numbers from
+        short_scale (bool): use short scale if True, long scale if False
+        ordinals (bool): consider ordinal numbers, third=3 instead of 1/3
+        decimal (str): character to use as decimal point. defaults to '.'
+            (text goes through normalize_decimals() for any other value)
     Returns:
-        list: list of extracted numbers as floats
+        list: list of the extracted numbers as floats
+    Note:
+        will always extract numbers formatted with a decimal dot/full stop,
+        such as '3.5', even if 'decimal' is specified.
+
     """
+    if decimal != '.':
+        text = normalize_decimals(text, decimal)
     return extract_numbers_generic(text, pronounce_number_it,
                                    extract_number_it,
                                    short_scale=short_scale, ordinals=ordinals)
diff --git a/lingua_franca/lang/parse_nl.py b/lingua_franca/lang/parse_nl.py
index ba197704..a1f074f7 100644
--- a/lingua_franca/lang/parse_nl.py
+++ b/lingua_franca/lang/parse_nl.py
@@ -26,6 +26,7 @@
     _STRING_SHORT_ORDINAL_NL, _SUMS_NL
 from lingua_franca.time import now_local
 import re
+from lingua_franca.lang.parse_common import normalize_decimals
 
 
 def _convert_words_to_numbers_nl(text, short_scale=True, ordinals=False):
@@ -414,10 +415,10 @@ def _initialize_number_data_nl(short_scale):
     return multiplies, string_num_ordinal_nl, string_num_scale_nl
 
 
-def extract_number_nl(text, short_scale=True, ordinals=False):
-    """Extract a number from a text string
-
-    The function handles pronunciations in long scale and short scale
+def extract_number_nl(text, short_scale=True, ordinals=False, decimal='.'):
+    """
+    This function extracts a number from a text string,
+    handles pronunciations in long scale and short scale
 
     https://en.wikipedia.org/wiki/Names_of_large_numbers
 
@@ -425,10 +426,17 @@ def extract_number_nl(text, short_scale=True, ordinals=False):
         text (str): the string to normalize
         short_scale (bool): use short scale if True, long scale if False
         ordinals (bool): consider ordinal numbers, third=3 instead of 1/3
+        decimal (str): character to use as decimal point. defaults to '.'
     Returns:
         (int) or (float) or False: The extracted number or False if no number
                                    was found
+    Note:
+        will always extract numbers formatted with a decimal dot/full stop,
+        such as '3.5', even if 'decimal' is specified.
+
     """
+    if decimal != '.':
+        text = normalize_decimals(text, decimal)
     return _extract_number_with_text_nl(tokenize(text.lower()),
                                         short_scale, ordinals).value
 
@@ -1294,19 +1302,24 @@ def is_fractional_nl(input_str, short_scale=True):
     return False
 
 
-def extract_numbers_nl(text, short_scale=True, ordinals=False):
+def extract_numbers_nl(text, short_scale=True, ordinals=False, decimal='.'):
     """Takes in a string and extracts a list of numbers.
 
     Args:
-        text (str): the string to extract a number from
-        short_scale (bool): Use "short scale" or "long scale" for large
-            numbers -- over a million.  The default is short scale, which
-            is now common in most English speaking countries.
-            See https://en.wikipedia.org/wiki/Names_of_large_numbers
-        ordinals (bool): consider ordinal numbers, e.g. third=3 instead of 1/3
+        text (str): the string to extract numbers from
+        short_scale (bool): use short scale if True, long scale if False
+        ordinals (bool): consider ordinal numbers, third=3 instead of 1/3
+        decimal (str): character to use as decimal point. defaults to '.'
+            (text goes through normalize_decimals() for any other value)
     Returns:
-        list: list of extracted numbers as floats
+        list: the extracted numbers, each converted to float
+    Note:
+        will always extract numbers formatted with a decimal dot/full stop,
+        such as '3.5', even if 'decimal' is specified.
+
     """
+    if decimal != '.':
+        text = normalize_decimals(text, decimal)
     results = _extract_numbers_with_text_nl(tokenize(text),
                                             short_scale, ordinals)
     return [float(result.value) for result in results]
diff --git a/lingua_franca/lang/parse_pl.py b/lingua_franca/lang/parse_pl.py
index 84f83bc8..67518d25 100644
--- a/lingua_franca/lang/parse_pl.py
+++ b/lingua_franca/lang/parse_pl.py
@@ -24,6 +24,8 @@
     _TIME_UNITS_NORMALIZATION, _MONTHS_TO_EN, _DAYS_TO_EN, _ORDINAL_BASE_PL, \
     _ALT_ORDINALS_PL
 from lingua_franca.time import now_local
+from lingua_franca.lang.parse_common import normalize_decimals
+
 import re
 
 
@@ -576,7 +578,7 @@ def _initialize_number_data(short_scale):
     return multiplies, _STRING_SHORT_ORDINAL_PL, string_num_scale
 
 
-def extract_number_pl(text, short_scale=True, ordinals=False):
+def extract_number_pl(text, short_scale=True, ordinals=False, decimal='.'):
     """
     This function extracts a number from a text string,
     handles pronunciations in long scale and short scale
@@ -587,11 +589,17 @@ def extract_number_pl(text, short_scale=True, ordinals=False):
         text (str): the string to normalize
         short_scale (bool): use short scale if True, long scale if False
         ordinals (bool): consider ordinal numbers, third=3 instead of 1/3
+        decimal (str): character to use as decimal point. defaults to '.'
     Returns:
         (int) or (float) or False: The extracted number or False if no number
                                    was found
+    Note:
+        will always extract numbers formatted with a decimal dot/full stop,
+        such as '3.5', even if 'decimal' is specified.
 
     """
+    if decimal != '.':
+        text = normalize_decimals(text, decimal)
     return _extract_number_with_text_pl(tokenize(text.lower()),
                                         True, ordinals).value
 
@@ -1333,20 +1341,28 @@ def isFractional_pl(input_str, short_scale=True):
     return False
 
 
-def extract_numbers_pl(text, short_scale=True, ordinals=False):
+def extract_numbers_pl(text, short_scale=True, ordinals=False, decimal='.'):
     """
-        Takes in a string and extracts a list of numbers.
+    This function extracts a list of numbers from a text string,
+    handles pronunciations in long scale and short scale
+
+    https://en.wikipedia.org/wiki/Names_of_large_numbers
 
     Args:
-        text (str): the string to extract a number from
-        short_scale (bool): Use "short scale" or "long scale" for large
-            numbers -- over a million.  The default is short scale, which
-            is now common in most English speaking countries.
-            See https://en.wikipedia.org/wiki/Names_of_large_numbers
-        ordinals (bool): consider ordinal numbers, e.g. third=3 instead of 1/3
+        text (str): the string to extract numbers from
+        short_scale (bool): use short scale if True, long scale if False
+        ordinals (bool): consider ordinal numbers, third=3 instead of 1/3
+        decimal (str): character to use as decimal point. defaults to '.'
+            (text goes through normalize_decimals() for any other value)
     Returns:
-        list: list of extracted numbers as floats
+        list: the extracted numbers, each converted to float
+    Note:
+        will always extract numbers formatted with a decimal dot/full stop,
+        such as '3.5', even if 'decimal' is specified.
+
     """
+    if decimal != '.':
+        text = normalize_decimals(text, decimal)
     results = _extract_numbers_with_text_pl(tokenize(text),
                                             short_scale, ordinals)
     return [float(result.value) for result in results]
diff --git a/lingua_franca/lang/parse_pt.py b/lingua_franca/lang/parse_pt.py
index 356c1e83..ab1dd94c 100644
--- a/lingua_franca/lang/parse_pt.py
+++ b/lingua_franca/lang/parse_pt.py
@@ -29,6 +29,8 @@
 from lingua_franca.internal import resolve_resource_file
 from lingua_franca.lang.parse_common import Normalizer
 from lingua_franca.time import now_local
+from lingua_franca.lang.parse_common import normalize_decimals
+
 import json
 import re
 import unicodedata
@@ -77,16 +79,28 @@ def is_fractional_pt(input_str, short_scale=True):
     return False
 
 
-def extract_number_pt(text, short_scale=True, ordinals=False):
+def extract_number_pt(text, short_scale=True, ordinals=False, decimal='.'):
     """
-    This function prepares the given text for parsing by making
-    numbers consistent, getting rid of contractions, etc.
+    This function extracts a number from a text string,
+    handles pronunciations in long scale and short scale
+
+    https://en.wikipedia.org/wiki/Names_of_large_numbers
+
     Args:
         text (str): the string to normalize
+        short_scale (bool): use short scale if True, long scale if False
+        ordinals (bool): consider ordinal numbers, third=3 instead of 1/3
+        decimal (str): character to use as decimal point. defaults to '.'
     Returns:
-        (int) or (float): The value of extracted number
+        (int) or (float) or False: The extracted number or False if no number
+                                   was found
+    Note:
+        will always extract numbers formatted with a decimal dot/full stop,
+        such as '3.5', even if 'decimal' is specified.
 
     """
+    if decimal != '.':
+        text = normalize_decimals(text, decimal)
     # TODO: short_scale and ordinals don't do anything here.
     # The parameters are present in the function signature for API compatibility
     # reasons.
diff --git a/lingua_franca/lang/parse_sv.py b/lingua_franca/lang/parse_sv.py
index 02164111..bb23f2ee 100644
--- a/lingua_franca/lang/parse_sv.py
+++ b/lingua_franca/lang/parse_sv.py
@@ -17,6 +17,7 @@
 from dateutil.relativedelta import relativedelta
 
 from lingua_franca.time import now_local
+from lingua_franca.lang.parse_common import normalize_decimals
 
 from .parse_common import (is_numeric, look_for_fractions, Normalizer,
                            tokenize, Token)
@@ -156,15 +157,23 @@ def extract_duration_sv(text):
     return (td, remainder) if valid else None
 
 
-def extract_number_sv(text, short_scale=True, ordinals=False):
+def extract_number_sv(text, short_scale=True, ordinals=False, decimal='.'):
     """
     This function prepares the given text for parsing by making
     numbers consistent, getting rid of contractions, etc.
     Args:
         text (str): the string to normalize
+        short_scale (bool): use short scale if True, long scale if False
+        ordinals (bool): consider ordinal numbers, third=3 instead of 1/3
+        decimal (str): character to use as decimal point. defaults to '.'
     Returns:
         (int) or (float): The value of extracted number
+    Note:
+        will always extract numbers formatted with a decimal dot/full stop,
+        such as '3.5', even if 'decimal' is specified.
     """
+    if decimal != '.':
+        text = normalize_decimals(text, decimal)
     # TODO: short_scale and ordinals don't do anything here.
     # The parameters are present in the function signature for API
     # compatibility reasons.
diff --git a/lingua_franca/parse.py b/lingua_franca/parse.py
index f1602717..76dbad4e 100644
--- a/lingua_franca/parse.py
+++ b/lingua_franca/parse.py
@@ -13,9 +13,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+import re
 import json
 from lingua_franca.util import match_one, fuzzy_match, MatchStrategy
-from lingua_franca.lang.parse_common import match_yes_or_no
+from lingua_franca.lang.parse_common import match_yes_or_no, is_numeric
 from difflib import SequenceMatcher
 from warnings import warn
 from lingua_franca.time import now_local
@@ -24,13 +25,17 @@
     get_default_lang, localized_function, _raise_unsupported_language, UnsupportedLanguageError,\
     resolve_resource_file, FunctionNotLocalizedError
 import unicodedata
+from quebra_frases import span_indexed_word_tokenize
 
 
+# TODO deprecate extract_number and extract_numbers in favor of
+#  extract_number_spans to rule them all
 _REGISTERED_FUNCTIONS = ("extract_numbers",
                          "extract_number",
                          "extract_duration",
                          "extract_datetime",
                          "extract_langcode",
+                         "extract_number_spans",
                          "normalize",
                          "get_gender",
                          "yes_or_no",
@@ -40,6 +45,44 @@
 populate_localized_function_dict("parse", langs=get_active_langs())
 
 
+@localized_function(run_own_code_on=[FunctionNotLocalizedError])
+def extract_number_spans(utterance, short_scale=True, ordinals=False,
+                         fractional_numbers=True, decimal=".", lang=''):
+    """Tag the numeric tokens found in an utterance.
+
+    Args:
+        utterance (str): the string to search for numbers
+        short_scale (bool): use short scale if True, long scale if False
+        ordinals (bool): consider ordinal numbers, third=3 instead of 1/3
+        fractional_numbers (bool): True to look for fractions and decimals
+        decimal (str): marker between integer and fractional digits
+        lang (str, optional): an optional BCP-47 language code, if omitted
+                              the default language will be used.
+    Returns:
+        (list): list of tuples with detected number and span of the
+                number in parent utterance [(number, (start_idx, end_idx))]
+    """
+    number_spans = []
+    spans = span_indexed_word_tokenize(utterance)
+    for idx, (start, end, word) in enumerate(spans):
+        if not is_numeric(word):
+            continue
+        tail = spans[idx + 1:idx + 3]
+        try:
+            if len(tail) == 2 and tail[0][-1] == decimal and \
+                    is_numeric(tail[1][-1]):
+                # join with '.': float() rejects other markers such as ','
+                num = float(".".join([word, tail[1][-1]]))
+                end = tail[1][1]
+                spans[idx + 1] = spans[idx + 2] = (-1, -1, "")  # consumed
+            else:
+                num = int(word)
+        except ValueError:  # is_numeric() also accepts "1e3", "nan", ...
+            continue
+        number_spans.append((num, (start, end)))
+    return number_spans
+
+
 @localized_function(run_own_code_on=[FunctionNotLocalizedError])
 def yes_or_no(text, lang=""):
     text = normalize(text, lang=lang, remove_articles=True).lower()
@@ -55,8 +98,9 @@ def extract_langcode(text, lang=""):
     return match_one(text, LANGUAGES, strategy=MatchStrategy.TOKEN_SET_RATIO)
 
 
-@localized_function()
-def extract_numbers(text, short_scale=True, ordinals=False, lang=''):
+@localized_function(run_own_code_on=[FunctionNotLocalizedError])
+def extract_numbers(text, short_scale=True, ordinals=False, lang='',
+                    decimal='.', fractional_numbers=True):
     """
         Takes in a string and extracts a list of numbers.
 
@@ -69,28 +113,83 @@ def extract_numbers(text, short_scale=True, ordinals=False, lang=''):
         ordinals (bool): consider ordinal numbers, e.g. third=3 instead of 1/3
         lang (str, optional): an optional BCP-47 language code, if omitted
                               the default language will be used.
+        decimal (str): character to use as decimal point. defaults to '.'
     Returns:
         list: list of extracted numbers as floats, or empty list if none found
+    Note:
+        will always extract numbers formatted with a decimal dot/full stop,
+        such as '3.5', even if 'decimal' is specified.
     """
+    spans = extract_number_spans(text, short_scale=short_scale, lang=lang,
+                                 ordinals=ordinals, decimal=decimal,
+                                 fractional_numbers=fractional_numbers)
+    if spans:
+        return [a[0] for a in spans]
+    return []
 
 
-@localized_function()
-def extract_number(text, short_scale=True, ordinals=False, lang=''):
+@localized_function(run_own_code_on=[FunctionNotLocalizedError])
+def extract_number(text, short_scale=True, ordinals=False, lang='',
+                   decimal='.', fractional_numbers=True):
+    """Kept for backwards compat: returns extract_first_number(...) as is."""
+    return extract_first_number(text, short_scale, ordinals,
+                                lang, decimal, fractional_numbers)
+
+
+@localized_function(run_own_code_on=[FunctionNotLocalizedError])
+def extract_first_number(text, short_scale=True, ordinals=False, lang='',
+                         decimal='.', fractional_numbers=True):
     """Takes in a string and extracts a number.
 
-    Args:
-        text (str): the string to extract a number from
-        short_scale (bool): Use "short scale" or "long scale" for large
-            numbers -- over a million.  The default is short scale, which
-            is now common in most English speaking countries.
-            See https://en.wikipedia.org/wiki/Names_of_large_numbers
-        ordinals (bool): consider ordinal numbers, e.g. third=3 instead of 1/3
-        lang (str, optional): an optional BCP-47 language code, if omitted
-                              the default language will be used.
-    Returns:
-        (int, float or False): The number extracted or False if the input
-                               text contains no numbers
-    """
+        Args:
+            text (str): the string to extract a number from
+            short_scale (bool): Use "short scale" or "long scale" for large
+                numbers -- over a million.  The default is short scale, which
+                is now common in most English speaking countries.
+                See https://en.wikipedia.org/wiki/Names_of_large_numbers
+            ordinals (bool): consider ordinal numbers, e.g. third=3 instead of 1/3
+            lang (str, optional): an optional BCP-47 language code, if omitted
+                                  the default language will be used.
+            decimal (str): character to use as decimal point. defaults to '.'
+        Returns:
+            (int, float or False): The number extracted or False if the input
+                                   text contains no numbers
+        Note:
+            will always extract numbers formatted with a decimal dot/full stop,
+            such as '3.5', even if 'decimal' is specified.
+        """
+    numbers = extract_numbers(text, short_scale, ordinals, lang, decimal, fractional_numbers)
+    if numbers:
+        return numbers[0]
+    return False
+
+
+@localized_function(run_own_code_on=[FunctionNotLocalizedError])
+def extract_last_number(text, short_scale=True, ordinals=False, lang='',
+                        decimal='.', fractional_numbers=True):
+    """Takes in a string and extracts the last number it contains.
+
+        Args:
+            text (str): the string to extract a number from
+            short_scale (bool): Use "short scale" or "long scale" for large
+                numbers -- over a million.  The default is short scale, which
+                is now common in most English speaking countries.
+                See https://en.wikipedia.org/wiki/Names_of_large_numbers
+            ordinals (bool): consider ordinal numbers, e.g. third=3 instead of 1/3
+            lang (str, optional): BCP-47 code; default language if omitted
+            decimal (str): character to use as decimal point. defaults to '.'
+            fractional_numbers (bool): forwarded to extract_numbers
+        Returns:
+            (int, float or False): The last number extracted or False if the
+                                   input text contains no numbers
+        Note:
+            will always extract numbers formatted with a decimal dot/full stop,
+            such as '3.5', even if 'decimal' is specified.
+        """
+    numbers = extract_numbers(text, short_scale, ordinals, lang, decimal, fractional_numbers)
+    if numbers:
+        return numbers[-1]
+    return False
 
 
 @localized_function()
diff --git a/requirements/requirements.txt b/requirements/requirements.txt
index 5d943a5d..2df4433c 100644
--- a/requirements/requirements.txt
+++ b/requirements/requirements.txt
@@ -1,2 +1,3 @@
 python-dateutil~=2.6
-rapidfuzz
\ No newline at end of file
+rapidfuzz
+quebra_frases>=0.3.4
\ No newline at end of file
diff --git a/test/unittests/test_format_pt.py b/test/unittests/test_format_pt.py
index 61c94406..931afeca 100644
--- a/test/unittests/test_format_pt.py
+++ b/test/unittests/test_format_pt.py
@@ -21,6 +21,7 @@
 from lingua_franca.format import nice_time
 from lingua_franca.format import pronounce_number
 from lingua_franca.time import default_timezone
+from lingua_franca.parse import extract_number_spans
 
 
 def setUpModule():
@@ -64,6 +65,16 @@ def tearDownModule():
 }
 
 
+class TestSpans(unittest.TestCase):
+    def test_number_spans(self):
+        self.assertEqual(extract_number_spans("este é o teste 1 2 3 666 1.5"),
+                         [(1, (15, 16)),
+                          (2, (17, 18)),
+                          (3, (19, 20)),
+                          (666, (21, 24)),
+                          (1.5, (25, 28))])
+
+
 class TestPronounceNumber(unittest.TestCase):
     def test_convert_int(self):
         self.assertEqual(pronounce_number(0, lang="pt"), "zero")
diff --git a/test/unittests/test_parse_en.py b/test/unittests/test_parse_en.py
index caae8999..4bd066bf 100644
--- a/test/unittests/test_parse_en.py
+++ b/test/unittests/test_parse_en.py
@@ -20,14 +20,10 @@
 
 from lingua_franca import load_language, unload_language, set_default_lang
 from lingua_franca.internal import FunctionNotLocalizedError
-from lingua_franca.parse import extract_datetime
-from lingua_franca.parse import extract_duration
-from lingua_franca.parse import extract_number, extract_numbers
-from lingua_franca.parse import get_gender
-from lingua_franca.parse import normalize
 from lingua_franca.time import default_timezone, to_local
-from lingua_franca.parse import extract_langcode
-from lingua_franca.parse import yes_or_no
+from lingua_franca.parse import extract_datetime, extract_duration, extract_number, \
+    extract_numbers, get_gender, normalize, extract_langcode, yes_or_no, extract_number_spans
+from lingua_franca.lang.parse_en import extract_number_en_v2
 
 
 def setUpModule():
@@ -290,6 +286,17 @@ def test_combinations(self):
 
 
 class TestExtractNumber(unittest.TestCase):
+    def test_extract_number_decimal_markers(self):
+        # Test decimal normalization
+        self.assertEqual(extract_number("4,4", decimal=','), 4.4)
+        self.assertEqual(extract_number("we have 3,5 kilometers to go",
+                                        decimal=','), 3.5)
+        self.assertEqual(extract_numbers("this is a seven eight 9,5 test",
+                                         decimal=','),
+                         [7.0, 8.0, 9.5])
+        self.assertEqual(extract_numbers("this is a 7,0 8.0 9,6 test",
+                                         decimal=','), [7.0, 8.0, 9.6])
+
     def test_extract_number_priority(self):
         # sanity check
         self.assertEqual(extract_number("third", ordinals=True), 3)
@@ -1742,5 +1749,328 @@ def test_with_conf(text, expected_lang, min_conf=0.8):
         test_with_conf("American", 'en-us')
 
 
+class TestNumberSpans(unittest.TestCase):
+    def test_number_spans(self):
+        self.assertEqual(extract_number_spans("this is test 1 2 3 666 1.5"),
+                         [(1, (13, 14)),
+                          (2, (15, 16)),
+                          (3, (17, 18)),
+                          (666, (19, 22)),
+                          (1.5, (23, 26))])
+        self.assertEqual(extract_number_spans("this is test 1.5.5"),
+                         [(1.5, (13, 16)),
+                          (5, (17, 18))])
+
+    def test_number_spans_frac(self):
+        self.assertEqual(extract_number_spans("2 and 3/4"),
+                         [(2.75, (0, 9))])
+        self.assertEqual(extract_number_spans("2 and 3/4 and after that "
+                                              "comes 1.5"),
+                         [(2.75, (0, 9)),
+                          (1.5, (31, 34))])
+        self.assertEqual(extract_number_spans("2 and 3/4 and after that "
+                                              "comes 0.5"),
+                         [(2.75, (0, 9)),
+                          (0.5, (31, 34))])
+        self.assertEqual(extract_number_spans("2 and 3/4 and 27"),
+                         [(2.75, (0, 9)),
+                          (27, (14, 16))])
+
+    def test_number_spoken_frac(self):
+        self.assertEqual(extract_number_spans("half cup"),
+                         [(0.5, (0, 4))])
+        self.assertEqual(extract_number_spans("third cup"),
+                         [(1 / 3, (0, 5))])
+
+    def test_number_ordinals(self):
+        self.assertEqual(extract_number_spans("this is the 1st the 2nd the "
+                                              "3rd the 4th and the Nth"),
+                         [(1, (12, 15)),
+                          (2, (20, 23)),
+                          (3, (28, 31)),
+                          (4, (36, 39))])
+
+    def test_number_spoken_ordinals(self):
+        self.assertEqual(extract_number_spans("fourth cup", ordinals=True),
+                         [(4, (0, 6))])
+        self.assertEqual(extract_number_spans("third cup", ordinals=True),
+                         [(3, (0, 5))])
+
+    def test_integers(self):
+        self.assertEqual(extract_number_spans("number one"),
+                         [(1, (7, 10))])
+        # spans are (start, end) character offsets, end exclusive
+        self.assertEqual(extract_number_spans("number one number two number "
+                                              "four"),
+                         [(1, (7, 10)),
+                          (2, (18, 21)),
+                          (4, (29, 33))])
+
+    def test_scale(self):
+        self.assertEqual(extract_number_spans("a trillion numbers"),
+                         [(1e+12, (2, 10))])
+        self.assertEqual(extract_number_spans("a trillion numbers",
+                                              short_scale=False),
+                         [(1e+18, (2, 10))])
+
+
+class TestExtractNumberV2(unittest.TestCase):
+    def test_extract_percent(self):
+        self.assertEqual(extract_number_en_v2("totally 100%"), 100)
+
+    def test_extract_number_decimal_markers(self):
+        # Test decimal normalization
+        self.assertEqual(extract_number_en_v2("4,4", decimal=','), 4.4)
+        self.assertEqual(extract_number_en_v2("we have 3,5 kilometers to go",
+                                              decimal=','), 3.5)
+
+    def test_extract_number_priority(self):
+        # sanity check
+        self.assertEqual(extract_number_en_v2("third", ordinals=True), 3)
+        self.assertEqual(extract_number_en_v2("sixth", ordinals=True), 6)
+        self.assertEqual(extract_number_en_v2("sixth third", ordinals=True), 6)
+        self.assertEqual(extract_number_en_v2("third sixth", ordinals=True), 3)
+
+        # TODO FIXME
+        self.assertEqual(extract_number_en_v2("Twenty two and Three Fifths",
+                                              ordinals=True), 22)
+
+    def test_extract_number_explicit_ordinals(self):
+        # test explicit ordinals
+        self.assertEqual(extract_number_en_v2("this is the 1st",
+                                              ordinals=True), 1)
+        self.assertEqual(extract_number_en_v2("this is the 2nd",
+                                              ordinals=False), 2)
+        self.assertEqual(extract_number_en_v2("this is the 3rd",
+                                              ordinals=None), 3)
+        self.assertEqual(extract_number_en_v2("this is the 4th",
+                                              ordinals=None), 4)
+        self.assertEqual(extract_number_en_v2("this is the 7th test",
+                                              ordinals=True), 7)
+        self.assertEqual(extract_number_en_v2("this is the 7th test",
+                                              ordinals=False), 7)
+        self.assertEqual(extract_number_en_v2("this is the 1st test"), 1)
+        self.assertEqual(extract_number_en_v2("this is the 2nd test"), 2)
+        self.assertEqual(extract_number_en_v2("this is the 3rd test"), 3)
+        self.assertEqual(extract_number_en_v2("this is the 31st test"), 31)
+        self.assertEqual(extract_number_en_v2("this is the 32nd test"), 32)
+        self.assertEqual(extract_number_en_v2("this is the 33rd test"), 33)
+        self.assertEqual(extract_number_en_v2("this is the 34th test"), 34)
+
+        self.assertTrue(extract_number_en_v2("this is the nth test") is False)
+
+    def test_extract_number_spoken_ordinals(self):
+        # test non ambiguous ordinals
+        self.assertEqual(extract_number_en_v2("this is the first test",
+                                              ordinals=True), 1)
+        self.assertEqual(extract_number_en_v2("this is the first test",
+                                              ordinals=False), False)
+        self.assertEqual(extract_number_en_v2("this is the first test",
+                                              ordinals=None), False)
+
+        # test ambiguous ordinal/time unit
+        self.assertEqual(extract_number_en_v2("this is second test",
+                                              ordinals=True), 2)
+        self.assertEqual(extract_number_en_v2("this is second test",
+                                              ordinals=False), False)
+        self.assertEqual(extract_number_en_v2("remind me in a second",
+                                              ordinals=True), 2)
+        self.assertEqual(extract_number_en_v2("remind me in a second",
+                                              ordinals=False), False)
+        self.assertEqual(extract_number_en_v2("remind me in a second",
+                                              ordinals=None), False)
+
+        # test ambiguous ordinal/fractional
+        self.assertEqual(extract_number_en_v2("this is the third test",
+                                              ordinals=True), 3.0)
+        self.assertEqual(extract_number_en_v2("this is the third test",
+                                              ordinals=False), 1.0 / 3.0)
+        self.assertEqual(extract_number_en_v2("this is the third test",
+                                              ordinals=None), False)
+
+        # TODO FIXME
+        self.assertEqual(extract_number_en_v2("one third of a cup",
+                                              ordinals=False), 1.0 / 3.0)
+        self.assertEqual(extract_number_en_v2("one third of a cup",
+                                              ordinals=True), 3)
+        self.assertEqual(extract_number_en_v2("one third of a cup",
+                                              ordinals=None), 1)
+
+    def test_extract_number_nth_one(self):
+        # test the Nth one
+        self.assertEqual(extract_number_en_v2("the fourth one",
+                                              ordinals=True), 4.0)
+        self.assertEqual(extract_number_en_v2("you are the second one",
+                                              ordinals=False), 1)
+        self.assertEqual(extract_number_en_v2("you are the second one",
+                                              ordinals=True), 2)
+        self.assertEqual(extract_number_en_v2("you are the 1st one",
+                                              ordinals=None), 1)
+        self.assertEqual(extract_number_en_v2("you are the 2nd one",
+                                              ordinals=None), 2)
+        self.assertEqual(extract_number_en_v2("you are the 3rd one",
+                                              ordinals=None), 3)
+        self.assertEqual(extract_number_en_v2("you are the 8th one",
+                                              ordinals=None), 8)
+
+        # TODO FIXME
+        self.assertEqual(extract_number_en_v2("the thirty sixth one",
+                                              ordinals=True), 36.0)
+
+    def test_scale(self):
+        # test big numbers / short vs long scale
+        self.assertEqual(extract_number_en_v2("this is the billionth test",
+                                              ordinals=True), 1e09)
+        self.assertEqual(extract_number_en_v2("this is the billionth test",
+                                              ordinals=None), False)
+
+        self.assertEqual(extract_number_en_v2("this is the billionth test",
+                                              ordinals=False), 1e-9)
+        self.assertEqual(extract_number_en_v2("this is the billionth test",
+                                              ordinals=True,
+                                              short_scale=False), 1e12)
+        self.assertEqual(extract_number_en_v2("this is the billionth test",
+                                              ordinals=None,
+                                              short_scale=False), False)
+        self.assertEqual(extract_number_en_v2("this is the billionth test",
+                                              short_scale=False), 1e-12)
+
+    def test_extract_number_ambiguous_fraction_ordinal(self):
+        # confirm these are not cumulative, prev version would multiple them
+        self.assertEqual(extract_number_en_v2("sixth third", ordinals=False),
+                         1 / 6)
+
+        # test plurals
+        # NOTE plurals are never considered ordinals, but also not
+        # considered explicit fractions
+        self.assertEqual(extract_number_en_v2("2 fifths",
+                                              ordinals=True), 2)
+        self.assertEqual(extract_number_en_v2("2 fifth",
+                                              ordinals=True), 5)
+        self.assertEqual(extract_number_en_v2("2 fifths",
+                                              ordinals=False), 2 / 5)
+        self.assertEqual(extract_number_en_v2("2 fifths",
+                                              ordinals=None), 2)
+
+        self.assertEqual(extract_number_en_v2("Twenty two and Three Fifths"),
+                         22.6)
+
+        # test multiple ambiguous
+        self.assertEqual(extract_number_en_v2("sixth third", ordinals=None),
+                         False)
+        self.assertEqual(extract_number_en_v2("thirty second", ordinals=False),
+                         30)
+        self.assertEqual(extract_number_en_v2("thirty second", ordinals=None),
+                         30)
+        self.assertEqual(extract_number_en_v2("thirty second", ordinals=True),
+                         32)
+
+        self.assertEqual(extract_number_en_v2("sixth third", ordinals=True),
+                         6)
+
+    def test_extract_number_negative(self):
+        self.assertEqual(extract_number_en_v2("minus two"), -2)
+        self.assertEqual(extract_number_en_v2("minus 2"), -2)
+        self.assertEqual(extract_number_en_v2("negative two"), -2)
+        self.assertEqual(extract_number_en_v2("minus 1/3"), - 1 / 3)
+        self.assertEqual(extract_number_en_v2("-2"), -2)
+        self.assertEqual(extract_number_en_v2("- 2"), -2)
+        self.assertEqual(extract_number_en_v2("-1/3"), - 1/3)
+        self.assertEqual(extract_number_en_v2("- 1/3"), - 1 / 3)
+
+    def test_extract_number_fracs(self):
+        self.assertEqual(extract_number_en_v2("1/3 cups"), 1.0 / 3.0)
+        self.assertEqual(extract_number_en_v2("quarter cup"), 0.25)
+        self.assertEqual(extract_number_en_v2("1/4 cup"), 0.25)
+        self.assertEqual(extract_number_en_v2("2/3 cups"), 2.0 / 3.0)
+        self.assertEqual(extract_number_en_v2("3/4 cups"), 3.0 / 4.0)
+        self.assertEqual(extract_number_en_v2("1 and 3/4 cups"), 1.75)
+
+        # TODO FIXME
+        self.assertEqual(extract_number_en_v2("three quarter cups"), 3.0 / 4.0)
+        self.assertEqual(extract_number_en_v2("three quarters cups"),
+                         3.0 / 4.0)
+        self.assertEqual(extract_number_en_v2("one and one half cups"), 1.5)
+        self.assertEqual(extract_number_en_v2("one and a half cups"), 1.5)
+        self.assertEqual(extract_number_en_v2("one cup and a half"), 1.5)
+        self.assertEqual(extract_number_en_v2("1 cup and a half"), 1.5)
+        self.assertEqual(extract_number_en_v2("one fourth cup"), 0.25)
+
+    def test_extract_number(self):
+        self.assertEqual(extract_number_en_v2("this is 2 test"), 2)
+        self.assertEqual(extract_number_en_v2("this is test number 4"), 4)
+        self.assertEqual(extract_number_en_v2("three cups"), 3)
+        self.assertEqual(extract_number_en_v2("twenty two"), 22)
+        self.assertEqual(extract_number_en_v2(
+            "Twenty two with a leading capital letter"), 22)
+        self.assertEqual(extract_number_en_v2(
+            "twenty Two with Two capital letters"), 22)
+        self.assertEqual(extract_number_en_v2(
+            "twenty Two with mixed capital letters"), 22)
+        self.assertEqual(extract_number_en_v2("two hundred"), 200)
+        self.assertEqual(extract_number_en_v2("nine thousand"), 9000)
+        self.assertEqual(extract_number_en_v2("six hundred sixty six"), 666)
+        self.assertEqual(extract_number_en_v2("two million"), 2000000)
+        self.assertEqual(extract_number_en_v2(
+            "two million five hundred thousand tons of spinning metal"),
+            2500000)
+        self.assertEqual(extract_number_en_v2("six trillion"), 6000000000000.0)
+        self.assertEqual(extract_number_en_v2("six trillion",
+                                              short_scale=False), 6e+18)
+        self.assertEqual(extract_number_en_v2("one point five"), 1.5)
+        self.assertEqual(extract_number_en_v2("three dot fourteen"), 3.14)
+        self.assertEqual(extract_number_en_v2("zero point two"), 0.2)
+        self.assertEqual(extract_number_en_v2("billions of years older"),
+                         1000000000.0)
+        self.assertEqual(extract_number_en_v2(
+            "billions of years older", short_scale=False), 1000000000000.0)
+        self.assertEqual(extract_number_en_v2("one hundred thousand"), 100000)
+
+        self.assertEqual(extract_number_en_v2("negative seventy"), -70)
+        self.assertEqual(extract_number_en_v2("thousand million"), 1000000000)
+
+        # Verify non-power multiples of ten no longer discard
+        # adjacent multipliers
+        self.assertEqual(extract_number_en_v2("twenty thousand"), 20000)
+        self.assertEqual(extract_number_en_v2("fifty million"), 50000000)
+
+        # Verify smaller powers of ten no longer cause miscalculation of larger
+        # powers of ten (see MycroftAI#86)
+        self.assertEqual(extract_number_en_v2("twenty billion three hundred million \
+                                        nine hundred fifty thousand six hundred \
+                                        seventy five point eight"),
+                         20300950675.8)
+        self.assertEqual(extract_number_en_v2("nine hundred ninety nine million nine \
+                                        hundred ninety nine thousand nine \
+                                        hundred ninety nine point nine"),
+                         999999999.9)
+
+        # TODO why does "trillion" result in xxxx.0?
+        self.assertEqual(extract_number_en_v2("eight hundred trillion two hundred \
+                                        fifty seven"), 800000000000257.0)
+
+        # TODO handle this case
+        # self.assertEqual(
+        #    extract_number_en_v2("6 dot six six six"),
+        #    6.666)
+
+    def test_extract_no_number(self):
+        self.assertTrue(
+            extract_number_en_v2("The tennis player is fast") is False)
+        self.assertTrue(extract_number_en_v2("fraggle") is False)
+
+        self.assertTrue(extract_number_en_v2("grobo 0") is not False)
+        self.assertEqual(extract_number_en_v2("grobo 0"), 0)
+
+        self.assertTrue(extract_number_en_v2("fraggle zero") is not False)
+        self.assertEqual(extract_number_en_v2("fraggle zero"), 0)
+
+    def test_extract_couple_number(self):
+        # TODO FIXME
+        self.assertEqual(extract_number_en_v2("a couple of beers"), 2)
+        self.assertEqual(extract_number_en_v2("a couple hundred beers"), 200)
+        self.assertEqual(extract_number_en_v2("a couple thousand beers"), 2000)
+
+
 if __name__ == "__main__":
     unittest.main()

From 4eebadaf44b239569267db3c69428065c3e284c3 Mon Sep 17 00:00:00 2001
From: jarbasai <jarbasai@mailfence.com>
Date: Sun, 27 Nov 2022 14:57:30 +0000
Subject: [PATCH 2/2] cleanup

---
 lingua_franca/lang/parse_en.py  | 23 ++++++++++++-----------
 lingua_franca/parse.py          | 14 ++++++++++----
 test/unittests/test_parse_en.py |  2 +-
 3 files changed, 23 insertions(+), 16 deletions(-)

diff --git a/lingua_franca/lang/parse_en.py b/lingua_franca/lang/parse_en.py
index 761c14aa..b4203bbf 100644
--- a/lingua_franca/lang/parse_en.py
+++ b/lingua_franca/lang/parse_en.py
@@ -531,28 +531,28 @@ def _initialize_number_data_en(short_scale, speech=True):
     return multiplies, string_num_ordinal_en, string_num_scale_en
 
 
-def extract_number_spans_en(utterance, short_scale=True, ordinals=False,
-                            fractional_numbers=True, decimal="."):
+def extract_number_spans_en(text, short_scale=True, ordinals=False,
+                            decimal=".", fractional_numbers=True):
     """
         This function tags numbers in an utterance.
 
         Args:
-            utterance (str): the string to normalize
+            text (str): the string to normalize
             short_scale (bool): use short scale if True, long scale if False
             ordinals (bool): consider ordinal numbers, third=3 instead of 1/3
+            decimal (str): decimal marker
             fractional_numbers (bool): True if we should look for fractions and
                                        decimals.
-            decimal (str): decimal marker
         Returns:
             (list): list of tuples with detected number and span of the
                     number in parent utterance [(number, (start_idx, end_idx))]
 
         """
     number_spans = []
-    if isinstance(utterance, str):
-        spans = span_indexed_word_tokenize(utterance)
+    if isinstance(text, str):
+        spans = span_indexed_word_tokenize(text)
     else:
-        spans = utterance
+        spans = text
 
     # load language number data
     multiplies, string_num_ordinal, string_num_scale = \
@@ -748,14 +748,15 @@ def found_number():
     return number_spans
 
 
-def extract_number_en_v2(*args, **kwargs):
-    spans = extract_number_spans_en(*args, **kwargs)
+def extract_number_en_v2(text, short_scale=True, ordinals=False, decimal='.', fractional_numbers=True):
+    spans = extract_number_spans_en(text, short_scale=short_scale, ordinals=ordinals,
+                                    decimal=decimal, fractional_numbers=fractional_numbers)
     if not spans:
         return False
-    return extract_number_spans_en(*args, **kwargs)[0][0]
+    return spans[0][0]
 
 
-def extract_number_en(text, short_scale=True, ordinals=False, decimal='.'):
+def extract_number_en(text, short_scale=True, ordinals=False, decimal='.', fractional_numbers=True):
     """
     This function extracts a number from a text string,
     handles pronunciations in long scale and short scale
diff --git a/lingua_franca/parse.py b/lingua_franca/parse.py
index 76dbad4e..ee1cadec 100644
--- a/lingua_franca/parse.py
+++ b/lingua_franca/parse.py
@@ -46,13 +46,13 @@
 
 
 @localized_function(run_own_code_on=[FunctionNotLocalizedError])
-def extract_number_spans(utterance, short_scale=True, ordinals=False,
-                         fractional_numbers=True, decimal=".", lang=''):
+def extract_number_spans(text, short_scale=True, ordinals=False,
+                        decimal=".", fractional_numbers=True, lang=''):
     """
         This function tags numbers in an utterance.
 
         Args:
-            utterance (str): the string to normalize
+            text (str): the string to normalize
             short_scale (bool): use short scale if True, long scale if False
             ordinals (bool): consider ordinal numbers, third=3 instead of 1/3
             fractional_numbers (bool): True if we should look for fractions and
@@ -66,7 +66,7 @@ def extract_number_spans(utterance, short_scale=True, ordinals=False,
 
         """
     number_spans = []
-    spans = span_indexed_word_tokenize(utterance)
+    spans = span_indexed_word_tokenize(text)
     for idx, (start, end, word) in enumerate(spans):
         next_span = spans[idx + 1] if idx + 1 < len(spans) else ()
         next_next_span = spans[idx + 2] if idx + 2 < len(spans) else ()
@@ -114,6 +114,8 @@ def extract_numbers(text, short_scale=True, ordinals=False, lang='',
         lang (str, optional): an optional BCP-47 language code, if omitted
                               the default language will be used.
         decimal (str): character to use as decimal point. defaults to '.'
+        fractional_numbers (bool): True if we should look for fractions and
+                                   decimals.
     Returns:
         list: list of extracted numbers as floats, or empty list if none found
     Note:
@@ -151,6 +153,8 @@ def extract_first_number(text, short_scale=True, ordinals=False, lang='',
             lang (str, optional): an optional BCP-47 language code, if omitted
                                   the default language will be used.
             decimal (str): character to use as decimal point. defaults to '.'
+            fractional_numbers (bool): True if we should look for fractions and
+                                       decimals.
         Returns:
             (int, float or False): The number extracted or False if the input
                                    text contains no numbers
@@ -179,6 +183,8 @@ def extract_last_number(text, short_scale=True, ordinals=False, lang='',
             lang (str, optional): an optional BCP-47 language code, if omitted
                                   the default language will be used.
             decimal (str): character to use as decimal point. defaults to '.'
+            fractional_numbers (bool): True if we should look for fractions and
+                                       decimals.
         Returns:
             (int, float or False): The number extracted or False if the input
                                    text contains no numbers
diff --git a/test/unittests/test_parse_en.py b/test/unittests/test_parse_en.py
index 4bd066bf..2c1aedef 100644
--- a/test/unittests/test_parse_en.py
+++ b/test/unittests/test_parse_en.py
@@ -1936,7 +1936,7 @@ def test_scale(self):
                                               short_scale=False), 1e-12)
 
     def test_extract_number_ambiguous_fraction_ordinal(self):
-        # confirm these are not cumulative, prev version would multiple them
+        # confirm these are not cumulative, prev version would multiply them
         self.assertEqual(extract_number_en_v2("sixth third", ordinals=False),
                          1 / 6)