From 66265876997cc16864e5542d319930e5ce81a902 Mon Sep 17 00:00:00 2001
From: jarbasai <jarbasai@mailfence.com>
Date: Sun, 9 May 2021 13:20:35 +0100
Subject: [PATCH] support decimal markers

rebase of https://github.com/MycroftAI/lingua-franca/pull/69
---
 lingua_franca/internal.py          |  8 +++++
 lingua_franca/lang/parse_common.py | 12 +++++++
 lingua_franca/lang/parse_cs.py     | 28 ++++++++++-----
 lingua_franca/lang/parse_da.py     | 45 ++++++++++++++---------
 lingua_franca/lang/parse_de.py     | 58 +++++++++++++++++++-----------
 lingua_franca/lang/parse_en.py     | 17 +++++++--
 lingua_franca/lang/parse_es.py     | 41 ++++++++++++++-------
 lingua_franca/lang/parse_eu.py     | 24 ++++++++-----
 lingua_franca/lang/parse_fa.py     | 29 ++++++++++-----
 lingua_franca/lang/parse_fr.py     | 44 ++++++++++++++++-------
 lingua_franca/lang/parse_it.py     | 29 ++++++++++-----
 lingua_franca/lang/parse_nl.py     | 34 ++++++++++++------
 lingua_franca/lang/parse_pl.py     | 32 ++++++++++++-----
 lingua_franca/lang/parse_pt.py     | 21 ++++++++---
 lingua_franca/lang/parse_ru.py     | 24 ++++++++-----
 lingua_franca/lang/parse_sv.py     | 11 +++++-
 lingua_franca/parse.py             | 15 ++++++--
 test/unittests/test_parse_en.py    | 11 ++++++
 18 files changed, 350 insertions(+), 133 deletions(-)

diff --git a/lingua_franca/internal.py b/lingua_franca/internal.py
index 3e389181..497c0db1 100644
--- a/lingua_franca/internal.py
+++ b/lingua_franca/internal.py
@@ -561,6 +561,14 @@ def _call_localized_function(func, *args, **kwargs):
             # If we didn't find a localized function to correspond with
             # the wrapped function, we cached NotImplementedError in its
             # place.
+
+            # First account for the function not being present in any
+            # module, meaning all modules fall back to a catch-all
+            # parser. This usually means the function will only need
+            # localization in future languages not currently supported.
+            if func_name not in _localized_functions[_module_name][lang_code]:
+                raise FunctionNotLocalizedError(func_name, lang_code)
+
             loc_signature = _localized_functions[_module_name][lang_code][func_name]
             if isinstance(loc_signature, type(NotImplementedError())):
                 raise loc_signature
diff --git a/lingua_franca/lang/parse_common.py b/lingua_franca/lang/parse_common.py
index 97cf5be7..f140ac9d 100644
--- a/lingua_franca/lang/parse_common.py
+++ b/lingua_franca/lang/parse_common.py
@@ -192,6 +192,18 @@ def normalize(self, utterance="", remove_articles=None):
         return utterance
 
 
+def normalize_decimals(text, decimal, lang=""):
+    """
+        Replace 'decimal' between digits with '.' so Python can floatify
+        them, e.g. "3,5" -> "3.5" for decimal=','. 'lang' is unused.
+    """
+    if not decimal or decimal == '.':
+        return text  # nothing to rewrite
+    # re.escape: markers such as '|' or '*' must match literally
+    pattern = r"\b(\d+)" + re.escape(decimal) + r"(\d+)\b"
+    return re.sub(pattern, r"\1.\2", text)
+
+
 def match_yes_or_no(text, lang):
     resource_file = resolve_resource_file(f"text/{lang}/yesno.json")
     if not resource_file:
diff --git a/lingua_franca/lang/parse_cs.py b/lingua_franca/lang/parse_cs.py
index e0144b02..7119f738 100644
--- a/lingua_franca/lang/parse_cs.py
+++ b/lingua_franca/lang/parse_cs.py
@@ -23,7 +23,7 @@
     _LONG_ORDINAL_CS, _LONG_SCALE_CS, _SHORT_SCALE_CS, _SHORT_ORDINAL_CS, \
     _FRACTION_STRING_CS, _MONTHS_CONVERSION, _MONTHS_CZECH, _TIME_UNITS_CONVERSION, \
     _ORDINAL_BASE_CS  # _ARTICLES_CS
-
+from lingua_franca.lang.parse_common import normalize_decimals
 import re
 import json
 from lingua_franca import resolve_resource_file
@@ -579,7 +579,7 @@ def _initialize_number_data(short_scale):
     return multiplies, string_num_ordinal_cs, string_num_scale_cs
 
 
-def extract_number_cs(text, short_scale=True, ordinals=False):
+def extract_number_cs(text, short_scale=True, ordinals=False, decimal='.'):
     """
     This function extracts a number from a text string,
     handles pronunciations in long scale and short scale
@@ -590,11 +590,17 @@ def extract_number_cs(text, short_scale=True, ordinals=False):
         text (str): the string to normalize
         short_scale (bool): use short scale if True, long scale if False
         ordinals (bool): consider ordinal numbers, third=3 instead of 1/3
+        decimal (str): character to use as decimal point. defaults to '.'
     Returns:
         (int) or (float) or False: The extracted number or False if no number
                                    was found
+    Note:
+        will always extract numbers formatted with a decimal dot/full stop,
+        such as '3.5', even if 'decimal' is specified.
 
     """
+    if decimal != '.':
+        text = normalize_decimals(text, decimal)
     return _extract_number_with_text_cs(tokenize(text.lower()),
                                         short_scale, ordinals).value
 
@@ -1560,20 +1566,24 @@ def isFractional_cs(input_str, short_scale=True):
     return False
 
 
-def extract_numbers_cs(text, short_scale=True, ordinals=False):
+def extract_numbers_cs(text, short_scale=True, ordinals=False, decimal='.'):
     """
         Takes in a string and extracts a list of numbers.
 
     Args:
-        text (str): the string to extract a number from
-        short_scale (bool): Use "short scale" or "long scale" for large
-            numbers -- over a million.  The default is short scale, which
-            is now common in most English speaking countries.
-            See https://en.wikipedia.org/wiki/Names_of_large_numbers
-        ordinals (bool): consider ordinal numbers, e.g. third=3 instead of 1/3
+        text (str): the string to normalize
+        short_scale (bool): use short scale if True, long scale if False
+        ordinals (bool): consider ordinal numbers, third=3 instead of 1/3
+        decimal (str): character to use as decimal point. defaults to '.'
     Returns:
         list: list of extracted numbers as floats
+    Note:
+        will always extract numbers formatted with a decimal dot/full stop,
+        such as '3.5', even if 'decimal' is specified.
+
     """
+    if decimal != '.':
+        text = normalize_decimals(text, decimal)
     results = _extract_numbers_with_text_cs(tokenize(text),
                                             short_scale, ordinals)
     return [float(result.value) for result in results]
diff --git a/lingua_franca/lang/parse_da.py b/lingua_franca/lang/parse_da.py
index 14b18132..c1c30020 100644
--- a/lingua_franca/lang/parse_da.py
+++ b/lingua_franca/lang/parse_da.py
@@ -20,22 +20,31 @@
 from lingua_franca.lang.common_data_da import _DA_NUMBERS
 from lingua_franca.lang.format_da import pronounce_number_da
 from lingua_franca.time import now_local
+from lingua_franca.lang.parse_common import normalize_decimals
 
 
-def extract_number_da(text, short_scale=True, ordinals=False):
+def extract_number_da(text, short_scale=True, ordinals=False, decimal='.'):
     """
-    This function prepares the given text for parsing by making
-    numbers consistent, getting rid of contractions, etc.
+    This function extracts a number from a text string,
+    handles pronunciations in long scale and short scale
+
+    https://en.wikipedia.org/wiki/Names_of_large_numbers
+
     Args:
         text (str): the string to normalize
+        short_scale (bool): use short scale if True, long scale if False
+        ordinals (bool): consider ordinal numbers, third=3 instead of 1/3
+        decimal (str): character to use as decimal point. defaults to '.'
     Returns:
-        (int) or (float): The value of extracted number
-
-
-    undefined articles cannot be suppressed in German:
-    'ein Pferd' means 'one horse' and 'a horse'
+        (int) or (float) or False: The extracted number or False if no number
+                                   was found
+    Note:
+        will always extract numbers formatted with a decimal dot/full stop,
+        such as '3.5', even if 'decimal' is specified.
 
     """
+    if decimal != '.':
+        text = normalize_decimals(text, decimal)
     # TODO: short_scale and ordinals don't do anything here.
     # The parameters are present in the function signature for API compatibility
     # reasons.
@@ -869,20 +878,24 @@ def normalize_da(text, remove_articles=True):
     return normalized[1:]  # strip the initial space
 
 
-def extract_numbers_da(text, short_scale=True, ordinals=False):
+def extract_numbers_da(text, short_scale=True, ordinals=False, decimal='.'):
     """
         Takes in a string and extracts a list of numbers.
 
-    Args:
-        text (str): the string to extract a number from
-        short_scale (bool): Use "short scale" or "long scale" for large
-            numbers -- over a million.  The default is short scale, which
-            is now common in most English speaking countries.
-            See https://en.wikipedia.org/wiki/Names_of_large_numbers
-        ordinals (bool): consider ordinal numbers, e.g. third=3 instead of 1/3
+    Args:
+        text (str): the string to normalize
+        short_scale (bool): use short scale if True, long scale if False
+        ordinals (bool): consider ordinal numbers, third=3 instead of 1/3
+        decimal (str): character to use as decimal point. defaults to '.'
     Returns:
         list: list of extracted numbers as floats
+    Note:
+        will always extract numbers formatted with a decimal dot/full stop,
+        such as '3.5', even if 'decimal' is specified.
+
     """
+    if decimal != '.':
+        text = normalize_decimals(text, decimal)
     return extract_numbers_generic(text, pronounce_number_da, extract_number_da,
                                    short_scale=short_scale, ordinals=ordinals)
 
diff --git a/lingua_franca/lang/parse_de.py b/lingua_franca/lang/parse_de.py
index 95fda48e..2af17e77 100644
--- a/lingua_franca/lang/parse_de.py
+++ b/lingua_franca/lang/parse_de.py
@@ -21,6 +21,7 @@
 from lingua_franca.lang.common_data_de import _DE_NUMBERS
 from lingua_franca.lang.format_de import pronounce_number_de
 from lingua_franca.time import now_local
+from lingua_franca.lang.parse_common import normalize_decimals
 
 
 de_numbers = {
@@ -143,20 +144,28 @@ def repl(match):
     return (duration, text)
 
 
-def extract_number_de(text, short_scale=True, ordinals=False):
+def extract_number_de(text, short_scale=True, ordinals=False, decimal='.'):
     """
-    This function prepares the given text for parsing by making
-    numbers consistent, getting rid of contractions, etc.
+    This function extracts a number from a text string,
+    handles pronunciations in long scale and short scale
+
+    https://en.wikipedia.org/wiki/Names_of_large_numbers
+
     Args:
         text (str): the string to normalize
+        short_scale (bool): use short scale if True, long scale if False
+        ordinals (bool): consider ordinal numbers, third=3 instead of 1/3
+        decimal (str): character to use as decimal point. defaults to '.'
     Returns:
-        (int) or (float): The value of extracted number
-
-
-    undefined articles cannot be suppressed in German:
-    'ein Pferd' means 'one horse' and 'a horse'
+        (int) or (float) or False: The extracted number or False if no number
+                                   was found
+    Note:
+        will always extract numbers formatted with a decimal dot/full stop,
+        such as '3.5', even if 'decimal' is specified.
 
     """
+    if decimal != '.':
+        text = normalize_decimals(text, decimal)
     # TODO: short_scale and ordinals don't do anything here.
     # The parameters are present in the function signature for API compatibility
     # reasons.
@@ -1003,20 +1012,27 @@ def normalize_de(text, remove_articles=True):
     return normalized[1:]  # strip the initial space
 
 
-def extract_numbers_de(text, short_scale=True, ordinals=False):
-    """
-        Takes in a string and extracts a list of numbers.
-
-    Args:
-        text (str): the string to extract a number from
-        short_scale (bool): Use "short scale" or "long scale" for large
-            numbers -- over a million.  The default is short scale, which
-            is now common in most English speaking countries.
-            See https://en.wikipedia.org/wiki/Names_of_large_numbers
-        ordinals (bool): consider ordinal numbers, e.g. third=3 instead of 1/3
-    Returns:
-        list: list of extracted numbers as floats
+def extract_numbers_de(text, short_scale=True, ordinals=False, decimal='.'):
     """
+       Takes in a string and extracts a list of numbers,
+       handles pronunciations in long scale and short scale
+
+       https://en.wikipedia.org/wiki/Names_of_large_numbers
+
+       Args:
+           text (str): the string to normalize
+           short_scale (bool): use short scale if True, long scale if False
+           ordinals (bool): consider ordinal numbers, third=3 instead of 1/3
+           decimal (str): character to use as decimal point. defaults to '.'
+       Returns:
+           list: list of extracted numbers as floats
+       Note:
+           will always extract numbers formatted with a decimal dot/full stop,
+           such as '3.5', even if 'decimal' is specified.
+
+       """
+    if decimal != '.':
+        text = normalize_decimals(text, decimal)
     return extract_numbers_generic(text, pronounce_number_de, extract_number_de,
                                    short_scale=short_scale, ordinals=ordinals)
 
diff --git a/lingua_franca/lang/parse_en.py b/lingua_franca/lang/parse_en.py
index a51ee02c..d7d4902c 100644
--- a/lingua_franca/lang/parse_en.py
+++ b/lingua_franca/lang/parse_en.py
@@ -29,6 +29,7 @@
 from lingua_franca.lang.parse_common import is_numeric, look_for_fractions, \
     invert_dict, ReplaceableNumber, partition_list, tokenize, Token, Normalizer
 from lingua_franca.time import now_local
+from lingua_franca.lang.parse_common import normalize_decimals
 
 
 def _convert_words_to_numbers_en(text, short_scale=True, ordinals=False):
@@ -529,7 +530,7 @@ def _initialize_number_data_en(short_scale, speech=True):
     return multiplies, string_num_ordinal_en, string_num_scale_en
 
 
-def extract_number_en(text, short_scale=True, ordinals=False):
+def extract_number_en(text, short_scale=True, ordinals=False, decimal='.'):
     """
     This function extracts a number from a text string,
     handles pronunciations in long scale and short scale
@@ -540,11 +541,17 @@ def extract_number_en(text, short_scale=True, ordinals=False):
         text (str): the string to normalize
         short_scale (bool): use short scale if True, long scale if False
         ordinals (bool): consider ordinal numbers, third=3 instead of 1/3
+        decimal (str): character to use as decimal point. defaults to '.'
     Returns:
         (int) or (float) or False: The extracted number or False if no number
                                    was found
+    Note:
+        will always extract numbers formatted with a decimal dot/full stop,
+        such as '3.5', even if 'decimal' is specified.
 
     """
+    if decimal != '.':
+        text = normalize_decimals(text, decimal)
     return _extract_number_with_text_en(tokenize(text.lower()),
                                         short_scale, ordinals).value
 
@@ -1655,7 +1662,7 @@ def is_fractional_en(input_str, short_scale=True, spoken=True):
     return False
 
 
-def extract_numbers_en(text, short_scale=True, ordinals=False):
+def extract_numbers_en(text, short_scale=True, ordinals=False, decimal='.'):
     """
         Takes in a string and extracts a list of numbers.
 
@@ -1666,9 +1673,15 @@ def extract_numbers_en(text, short_scale=True, ordinals=False):
             is now common in most English speaking countries.
             See https://en.wikipedia.org/wiki/Names_of_large_numbers
         ordinals (bool): consider ordinal numbers, e.g. third=3 instead of 1/3
+        decimal (str): character to use as decimal point. defaults to '.'
     Returns:
         list: list of extracted numbers as floats
+    Note:
+        will always extract numbers formatted with a decimal dot/full stop,
+        such as '3.5', even if 'decimal' is specified.
     """
+    if decimal != '.':
+        text = normalize_decimals(text, decimal)
     results = _extract_numbers_with_text_en(tokenize(text),
                                             short_scale, ordinals)
     return [float(result.value) for result in results]
diff --git a/lingua_franca/lang/parse_es.py b/lingua_franca/lang/parse_es.py
index 0a810cc4..de718d12 100644
--- a/lingua_franca/lang/parse_es.py
+++ b/lingua_franca/lang/parse_es.py
@@ -20,6 +20,7 @@
 from lingua_franca.lang.format_es import pronounce_number_es
 from lingua_franca.lang.parse_common import *
 from lingua_franca.lang.common_data_es import _ARTICLES_ES, _STRING_NUM_ES
+from lingua_franca.lang.parse_common import normalize_decimals
 
 
 def is_fractional_es(input_str, short_scale=True):
@@ -56,16 +57,28 @@ def is_fractional_es(input_str, short_scale=True):
     return False
 
 
-def extract_number_es(text, short_scale=True, ordinals=False):
+def extract_number_es(text, short_scale=True, ordinals=False, decimal='.'):
     """
-    This function prepares the given text for parsing by making
-    numbers consistent, getting rid of contractions, etc.
+    This function extracts a number from a text string,
+    handles pronunciations in long scale and short scale
+
+    https://en.wikipedia.org/wiki/Names_of_large_numbers
+
     Args:
         text (str): the string to normalize
+        short_scale (bool): use short scale if True, long scale if False
+        ordinals (bool): consider ordinal numbers, third=3 instead of 1/3
+        decimal (str): character to use as decimal point. defaults to '.'
     Returns:
-        (int) or (float): The value of extracted number
+        (int) or (float) or False: The extracted number or False if no number
+                                   was found
+    Note:
+        will always extract numbers formatted with a decimal dot/full stop,
+        such as '3.5', even if 'decimal' is specified.
 
     """
+    if decimal != '.':
+        text = normalize_decimals(text, decimal)
     # TODO: short_scale and ordinals don't do anything here.
     # The parameters are present in the function signature for API compatibility
     # reasons.
@@ -268,20 +281,24 @@ def es_number(i):
     return es_number(i)
 
 
-def extract_numbers_es(text, short_scale=True, ordinals=False):
+def extract_numbers_es(text, short_scale=True, ordinals=False, decimal='.'):
     """
         Takes in a string and extracts a list of numbers.
 
-    Args:
-        text (str): the string to extract a number from
-        short_scale (bool): Use "short scale" or "long scale" for large
-            numbers -- over a million.  The default is short scale, which
-            is now common in most English speaking countries.
-            See https://en.wikipedia.org/wiki/Names_of_large_numbers
-        ordinals (bool): consider ordinal numbers, e.g. third=3 instead of 1/3
+    Args:
+        text (str): the string to normalize
+        short_scale (bool): use short scale if True, long scale if False
+        ordinals (bool): consider ordinal numbers, third=3 instead of 1/3
+        decimal (str): character to use as decimal point. defaults to '.'
     Returns:
         list: list of extracted numbers as floats
+    Note:
+        will always extract numbers formatted with a decimal dot/full stop,
+        such as '3.5', even if 'decimal' is specified.
+
     """
+    if decimal != '.':
+        text = normalize_decimals(text, decimal)
     return extract_numbers_generic(text, pronounce_number_es,
                                    extract_number_es, short_scale=short_scale,
                                    ordinals=ordinals)
diff --git a/lingua_franca/lang/parse_eu.py b/lingua_franca/lang/parse_eu.py
index 8d10162b..46ec0c80 100644
--- a/lingua_franca/lang/parse_eu.py
+++ b/lingua_franca/lang/parse_eu.py
@@ -23,6 +23,7 @@
 from lingua_franca.lang.format_eu import pronounce_number_eu
 from lingua_franca.lang.parse_common import *
 from lingua_franca.lang.common_data_eu import _NUM_STRING_EU
+from lingua_franca.lang.parse_common import normalize_decimals
 
 
 def isFractional_eu(input_str):
@@ -283,20 +284,27 @@ def eu_number(i):
     return eu_number(i)
 
 
-def extract_numbers_eu(text, short_scale=True, ordinals=False):
+def extract_numbers_eu(text, short_scale=True, ordinals=False, decimal='.'):
     """
-        Takes in a string and extracts a list of numbers.
+    Takes in a string and extracts a list of numbers,
+    handles pronunciations in long scale and short scale
+
+    https://en.wikipedia.org/wiki/Names_of_large_numbers
 
     Args:
-        text (str): the string to extract a number from
-        short_scale (bool): Use "short scale" or "long scale" for large
-            numbers -- over a million.  The default is short scale, which
-            is now common in most English speaking countries.
-            See https://en.wikipedia.org/wiki/Names_of_large_numbers
-        ordinals (bool): consider ordinal numbers, e.g. third=3 instead of 1/3
+        text (str): the string to normalize
+        short_scale (bool): use short scale if True, long scale if False
+        ordinals (bool): consider ordinal numbers, third=3 instead of 1/3
+        decimal (str): character to use as decimal point. defaults to '.'
     Returns:
         list: list of extracted numbers as floats
+    Note:
+        will always extract numbers formatted with a decimal dot/full stop,
+        such as '3.5', even if 'decimal' is specified.
+
     """
+    if decimal != '.':
+        text = normalize_decimals(text, decimal)
     return extract_numbers_generic(text, pronounce_number_eu, extract_number_eu,
                                    short_scale=short_scale, ordinals=ordinals)
 
diff --git a/lingua_franca/lang/parse_fa.py b/lingua_franca/lang/parse_fa.py
index 753ac8eb..b888ae32 100644
--- a/lingua_franca/lang/parse_fa.py
+++ b/lingua_franca/lang/parse_fa.py
@@ -19,6 +19,7 @@
                                                _FARSI_ONES, _FARSI_TENS,
                                                _FORMAL_VARIANT)
 from lingua_franca.time import now_local
+from lingua_franca.lang.parse_common import normalize_decimals
 
 
 def _is_number(s):
@@ -307,20 +308,24 @@ def extract_datetime_fa(text, anchorDate=None, default_time=None):
     return (result, " ".join(remainder))
 
 
-def extract_numbers_fa(text, short_scale=True, ordinals=False):
+def extract_numbers_fa(text, short_scale=True, ordinals=False, decimal='.'):
     """
         Takes in a string and extracts a list of numbers.
 
-    Args:
-        text (str): the string to extract a number from
-        short_scale (bool): Use "short scale" or "long scale" for large
-            numbers -- over a million.  The default is short scale, which
-            is now common in most English speaking countries.
-            See https://en.wikipedia.org/wiki/Names_of_large_numbers
-        ordinals (bool): consider ordinal numbers, e.g. third=3 instead of 1/3
+    Args:
+        text (str): the string to normalize
+        short_scale (bool): use short scale if True, long scale if False
+        ordinals (bool): consider ordinal numbers, third=3 instead of 1/3
+        decimal (str): character to use as decimal point. defaults to '.'
     Returns:
         list: list of extracted numbers as floats
+    Note:
+        will always extract numbers formatted with a decimal dot/full stop,
+        such as '3.5', even if 'decimal' is specified.
+
     """
+    if decimal != '.':
+        text = normalize_decimals(text, decimal)
 
     ar = _parse_sentence(text)
     result = []
@@ -330,7 +335,7 @@ def extract_numbers_fa(text, short_scale=True, ordinals=False):
     return result
 
 
-def extract_number_fa(text, ordinals=False):
+def extract_number_fa(text, short_scale=True, ordinals=False, decimal='.'):
     """
     This function extracts a number from a text string,
     handles pronunciations in long scale and short scale
@@ -341,11 +346,17 @@ def extract_number_fa(text, ordinals=False):
         text (str): the string to normalize
         short_scale (bool): use short scale if True, long scale if False
         ordinals (bool): consider ordinal numbers, third=3 instead of 1/3
+        decimal (str): character to use as decimal point. defaults to '.'
     Returns:
         (int) or (float) or False: The extracted number or False if no number
                                    was found
+    Note:
+        will always extract numbers formatted with a decimal dot/full stop,
+        such as '3.5', even if 'decimal' is specified.
 
     """
+    if decimal != '.':
+        text = normalize_decimals(text, decimal)
     x = extract_numbers_fa(text, ordinals=ordinals)
     if (len(x) == 0):
         return False
diff --git a/lingua_franca/lang/parse_fr.py b/lingua_franca/lang/parse_fr.py
index 9728653f..97364182 100644
--- a/lingua_franca/lang/parse_fr.py
+++ b/lingua_franca/lang/parse_fr.py
@@ -23,6 +23,7 @@
 from lingua_franca.lang.common_data_fr import _ARTICLES_FR, _NUMBERS_FR, \
     _ORDINAL_ENDINGS_FR
 from lingua_franca.time import now_local
+from lingua_franca.lang.parse_common import normalize_decimals
 
 
 def extract_duration_fr(text):
@@ -369,13 +370,28 @@ def _number_ordinal_fr(words, i):
     return None
 
 
-def extract_number_fr(text, short_scale=True, ordinals=False):
-    """Takes in a string and extracts a number.
+def extract_number_fr(text, short_scale=True, ordinals=False, decimal='.'):
+    """
+    This function extracts a number from a text string,
+    handles pronunciations in long scale and short scale
+
+    https://en.wikipedia.org/wiki/Names_of_large_numbers
+
     Args:
-        text (str): the string to extract a number from
+        text (str): the string to normalize
+        short_scale (bool): use short scale if True, long scale if False
+        ordinals (bool): consider ordinal numbers, third=3 instead of 1/3
+        decimal (str): character to use as decimal point. defaults to '.'
     Returns:
-        (str): The number extracted or the original text.
+        (int) or (float) or False: The extracted number or False if no number
+                                   was found
+    Note:
+        will always extract numbers formatted with a decimal dot/full stop,
+        such as '3.5', even if 'decimal' is specified.
+
     """
+    if decimal != '.':
+        text = normalize_decimals(text, decimal)
     # TODO: short_scale and ordinals don't do anything here.
     # The parameters are present in the function signature for API compatibility
     # reasons.
@@ -1067,20 +1083,24 @@ def normalize_fr(text, remove_articles=True):
     return normalized[1:]  # strip the initial space
 
 
-def extract_numbers_fr(text, short_scale=True, ordinals=False):
+def extract_numbers_fr(text, short_scale=True, ordinals=False, decimal='.'):
     """
         Takes in a string and extracts a list of numbers.
 
-    Args:
-        text (str): the string to extract a number from
-        short_scale (bool): Use "short scale" or "long scale" for large
-            numbers -- over a million.  The default is short scale, which
-            is now common in most English speaking countries.
-            See https://en.wikipedia.org/wiki/Names_of_large_numbers
-        ordinals (bool): consider ordinal numbers, e.g. third=3 instead of 1/3
+    Args:
+        text (str): the string to normalize
+        short_scale (bool): use short scale if True, long scale if False
+        ordinals (bool): consider ordinal numbers, third=3 instead of 1/3
+        decimal (str): character to use as decimal point. defaults to '.'
     Returns:
         list: list of extracted numbers as floats
+    Note:
+        will always extract numbers formatted with a decimal dot/full stop,
+        such as '3.5', even if 'decimal' is specified.
+
     """
+    if decimal != '.':
+        text = normalize_decimals(text, decimal)
     return extract_numbers_generic(text, pronounce_number_fr, extract_number_fr,
                                    short_scale=short_scale, ordinals=ordinals)
 
diff --git a/lingua_franca/lang/parse_it.py b/lingua_franca/lang/parse_it.py
index 88c7455d..c7eaea35 100644
--- a/lingua_franca/lang/parse_it.py
+++ b/lingua_franca/lang/parse_it.py
@@ -28,6 +28,7 @@
     pronounce_number_it
 from lingua_franca.lang.common_data_it import _SHORT_ORDINAL_STRING_IT, \
     _ARTICLES_IT, _LONG_ORDINAL_STRING_IT, _STRING_NUM_IT
+from lingua_franca.lang.parse_common import normalize_decimals
 
 
 def is_fractional_it(input_str, short_scale=False):
@@ -224,7 +225,7 @@ def _extract_number_long_it(word):
     return value
 
 
-def extract_number_it(text, short_scale=False, ordinals=False):
+def extract_number_it(text, short_scale=False, ordinals=False, decimal='.'):
     """
     This function extracts a number from a text string,
     handles pronunciations in long scale and short scale
@@ -235,11 +236,17 @@ def extract_number_it(text, short_scale=False, ordinals=False):
         text (str): the string to normalize
         short_scale (bool): use short scale if True, long scale if False
         ordinals (bool): consider ordinal numbers, third=3 instead of 1/3
+        decimal (str): character to use as decimal point. defaults to '.'
     Returns:
         (int) or (float) or False: The extracted number or False if no number
                                    was found
+    Note:
+        will always extract numbers formatted with a decimal dot/full stop,
+        such as '3.5', even if 'decimal' is specified.
 
     """
+    if decimal != '.':
+        text = normalize_decimals(text, decimal)
 
     text = text.lower()
     string_num_ordinal_it = {}
@@ -1148,20 +1155,24 @@ def get_gender_it(word, context=""):
     return gender
 
 
-def extract_numbers_it(text, short_scale=False, ordinals=False):
+def extract_numbers_it(text, short_scale=False, ordinals=False, decimal='.'):
     """
         Takes in a string and extracts a list of numbers.
 
-    Args:
-        text (str): the string to extract a number from
-        short_scale (bool): Use "short scale" or "long scale" for large
-            numbers -- over a million.  The default is short scale, which
-            is now common in most English speaking countries.
-            See https://en.wikipedia.org/wiki/Names_of_large_numbers
-        ordinals (bool): consider ordinal numbers, e.g. third=3 instead of 1/3
+    Args:
+        text (str): the string to normalize
+        short_scale (bool): use short scale if True, long scale if False
+        ordinals (bool): consider ordinal numbers, third=3 instead of 1/3
+        decimal (str): character to use as decimal point. defaults to '.'
     Returns:
         list: list of extracted numbers as floats
+    Note:
+        will always extract numbers formatted with a decimal dot/full stop,
+        such as '3.5', even if 'decimal' is specified.
+
     """
+    if decimal != '.':
+        text = normalize_decimals(text, decimal)
     return extract_numbers_generic(text, pronounce_number_it,
                                    extract_number_it,
                                    short_scale=short_scale, ordinals=ordinals)
diff --git a/lingua_franca/lang/parse_nl.py b/lingua_franca/lang/parse_nl.py
index ba197704..8a991b77 100644
--- a/lingua_franca/lang/parse_nl.py
+++ b/lingua_franca/lang/parse_nl.py
@@ -25,6 +25,7 @@
     _NEGATIVES_NL, _SHORT_SCALE_NL, _STRING_LONG_ORDINAL_NL, _STRING_NUM_NL, \
     _STRING_SHORT_ORDINAL_NL, _SUMS_NL
 from lingua_franca.time import now_local
+from lingua_franca.lang.parse_common import normalize_decimals
 import re
 
 
@@ -414,10 +415,10 @@ def _initialize_number_data_nl(short_scale):
     return multiplies, string_num_ordinal_nl, string_num_scale_nl
 
 
-def extract_number_nl(text, short_scale=True, ordinals=False):
-    """Extract a number from a text string
-
-    The function handles pronunciations in long scale and short scale
+def extract_number_nl(text, short_scale=True, ordinals=False, decimal='.'):
+    """
+    This function extracts a number from a text string,
+    handles pronunciations in long scale and short scale
 
     https://en.wikipedia.org/wiki/Names_of_large_numbers
 
@@ -425,10 +426,17 @@ def extract_number_nl(text, short_scale=True, ordinals=False):
         text (str): the string to normalize
         short_scale (bool): use short scale if True, long scale if False
         ordinals (bool): consider ordinal numbers, third=3 instead of 1/3
+        decimal (str): character to use as decimal point. defaults to '.'
     Returns:
         (int) or (float) or False: The extracted number or False if no number
                                    was found
+    Note:
+        will always extract numbers formatted with a decimal dot/full stop,
+        such as '3.5', even if 'decimal' is specified.
+
     """
+    if decimal != '.':
+        text = normalize_decimals(text, decimal)
     return _extract_number_with_text_nl(tokenize(text.lower()),
                                         short_scale, ordinals).value
 
@@ -1294,19 +1302,23 @@ def is_fractional_nl(input_str, short_scale=True):
     return False
 
 
-def extract_numbers_nl(text, short_scale=True, ordinals=False):
+def extract_numbers_nl(text, short_scale=True, ordinals=False, decimal='.'):
     """Takes in a string and extracts a list of numbers.
 
     Args:
-        text (str): the string to extract a number from
-        short_scale (bool): Use "short scale" or "long scale" for large
-            numbers -- over a million.  The default is short scale, which
-            is now common in most English speaking countries.
-            See https://en.wikipedia.org/wiki/Names_of_large_numbers
-        ordinals (bool): consider ordinal numbers, e.g. third=3 instead of 1/3
+        text (str): the string to extract numbers from
+        short_scale (bool): use short scale if True, long scale if False
+        ordinals (bool): consider ordinal numbers, third=3 instead of 1/3
+        decimal (str): character to use as decimal point. defaults to '.'
     Returns:
         list: list of extracted numbers as floats
+    Note:
+        will always extract numbers formatted with a decimal dot/full stop,
+        such as '3.5', even if 'decimal' is specified.
+
     """
+    if decimal != '.':
+        text = normalize_decimals(text, decimal)
     results = _extract_numbers_with_text_nl(tokenize(text),
                                             short_scale, ordinals)
     return [float(result.value) for result in results]
diff --git a/lingua_franca/lang/parse_pl.py b/lingua_franca/lang/parse_pl.py
index 84f83bc8..53f2f43f 100644
--- a/lingua_franca/lang/parse_pl.py
+++ b/lingua_franca/lang/parse_pl.py
@@ -24,6 +24,7 @@
     _TIME_UNITS_NORMALIZATION, _MONTHS_TO_EN, _DAYS_TO_EN, _ORDINAL_BASE_PL, \
     _ALT_ORDINALS_PL
 from lingua_franca.time import now_local
+from lingua_franca.lang.parse_common import normalize_decimals
 import re
 
 
@@ -576,7 +577,7 @@ def _initialize_number_data(short_scale):
     return multiplies, _STRING_SHORT_ORDINAL_PL, string_num_scale
 
 
-def extract_number_pl(text, short_scale=True, ordinals=False):
+def extract_number_pl(text, short_scale=True, ordinals=False, decimal='.'):
     """
     This function extracts a number from a text string,
     handles pronunciations in long scale and short scale
@@ -587,11 +588,17 @@ def extract_number_pl(text, short_scale=True, ordinals=False):
         text (str): the string to normalize
         short_scale (bool): use short scale if True, long scale if False
         ordinals (bool): consider ordinal numbers, third=3 instead of 1/3
+        decimal (str): character to use as decimal point. defaults to '.'
     Returns:
         (int) or (float) or False: The extracted number or False if no number
                                    was found
+    Note:
+        will always extract numbers formatted with a decimal dot/full stop,
+        such as '3.5', even if 'decimal' is specified.
 
     """
+    if decimal != '.':
+        text = normalize_decimals(text, decimal)
     return _extract_number_with_text_pl(tokenize(text.lower()),
                                         True, ordinals).value
 
@@ -1333,20 +1340,27 @@ def isFractional_pl(input_str, short_scale=True):
     return False
 
 
-def extract_numbers_pl(text, short_scale=True, ordinals=False):
+def extract_numbers_pl(text, short_scale=True, ordinals=False, decimal='.'):
     """
-        Takes in a string and extracts a list of numbers.
+    This function extracts a list of numbers from a text string,
+    handles pronunciations in long scale and short scale
+
+    https://en.wikipedia.org/wiki/Names_of_large_numbers
 
     Args:
-        text (str): the string to extract a number from
-        short_scale (bool): Use "short scale" or "long scale" for large
-            numbers -- over a million.  The default is short scale, which
-            is now common in most English speaking countries.
-            See https://en.wikipedia.org/wiki/Names_of_large_numbers
-        ordinals (bool): consider ordinal numbers, e.g. third=3 instead of 1/3
+        text (str): the string to extract numbers from
+        short_scale (bool): use short scale if True, long scale if False
+        ordinals (bool): consider ordinal numbers, third=3 instead of 1/3
+        decimal (str): character to use as decimal point. defaults to '.'
     Returns:
         list: list of extracted numbers as floats
+    Note:
+        will always extract numbers formatted with a decimal dot/full stop,
+        such as '3.5', even if 'decimal' is specified.
+
     """
+    if decimal != '.':
+        text = normalize_decimals(text, decimal)
     results = _extract_numbers_with_text_pl(tokenize(text),
                                             short_scale, ordinals)
     return [float(result.value) for result in results]
diff --git a/lingua_franca/lang/parse_pt.py b/lingua_franca/lang/parse_pt.py
index 356c1e83..c4533063 100644
--- a/lingua_franca/lang/parse_pt.py
+++ b/lingua_franca/lang/parse_pt.py
@@ -29,6 +29,7 @@
 from lingua_franca.internal import resolve_resource_file
 from lingua_franca.lang.parse_common import Normalizer
 from lingua_franca.time import now_local
+from lingua_franca.lang.parse_common import normalize_decimals
 import json
 import re
 import unicodedata
@@ -77,16 +78,28 @@ def is_fractional_pt(input_str, short_scale=True):
     return False
 
 
-def extract_number_pt(text, short_scale=True, ordinals=False):
+def extract_number_pt(text, short_scale=True, ordinals=False, decimal='.'):
     """
-    This function prepares the given text for parsing by making
-    numbers consistent, getting rid of contractions, etc.
+    This function extracts a number from a text string,
+    handles pronunciations in long scale and short scale
+
+    https://en.wikipedia.org/wiki/Names_of_large_numbers
+
     Args:
         text (str): the string to normalize
+        short_scale (bool): currently ignored; kept for API compatibility
+        ordinals (bool): currently ignored; kept for API compatibility
+        decimal (str): character to use as decimal point. defaults to '.'
     Returns:
-        (int) or (float): The value of extracted number
+        (int) or (float) or False: The extracted number or False if no number
+                                   was found
+    Note:
+        will always extract numbers formatted with a decimal dot/full stop,
+        such as '3.5', even if 'decimal' is specified.
 
     """
+    if decimal != '.':
+        text = normalize_decimals(text, decimal)
     # TODO: short_scale and ordinals don't do anything here.
     # The parameters are present in the function signature for API compatibility
     # reasons.
diff --git a/lingua_franca/lang/parse_ru.py b/lingua_franca/lang/parse_ru.py
index cd041ec7..d5345747 100644
--- a/lingua_franca/lang/parse_ru.py
+++ b/lingua_franca/lang/parse_ru.py
@@ -28,6 +28,7 @@
 import json
 from lingua_franca import resolve_resource_file
 from lingua_franca.time import now_local
+from lingua_franca.lang.parse_common import normalize_decimals
 
 
 def generate_plurals_ru(originals):
@@ -1577,20 +1578,27 @@ def is_fractional_ru(input_str, short_scale=True):
     return False
 
 
-def extract_numbers_ru(text, short_scale=True, ordinals=False):
+def extract_numbers_ru(text, short_scale=True, ordinals=False, decimal='.'):
     """
-        Takes in a string and extracts a list of numbers.
+    This function extracts a list of numbers from a text string,
+    handles pronunciations in long scale and short scale
+
+    https://en.wikipedia.org/wiki/Names_of_large_numbers
 
     Args:
-        text (str): the string to extract a number from
-        short_scale (bool): Use "short scale" or "long scale" for large
-            numbers -- over a million.  The default is short scale, which
-            is now common in most English speaking countries.
-            See https://en.wikipedia.org/wiki/Names_of_large_numbers
-        ordinals (bool): consider ordinal numbers, e.g. third=3 instead of 1/3
+        text (str): the string to extract numbers from
+        short_scale (bool): use short scale if True, long scale if False
+        ordinals (bool): consider ordinal numbers, third=3 instead of 1/3
+        decimal (str): character to use as decimal point. defaults to '.'
     Returns:
         list: list of extracted numbers as floats
+    Note:
+        will always extract numbers formatted with a decimal dot/full stop,
+        such as '3.5', even if 'decimal' is specified.
+
     """
+    if decimal != '.':
+        text = normalize_decimals(text, decimal)
     results = _extract_numbers_with_text_ru(tokenize(text),
                                             short_scale, ordinals)
     return [float(result.value) for result in results]
diff --git a/lingua_franca/lang/parse_sv.py b/lingua_franca/lang/parse_sv.py
index 02164111..bb23f2ee 100644
--- a/lingua_franca/lang/parse_sv.py
+++ b/lingua_franca/lang/parse_sv.py
@@ -17,6 +17,7 @@
 from dateutil.relativedelta import relativedelta
 
 from lingua_franca.time import now_local
+from lingua_franca.lang.parse_common import normalize_decimals
 
 from .parse_common import (is_numeric, look_for_fractions, Normalizer,
                            tokenize, Token)
@@ -156,15 +157,23 @@ def extract_duration_sv(text):
     return (td, remainder) if valid else None
 
 
-def extract_number_sv(text, short_scale=True, ordinals=False):
+def extract_number_sv(text, short_scale=True, ordinals=False, decimal='.'):
     """
     This function prepares the given text for parsing by making
     numbers consistent, getting rid of contractions, etc.
     Args:
         text (str): the string to normalize
+        short_scale (bool): currently ignored; kept for API compatibility
+        ordinals (bool): currently ignored; kept for API compatibility
+        decimal (str): character to use as decimal point. defaults to '.'
     Returns:
         (int) or (float): The value of extracted number
+    Note:
+        will always extract numbers formatted with a decimal dot/full stop,
+        such as '3.5', even if 'decimal' is specified.
     """
+    if decimal != '.':
+        text = normalize_decimals(text, decimal)
     # TODO: short_scale and ordinals don't do anything here.
     # The parameters are present in the function signature for API
     # compatibility reasons.
diff --git a/lingua_franca/parse.py b/lingua_franca/parse.py
index f1602717..4870a8a7 100644
--- a/lingua_franca/parse.py
+++ b/lingua_franca/parse.py
@@ -16,6 +16,7 @@
 import json
 from lingua_franca.util import match_one, fuzzy_match, MatchStrategy
 from lingua_franca.lang.parse_common import match_yes_or_no
+import re
 from difflib import SequenceMatcher
 from warnings import warn
 from lingua_franca.time import now_local
@@ -56,7 +57,8 @@ def extract_langcode(text, lang=""):
 
 
 @localized_function()
-def extract_numbers(text, short_scale=True, ordinals=False, lang=''):
+def extract_numbers(text, short_scale=True, ordinals=False, lang='',
+                    decimal='.'):
     """
         Takes in a string and extracts a list of numbers.
 
@@ -69,13 +71,18 @@ def extract_numbers(text, short_scale=True, ordinals=False, lang=''):
         ordinals (bool): consider ordinal numbers, e.g. third=3 instead of 1/3
         lang (str, optional): an optional BCP-47 language code, if omitted
                               the default language will be used.
+        decimal (str): character to use as decimal point. defaults to '.'
     Returns:
         list: list of extracted numbers as floats, or empty list if none found
+    Note:
+        will always extract numbers formatted with a decimal dot/full stop,
+        such as '3.5', even if 'decimal' is specified.
     """
 
 
 @localized_function()
-def extract_number(text, short_scale=True, ordinals=False, lang=''):
+def extract_number(text, short_scale=True, ordinals=False, lang='',
+                   decimal='.'):
     """Takes in a string and extracts a number.
 
     Args:
@@ -87,9 +94,13 @@ def extract_number(text, short_scale=True, ordinals=False, lang=''):
         ordinals (bool): consider ordinal numbers, e.g. third=3 instead of 1/3
         lang (str, optional): an optional BCP-47 language code, if omitted
                               the default language will be used.
+        decimal (str): character to use as decimal point. defaults to '.'
     Returns:
         (int, float or False): The number extracted or False if the input
                                text contains no numbers
+    Note:
+        will always extract numbers formatted with a decimal dot/full stop,
+        such as '3.5', even if 'decimal' is specified.
     """
 
 
diff --git a/test/unittests/test_parse_en.py b/test/unittests/test_parse_en.py
index caae8999..7aeb3df9 100644
--- a/test/unittests/test_parse_en.py
+++ b/test/unittests/test_parse_en.py
@@ -290,6 +290,17 @@ def test_combinations(self):
 
 
 class TestExtractNumber(unittest.TestCase):
+    def test_extract_number_decimal_markers(self):
+        # Test decimal normalization
+        self.assertEqual(extract_number("4,4", decimal=','), 4.4)
+        self.assertEqual(extract_number("we have 3,5 kilometers to go",
+                                        decimal=','), 3.5)
+        self.assertEqual(extract_numbers("this is a seven eight 9,5 test",
+                                         decimal=','),
+                         [7.0, 8.0, 9.5])
+        self.assertEqual(extract_numbers("this is a 7,0 8.0 9,6 test",
+                                         decimal=','), [7.0, 8.0, 9.6])
+
     def test_extract_number_priority(self):
         # sanity check
         self.assertEqual(extract_number("third", ordinals=True), 3)