From 9da700e964a796aeb35d6f87e75d768bf338e99e Mon Sep 17 00:00:00 2001 From: Pain Date: Fri, 28 Oct 2022 18:49:51 +0200 Subject: [PATCH 1/9] Update ci.yml Check the support for python 3,10 --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ccacc72..d0ab320 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -14,7 +14,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest, macos-latest, windows-latest] - python: [3.7, 3.8, 3.9] + python: [3.7, 3.8, 3.9, 3.10] steps: - name: Checkout the repository From dbd4f423be36961f1ee1e8a83319dd0e0667c032 Mon Sep 17 00:00:00 2001 From: Pain Date: Fri, 28 Oct 2022 18:58:20 +0200 Subject: [PATCH 2/9] Update ci.yml I am adding quotes around 3.10 to reading it correctly by YAML --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d0ab320..b975e61 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -14,7 +14,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest, macos-latest, windows-latest] - python: [3.7, 3.8, 3.9, 3.10] + python: [3.7, 3.8, 3.9, '3.10'] steps: - name: Checkout the repository From 4c89c568818d8d36d12530a7aa11b0ee583cdc59 Mon Sep 17 00:00:00 2001 From: Pain Date: Fri, 28 Oct 2022 19:08:04 +0200 Subject: [PATCH 3/9] Update ci.yml Testing the support for Python 3.11 --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b975e61..eb0fce6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -14,7 +14,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest, macos-latest, windows-latest] - python: [3.7, 3.8, 3.9, '3.10'] + python: ['3.7', '3.8', '3.9', '3.10', '3.11'] steps: - name: Checkout the repository From b3722dcab07c357b4a001fc7cfe21310cdfa98f2 Mon Sep 17 00:00:00 2001 From: Pain Date: Fri, 28 Oct 2022 19:42:35 +0200 Subject: [PATCH 4/9] Update ci.yml --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index eb0fce6..0dd97e7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -14,7 +14,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest, macos-latest, windows-latest] - python: ['3.7', '3.8', '3.9', '3.10', '3.11'] + python: ['3.7', '3.8', '3.9', '3.10'] steps: - name: Checkout the repository From 2e357bdda7bc3d61859ea1f4276c1a25d6897f55 Mon Sep 17 00:00:00 2001 From: MuhammadAlBarham Date: Mon, 31 Oct 2022 12:50:43 +0200 Subject: [PATCH 5/9] Update pyproject.toml --- pyproject.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 98d48dd..4394dae 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -52,6 +52,8 @@ GitPython = "^3.1.24" PyGithub = "^1.55" linuxdoc = "^20210324" datasets = "^1.18.2" +numpy = "1.21.5" +pandas = "^1.3.1" [tool.poetry.urls] "Bug Tracker" = "https://github.com/TRoboto/Maha/issues" From 8b8d5403e023ac3bda2dffd06acbcd693573de86 Mon Sep 17 00:00:00 2001 From: MuhammadAlBarham Date: Tue, 1 Nov 2022 21:30:18 +0200 Subject: [PATCH 6/9] Update pyproject.toml --- pyproject.toml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 4394dae..63c19a0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -52,8 +52,7 @@ GitPython = "^3.1.24" PyGithub = "^1.55" linuxdoc = "^20210324" datasets = "^1.18.2" -numpy = "1.21.5" -pandas = "^1.3.1" + [tool.poetry.urls] "Bug Tracker" = "https://github.com/TRoboto/Maha/issues" From 2fc631928103e8110de9a9521df562af8759cd26 Mon Sep 17 00:00:00 2001 From: MuhammadAlBarham Date: Tue, 1 Nov 2022 21:30:36 +0200 Subject: [PATCH 7/9] Delete pyproject.toml --- pyproject.toml | 78 -------------------------------------------------- 1 file changed, 78 deletions(-) delete mode 100644 pyproject.toml diff --git a/pyproject.toml b/pyproject.toml deleted file mode 100644 index 63c19a0..0000000 --- a/pyproject.toml +++ /dev/null @@ -1,78 +0,0 @@ -[tool.poetry] -name = "mahad" -version = "0.3.0" -description = "An Arabic text processing library intended for use in NLP applications." -authors = ["Mohammad Al-Fetyani "] -license = "BSD-3-Clause" -readme="README.md" -repository="https://github.com/TRoboto/Maha" -documentation="https://maha.readthedocs.io" -keywords=["Arabic", "NLP", "Text Processing", "Parsers", "Stream Text Processing"] - -classifiers= [ - "Development Status :: 4 - Beta", - "License :: OSI Approved :: BSD License", - "Topic :: Scientific/Engineering", - "Topic :: Text Processing", - "Topic :: Utilities", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Natural Language :: Arabic", - "Natural Language :: English", - ] -exclude = ["tools/","images/",".vscode/"] -packages = [ - { include = "maha" }, -] - -[tool.poetry.dependencies] -python = "^3.7.1" -tqdm = "^4.61.1" -python-dateutil = "^2.8.2" -regex = "^2021.8.28" -typing-extensions = "^3.10.0" -hijri-converter = "^2.2.3" - -[tool.poetry.dev-dependencies] -pre-commit = "^2.13.0" -black = "^21.5b1" -isort = "^5.8.0" -Sphinx = "^4.1.2" -furo = "^2021.8.31" -pytest = "^6.2.4" -sphinx-copybutton = "^0.3.1" -pytest-cov = "^2.12.1" -blacken-docs = "^1.10.0" -mypy = "^0.910" -tox = "^3.24.3" -types-python-dateutil = "^2.8.0" -sphinx-autoapi = "^1.8.4" -GitPython = "^3.1.24" -PyGithub = "^1.55" -linuxdoc = "^20210324" -datasets = "^1.18.2" - - -[tool.poetry.urls] -"Bug Tracker" = "https://github.com/TRoboto/Maha/issues" -"Discord" = "https://discord.gg/6W2tRFE7k4" -"Documentation" = "https://maha.readthedocs.io" - - -[tool.isort] -multi_line_output = 3 -include_trailing_comma = true -force_grid_wrap = 0 -use_parentheses = true -ensure_newline_before_comments = true -line_length = 88 - -[tool.mypy] -exclude = "tests|tools|docs" -ignore_missing_imports = true - - -[build-system] -requires = ["setuptools","poetry-core>=1.0.0"] -build-backend = "poetry.core.masonry.api" From e9d1fe9527ef2efd66d468714853a3e724009f81 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 1 Nov 2022 19:32:08 +0000 Subject: [PATCH 8/9] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- maha/cleaners/functions/contains_fn.py | 47 +++-------- maha/cleaners/functions/keep_fn.py | 25 ++---- maha/cleaners/functions/normalize_fn.py | 27 ++----- maha/cleaners/functions/remove_fn.py | 46 +++-------- maha/cleaners/functions/replace_fn.py | 15 +--- maha/expressions/arabic.py | 15 +--- maha/expressions/english.py | 11 +-- maha/expressions/general.py | 18 +---- maha/parsers/functions/parse_dimensions.py | 10 +-- maha/parsers/functions/parse_fn.py | 43 +++------- maha/parsers/rules/common.py | 11 +-- maha/parsers/rules/distance/rule.py | 10 +-- maha/parsers/rules/duration/rule.py | 15 +--- maha/parsers/rules/numeral/rule.py | 23 ++---- maha/parsers/rules/ordinal/rule.py | 18 +---- maha/parsers/rules/time/values.py | 59 ++++---------- maha/parsers/utils.py | 13 +-- maha/processors/base_processor.py | 19 ++--- tests/cleaners/test_contains.py | 11 +-- tests/cleaners/test_keep.py | 13 ++- tests/cleaners/test_normalize.py | 3 +- tests/cleaners/test_remove.py | 93 ++++++---------------- tests/cleaners/test_replace.py | 25 ++---- tests/parsers/test_duration.py | 3 +- tests/parsers/test_parse.py | 11 +-- tests/processors/test_base_processor.py | 9 +-- tests/test_deprecation.py | 3 +- 27 files changed, 158 insertions(+), 438 deletions(-) diff --git a/maha/cleaners/functions/contains_fn.py b/maha/cleaners/functions/contains_fn.py index 5de12de..c8ff232 100644 --- a/maha/cleaners/functions/contains_fn.py +++ b/maha/cleaners/functions/contains_fn.py @@ -13,40 +13,19 @@ import regex as re -from maha.constants import ( - ALL_HARAKAT, - ARABIC, - ARABIC_LETTERS, - ARABIC_LIGATURES, - ARABIC_NUMBERS, - ARABIC_PUNCTUATIONS, - EMPTY, - ENGLISH, - ENGLISH_CAPITAL_LETTERS, - ENGLISH_LETTERS, - ENGLISH_NUMBERS, - ENGLISH_PUNCTUATIONS, - ENGLISH_SMALL_LETTERS, - HARAKAT, - LAM_ALEF, - LAM_ALEF_VARIATIONS, - NUMBERS, - PERSIAN, - PUNCTUATIONS, - SPACE, - TATWEEL, -) -from maha.expressions import ( - EXPRESSION_ARABIC_HASHTAGS, - EXPRESSION_ARABIC_MENTIONS, - EXPRESSION_EMAILS, - EXPRESSION_EMOJIS, - EXPRESSION_ENGLISH_HASHTAGS, - EXPRESSION_ENGLISH_MENTIONS, - EXPRESSION_HASHTAGS, - EXPRESSION_LINKS, - EXPRESSION_MENTIONS, -) +from maha.constants import (ALL_HARAKAT, ARABIC, ARABIC_LETTERS, + ARABIC_LIGATURES, ARABIC_NUMBERS, + ARABIC_PUNCTUATIONS, EMPTY, ENGLISH, + ENGLISH_CAPITAL_LETTERS, ENGLISH_LETTERS, + ENGLISH_NUMBERS, ENGLISH_PUNCTUATIONS, + ENGLISH_SMALL_LETTERS, HARAKAT, LAM_ALEF, + LAM_ALEF_VARIATIONS, NUMBERS, PERSIAN, + PUNCTUATIONS, SPACE, TATWEEL) +from maha.expressions import (EXPRESSION_ARABIC_HASHTAGS, + EXPRESSION_ARABIC_MENTIONS, EXPRESSION_EMAILS, + EXPRESSION_EMOJIS, EXPRESSION_ENGLISH_HASHTAGS, + EXPRESSION_ENGLISH_MENTIONS, EXPRESSION_HASHTAGS, + EXPRESSION_LINKS, EXPRESSION_MENTIONS) from maha.rexy import Expression, ExpressionGroup from maha.utils import check_positive_integer diff --git a/maha/cleaners/functions/keep_fn.py b/maha/cleaners/functions/keep_fn.py index ca75f15..e19a13a 100644 --- a/maha/cleaners/functions/keep_fn.py +++ b/maha/cleaners/functions/keep_fn.py @@ -13,25 +13,12 @@ ] import maha.cleaners.functions as functions -from maha.constants import ( - ALL_HARAKAT, - ARABIC, - ARABIC_LETTERS, - ARABIC_NUMBERS, - ARABIC_PUNCTUATIONS, - EMPTY, - ENGLISH, - ENGLISH_CAPITAL_LETTERS, - ENGLISH_LETTERS, - ENGLISH_NUMBERS, - ENGLISH_PUNCTUATIONS, - ENGLISH_SMALL_LETTERS, - HARAKAT, - NUMBERS, - PUNCTUATIONS, - SPACE, - TATWEEL, -) +from maha.constants import (ALL_HARAKAT, ARABIC, ARABIC_LETTERS, + ARABIC_NUMBERS, ARABIC_PUNCTUATIONS, EMPTY, + ENGLISH, ENGLISH_CAPITAL_LETTERS, ENGLISH_LETTERS, + ENGLISH_NUMBERS, ENGLISH_PUNCTUATIONS, + ENGLISH_SMALL_LETTERS, HARAKAT, NUMBERS, + PUNCTUATIONS, SPACE, TATWEEL) def keep( diff --git a/maha/cleaners/functions/normalize_fn.py b/maha/cleaners/functions/normalize_fn.py index 5abb24e..6113877 100644 --- a/maha/cleaners/functions/normalize_fn.py +++ b/maha/cleaners/functions/normalize_fn.py @@ -8,26 +8,13 @@ import maha.cleaners.functions as functions -from maha.constants import ( - ALEF, - ALEF_MADDA_ABOVE, - ALEF_SUPERSCRIPT, - ALEF_VARIATIONS, - ARABIC_LIGATURES, - ARABIC_LIGATURES_NORMALIZED, - EMPTY, - HEH, - LAM, - LAM_ALEF_VARIATIONS, - LAM_ALEF_VARIATIONS_NORMALIZED, - MADDAH_ABOVE, - SPACE, - TEH_MARBUTA, - WAW, - WAW_VARIATIONS, - YEH, - YEH_VARIATIONS, -) +from maha.constants import (ALEF, ALEF_MADDA_ABOVE, ALEF_SUPERSCRIPT, + ALEF_VARIATIONS, ARABIC_LIGATURES, + ARABIC_LIGATURES_NORMALIZED, EMPTY, HEH, LAM, + LAM_ALEF_VARIATIONS, + LAM_ALEF_VARIATIONS_NORMALIZED, MADDAH_ABOVE, + SPACE, TEH_MARBUTA, WAW, WAW_VARIATIONS, YEH, + YEH_VARIATIONS) from maha.expressions import EXPRESSION_ALL_SPACES diff --git a/maha/cleaners/functions/remove_fn.py b/maha/cleaners/functions/remove_fn.py index 47786b8..bcc44f7 100644 --- a/maha/cleaners/functions/remove_fn.py +++ b/maha/cleaners/functions/remove_fn.py @@ -27,40 +27,18 @@ import maha.cleaners.functions as functions -from maha.constants import ( - ALL_HARAKAT, - ARABIC, - ARABIC_DOTLESS_MAP, - ARABIC_LETTERS, - ARABIC_LIGATURES, - ARABIC_NUMBERS, - ARABIC_PUNCTUATIONS, - DOTLESS_NOON_GHUNNA, - EMPTY, - ENGLISH, - ENGLISH_CAPITAL_LETTERS, - ENGLISH_LETTERS, - ENGLISH_NUMBERS, - ENGLISH_PUNCTUATIONS, - ENGLISH_SMALL_LETTERS, - HARAKAT, - NOON, - NUMBERS, - PUNCTUATIONS, - SPACE, - TATWEEL, -) -from maha.expressions import ( - EXPRESSION_ARABIC_HASHTAGS, - EXPRESSION_ARABIC_MENTIONS, - EXPRESSION_EMAILS, - EXPRESSION_EMOJIS, - EXPRESSION_ENGLISH_HASHTAGS, - EXPRESSION_ENGLISH_MENTIONS, - EXPRESSION_HASHTAGS, - EXPRESSION_LINKS, - EXPRESSION_MENTIONS, -) +from maha.constants import (ALL_HARAKAT, ARABIC, ARABIC_DOTLESS_MAP, + ARABIC_LETTERS, ARABIC_LIGATURES, ARABIC_NUMBERS, + ARABIC_PUNCTUATIONS, DOTLESS_NOON_GHUNNA, EMPTY, + ENGLISH, ENGLISH_CAPITAL_LETTERS, ENGLISH_LETTERS, + ENGLISH_NUMBERS, ENGLISH_PUNCTUATIONS, + ENGLISH_SMALL_LETTERS, HARAKAT, NOON, NUMBERS, + PUNCTUATIONS, SPACE, TATWEEL) +from maha.expressions import (EXPRESSION_ARABIC_HASHTAGS, + EXPRESSION_ARABIC_MENTIONS, EXPRESSION_EMAILS, + EXPRESSION_EMOJIS, EXPRESSION_ENGLISH_HASHTAGS, + EXPRESSION_ENGLISH_MENTIONS, EXPRESSION_HASHTAGS, + EXPRESSION_LINKS, EXPRESSION_MENTIONS) from maha.rexy import Expression, ExpressionGroup from maha.utils import check_positive_integer diff --git a/maha/cleaners/functions/replace_fn.py b/maha/cleaners/functions/replace_fn.py index 5333093..e03790a 100644 --- a/maha/cleaners/functions/replace_fn.py +++ b/maha/cleaners/functions/replace_fn.py @@ -18,19 +18,8 @@ # To enjoy infinite width lookbehind import regex as re -from maha.constants import ( - ARABIC_LETTERS, - ARABIC_NUMBERS, - BEH, - EMPTY, - ENGLISH_NUMBERS, - FEH, - KAF, - LAM, - SPACE, - TEH, - WAW, -) +from maha.constants import (ARABIC_LETTERS, ARABIC_NUMBERS, BEH, EMPTY, + ENGLISH_NUMBERS, FEH, KAF, LAM, SPACE, TEH, WAW) from maha.rexy import Expression, ExpressionGroup diff --git a/maha/expressions/arabic.py b/maha/expressions/arabic.py index a27da12..b8136d8 100644 --- a/maha/expressions/arabic.py +++ b/maha/expressions/arabic.py @@ -3,18 +3,9 @@ __all__ = ["EXPRESSION_ARABIC_HASHTAGS", "EXPRESSION_ARABIC_MENTIONS"] import re -from maha.constants import ( - ALL_HARAKAT, - AND_SIGN, - ARABIC_LETTERS, - ARABIC_NUMBERS, - AT_SIGN, - ENGLISH_NUMBERS, - HASHTAG, - PUNCTUATIONS, - TATWEEL, - UNDERSCORE, -) +from maha.constants import (ALL_HARAKAT, AND_SIGN, ARABIC_LETTERS, + ARABIC_NUMBERS, AT_SIGN, ENGLISH_NUMBERS, HASHTAG, + PUNCTUATIONS, TATWEEL, UNDERSCORE) from maha.rexy import Expression EXPRESSION_ARABIC_HASHTAGS = Expression( diff --git a/maha/expressions/english.py b/maha/expressions/english.py index f519250..6fce015 100644 --- a/maha/expressions/english.py +++ b/maha/expressions/english.py @@ -4,15 +4,8 @@ import re -from maha.constants import ( - AND_SIGN, - AT_SIGN, - ENGLISH_LETTERS, - ENGLISH_NUMBERS, - HASHTAG, - PUNCTUATIONS, - UNDERSCORE, -) +from maha.constants import (AND_SIGN, AT_SIGN, ENGLISH_LETTERS, + ENGLISH_NUMBERS, HASHTAG, PUNCTUATIONS, UNDERSCORE) from maha.rexy import Expression EXPRESSION_ENGLISH_HASHTAGS = Expression( diff --git a/maha/expressions/general.py b/maha/expressions/general.py index f57e1fc..84cfc66 100644 --- a/maha/expressions/general.py +++ b/maha/expressions/general.py @@ -16,20 +16,10 @@ import re -from maha.constants import ( - AND_SIGN, - ARABIC_COMMA, - ARABIC_DECIMAL_SEPARATOR, - ARABIC_NUMBERS, - ARABIC_THOUSANDS_SEPARATOR, - AT_SIGN, - COMMA, - ENGLISH_NUMBERS, - HASHTAG, - PUNCTUATIONS, - SPACE, - UNDERSCORE, -) +from maha.constants import (AND_SIGN, ARABIC_COMMA, ARABIC_DECIMAL_SEPARATOR, + ARABIC_NUMBERS, ARABIC_THOUSANDS_SEPARATOR, + AT_SIGN, COMMA, ENGLISH_NUMBERS, HASHTAG, + PUNCTUATIONS, SPACE, UNDERSCORE) from maha.rexy import Expression EXPRESSION_HASHTAGS = Expression( diff --git a/maha/parsers/functions/parse_dimensions.py b/maha/parsers/functions/parse_dimensions.py index de3611e..318f181 100644 --- a/maha/parsers/functions/parse_dimensions.py +++ b/maha/parsers/functions/parse_dimensions.py @@ -3,14 +3,8 @@ __all__ = ["parse_dimension"] -from maha.parsers.rules import ( - RULE_DISTANCE, - RULE_DURATION, - RULE_NAME, - RULE_NUMERAL, - RULE_ORDINAL, - RULE_TIME, -) +from maha.parsers.rules import (RULE_DISTANCE, RULE_DURATION, RULE_NAME, + RULE_NUMERAL, RULE_ORDINAL, RULE_TIME) from maha.parsers.templates import Dimension, DimensionType from maha.rexy import Expression diff --git a/maha/parsers/functions/parse_fn.py b/maha/parsers/functions/parse_fn.py index 1251561..b1ca11e 100644 --- a/maha/parsers/functions/parse_fn.py +++ b/maha/parsers/functions/parse_fn.py @@ -4,37 +4,18 @@ __all__ = ["parse", "parse_expression"] -from maha.constants import ( - ALL_HARAKAT, - ARABIC, - ARABIC_LETTERS, - ARABIC_LIGATURES, - ARABIC_NUMBERS, - ARABIC_PUNCTUATIONS, - EMPTY, - ENGLISH, - ENGLISH_CAPITAL_LETTERS, - ENGLISH_LETTERS, - ENGLISH_NUMBERS, - ENGLISH_PUNCTUATIONS, - ENGLISH_SMALL_LETTERS, - HARAKAT, - NUMBERS, - PUNCTUATIONS, - SPACE, - TATWEEL, -) -from maha.expressions import ( - EXPRESSION_ARABIC_HASHTAGS, - EXPRESSION_ARABIC_MENTIONS, - EXPRESSION_EMAILS, - EXPRESSION_EMOJIS, - EXPRESSION_ENGLISH_HASHTAGS, - EXPRESSION_ENGLISH_MENTIONS, - EXPRESSION_HASHTAGS, - EXPRESSION_LINKS, - EXPRESSION_MENTIONS, -) +from maha.constants import (ALL_HARAKAT, ARABIC, ARABIC_LETTERS, + ARABIC_LIGATURES, ARABIC_NUMBERS, + ARABIC_PUNCTUATIONS, EMPTY, ENGLISH, + ENGLISH_CAPITAL_LETTERS, ENGLISH_LETTERS, + ENGLISH_NUMBERS, ENGLISH_PUNCTUATIONS, + ENGLISH_SMALL_LETTERS, HARAKAT, NUMBERS, + PUNCTUATIONS, SPACE, TATWEEL) +from maha.expressions import (EXPRESSION_ARABIC_HASHTAGS, + EXPRESSION_ARABIC_MENTIONS, EXPRESSION_EMAILS, + EXPRESSION_EMOJIS, EXPRESSION_ENGLISH_HASHTAGS, + EXPRESSION_ENGLISH_MENTIONS, EXPRESSION_HASHTAGS, + EXPRESSION_LINKS, EXPRESSION_MENTIONS) from maha.parsers.templates import Dimension, DimensionType, TextExpression from maha.rexy import Expression, ExpressionGroup diff --git a/maha/parsers/rules/common.py b/maha/parsers/rules/common.py index b3d60ab..802692d 100644 --- a/maha/parsers/rules/common.py +++ b/maha/parsers/rules/common.py @@ -34,14 +34,9 @@ from maha.constants import ALEF_VARIATIONS, ARABIC_COMMA, COMMA, LAM, WAW from maha.expressions import EXPRESSION_SPACE, EXPRESSION_SPACE_OR_NONE from maha.parsers.templates import Unit, Value -from maha.rexy import ( - Expression, - ExpressionGroup, - non_capturing_group, - optional_non_capturing_group, - positive_lookahead, - positive_lookbehind, -) +from maha.rexy import (Expression, ExpressionGroup, non_capturing_group, + optional_non_capturing_group, positive_lookahead, + positive_lookbehind) @dataclass diff --git a/maha/parsers/rules/distance/rule.py b/maha/parsers/rules/distance/rule.py index 2af0811..9769503 100644 --- a/maha/parsers/rules/distance/rule.py +++ b/maha/parsers/rules/distance/rule.py @@ -18,13 +18,9 @@ from maha.parsers.templates import FunctionValue from maha.rexy import ExpressionGroup, named_group, non_capturing_group -from ..common import ( - FRACTIONS, - combine_patterns, - get_fractions_of_unit_pattern, - spaced_patterns, - wrap_pattern, -) +from ..common import (FRACTIONS, combine_patterns, + get_fractions_of_unit_pattern, spaced_patterns, + wrap_pattern) from .template import DistanceValue from .values import * diff --git a/maha/parsers/rules/duration/rule.py b/maha/parsers/rules/duration/rule.py index 95bd90b..c7bdd22 100644 --- a/maha/parsers/rules/duration/rule.py +++ b/maha/parsers/rules/duration/rule.py @@ -14,20 +14,13 @@ ] -from maha.parsers.rules.numeral.rule import ( - EXPRESSION_NUMERAL_MAP, - RULE_NUMERAL, - _parse_numeral, -) +from maha.parsers.rules.numeral.rule import (EXPRESSION_NUMERAL_MAP, + RULE_NUMERAL, _parse_numeral) from maha.parsers.templates import FunctionValue, Unit from maha.rexy import ExpressionGroup, named_group, non_capturing_group -from ..common import ( - FRACTIONS, - combine_patterns, - get_fractions_of_unit_pattern, - spaced_patterns, -) +from ..common import (FRACTIONS, combine_patterns, + get_fractions_of_unit_pattern, spaced_patterns) from .template import * from .values import * diff --git a/maha/parsers/rules/numeral/rule.py b/maha/parsers/rules/numeral/rule.py index cb5e4c8..e2a9aab 100644 --- a/maha/parsers/rules/numeral/rule.py +++ b/maha/parsers/rules/numeral/rule.py @@ -14,27 +14,16 @@ from functools import reduce -from maha.expressions import EXPRESSION_DECIMAL, EXPRESSION_INTEGER, EXPRESSION_SPACE +from maha.expressions import (EXPRESSION_DECIMAL, EXPRESSION_INTEGER, + EXPRESSION_SPACE) from maha.parsers.rules.ordinal.values import ALEF_LAM from maha.parsers.templates import FunctionValue from maha.parsers.utils import convert_to_number_if_possible -from maha.rexy import ( - ExpressionGroup, - named_group, - non_capturing_group, - optional_non_capturing_group, -) +from maha.rexy import (ExpressionGroup, named_group, non_capturing_group, + optional_non_capturing_group) -from ..common import ( - HALF, - QUARTER, - THIRD, - THREE_QUARTERS, - TWO_THIRDS, - WAW_CONNECTOR, - combine_patterns, - spaced_patterns, -) +from ..common import (HALF, QUARTER, THIRD, THREE_QUARTERS, TWO_THIRDS, + WAW_CONNECTOR, combine_patterns, spaced_patterns) from .values import * NUMERAL_VALUES_GROUP_NAME = "numeral_values" diff --git a/maha/parsers/rules/ordinal/rule.py b/maha/parsers/rules/ordinal/rule.py index 546c422..8dc9fdd 100644 --- a/maha/parsers/rules/ordinal/rule.py +++ b/maha/parsers/rules/ordinal/rule.py @@ -14,21 +14,11 @@ from maha.parsers.templates import FunctionValue -from maha.rexy import ( - Expression, - ExpressionGroup, - named_group, - non_capturing_group, - optional_non_capturing_group, -) +from maha.rexy import (Expression, ExpressionGroup, named_group, + non_capturing_group, optional_non_capturing_group) -from ..common import ( - AFTER, - WAW_CONNECTOR, - combine_patterns, - spaced_patterns, - wrap_pattern, -) +from ..common import (AFTER, WAW_CONNECTOR, combine_patterns, spaced_patterns, + wrap_pattern) from .values import * diff --git a/maha/parsers/rules/time/values.py b/maha/parsers/rules/time/values.py index 100914d..59ff3b2 100644 --- a/maha/parsers/rules/time/values.py +++ b/maha/parsers/rules/time/values.py @@ -5,50 +5,25 @@ import maha.parsers.rules.ordinal.rule as ordinal from maha.constants import ARABIC_COMMA, COMMA, LAM, WAW, arabic, english from maha.expressions import EXPRESSION_SPACE, EXPRESSION_SPACE_OR_NONE -from maha.parsers.rules.duration.values import ( - ONE_DAY, - ONE_HOUR, - ONE_MINUTE, - ONE_MONTH, - ONE_WEEK, - ONE_YEAR, - SEVERAL_DAYS, - SEVERAL_HOURS, - SEVERAL_MINUTES, - SEVERAL_MONTHS, - SEVERAL_WEEKS, - SEVERAL_YEARS, - TWO_DAYS, - TWO_HOURS, - TWO_MINUTES, - TWO_MONTHS, - TWO_WEEKS, - TWO_YEARS, -) -from maha.parsers.rules.ordinal.values import ALEF_LAM, ALEF_LAM_OPTIONAL, ONE_PREFIX +from maha.parsers.rules.duration.values import (ONE_DAY, ONE_HOUR, ONE_MINUTE, + ONE_MONTH, ONE_WEEK, ONE_YEAR, + SEVERAL_DAYS, SEVERAL_HOURS, + SEVERAL_MINUTES, + SEVERAL_MONTHS, SEVERAL_WEEKS, + SEVERAL_YEARS, TWO_DAYS, + TWO_HOURS, TWO_MINUTES, + TWO_MONTHS, TWO_WEEKS, + TWO_YEARS) +from maha.parsers.rules.ordinal.values import (ALEF_LAM, ALEF_LAM_OPTIONAL, + ONE_PREFIX) from maha.parsers.templates import FunctionValue, Value from maha.parsers.templates.value_expressions import MatchedValue -from maha.rexy import ( - Expression, - ExpressionGroup, - named_group, - non_capturing_group, - optional_non_capturing_group, -) - -from ..common import ( - AFTER, - AFTER_NEXT, - ALL_ALEF, - BEFORE, - BEFORE_PREVIOUS, - ELLA, - FRACTIONS, - IN_FROM_AT, - NEXT, - PREVIOUS, - spaced_patterns, -) +from maha.rexy import (Expression, ExpressionGroup, named_group, + non_capturing_group, optional_non_capturing_group) + +from ..common import (AFTER, AFTER_NEXT, ALL_ALEF, BEFORE, BEFORE_PREVIOUS, + ELLA, FRACTIONS, IN_FROM_AT, NEXT, PREVIOUS, + spaced_patterns) from .template import TimeInterval, TimeValue diff --git a/maha/parsers/utils.py b/maha/parsers/utils.py index fbdb824..56d28f2 100644 --- a/maha/parsers/utils.py +++ b/maha/parsers/utils.py @@ -3,16 +3,9 @@ __all__ = ["convert_to_number_if_possible"] -from maha.constants import ( - ARABIC_COMMA, - ARABIC_DECIMAL_SEPARATOR, - ARABIC_THOUSANDS_SEPARATOR, - COMMA, - DOT, - EMPTY, - PERCENT_SIGN, - SPACE, -) +from maha.constants import (ARABIC_COMMA, ARABIC_DECIMAL_SEPARATOR, + ARABIC_THOUSANDS_SEPARATOR, COMMA, DOT, EMPTY, + PERCENT_SIGN, SPACE) def convert_to_number_if_possible(value: str) -> int | float | None: diff --git a/maha/processors/base_processor.py b/maha/processors/base_processor.py index 0aca694..151ef1f 100644 --- a/maha/processors/base_processor.py +++ b/maha/processors/base_processor.py @@ -10,19 +10,12 @@ from functools import partial from typing import Callable -from maha.cleaners.functions import ( - connect_single_letter_word, - contains, - contains_repeated_substring, - contains_single_letter_word, - keep, - normalize, - reduce_repeated_substring, - remove, - replace, - replace_expression, - replace_pairs, -) +from maha.cleaners.functions import (connect_single_letter_word, contains, + contains_repeated_substring, + contains_single_letter_word, keep, + normalize, reduce_repeated_substring, + remove, replace, replace_expression, + replace_pairs) from maha.rexy import Expression, ExpressionGroup from .utils import ObjectGet diff --git a/tests/cleaners/test_contains.py b/tests/cleaners/test_contains.py index 3617276..aee4d09 100644 --- a/tests/cleaners/test_contains.py +++ b/tests/cleaners/test_contains.py @@ -1,12 +1,9 @@ import pytest -from maha.cleaners.functions import ( - contain_strings, - contains, - contains_expressions, - contains_repeated_substring, - contains_single_letter_word, -) +from maha.cleaners.functions import (contain_strings, contains, + contains_expressions, + contains_repeated_substring, + contains_single_letter_word) from maha.constants import EMPTY from maha.expressions import EXPRESSION_EMAILS from maha.rexy import Expression, ExpressionGroup diff --git a/tests/cleaners/test_keep.py b/tests/cleaners/test_keep.py index 271ef19..c2c9df7 100644 --- a/tests/cleaners/test_keep.py +++ b/tests/cleaners/test_keep.py @@ -1,13 +1,10 @@ import pytest -from maha.cleaners.functions import ( - keep, - keep_arabic_characters, - keep_arabic_letters, - keep_arabic_letters_with_harakat, - keep_arabic_with_english_numbers, - keep_strings, -) +from maha.cleaners.functions import (keep, keep_arabic_characters, + keep_arabic_letters, + keep_arabic_letters_with_harakat, + keep_arabic_with_english_numbers, + keep_strings) from maha.constants import ARABIC_LETTERS, ARABIC_NUMBERS, BEH, DOT, SPACE diff --git a/tests/cleaners/test_normalize.py b/tests/cleaners/test_normalize.py index b9b41e2..bddcadc 100644 --- a/tests/cleaners/test_normalize.py +++ b/tests/cleaners/test_normalize.py @@ -1,6 +1,7 @@ import pytest -from maha.cleaners.functions import normalize, normalize_lam_alef, normalize_small_alef +from maha.cleaners.functions import (normalize, normalize_lam_alef, + normalize_small_alef) from maha.constants import ALEF, ALEF_VARIATIONS, EMPTY, SPACE diff --git a/tests/cleaners/test_remove.py b/tests/cleaners/test_remove.py index 071f5a6..dfa72a0 100644 --- a/tests/cleaners/test_remove.py +++ b/tests/cleaners/test_remove.py @@ -1,75 +1,28 @@ import pytest -from maha.cleaners.functions import ( - reduce_repeated_substring, - remove, - remove_all_harakat, - remove_arabic_letter_dots, - remove_emails, - remove_english, - remove_expressions, - remove_extra_spaces, - remove_harakat, - remove_hash_keep_tag, - remove_hashtags, - remove_links, - remove_mentions, - remove_numbers, - remove_punctuations, - remove_strings, - remove_tatweel, -) -from maha.constants import ( - ALL_HARAKAT, - ARABIC, - ARABIC_LETTERS, - ARABIC_LIGATURES, - ARABIC_NUMBERS, - ARABIC_PUNCTUATIONS, - BEH, - DAD, - DOTLESS_BEH, - DOTLESS_DAD, - DOTLESS_FEH, - DOTLESS_GHAIN, - DOTLESS_JEEM, - DOTLESS_KHAH, - DOTLESS_NOON_GHUNNA, - DOTLESS_QAF, - DOTLESS_SHEEN, - DOTLESS_TEH, - DOTLESS_TEH_MARBUTA, - DOTLESS_THAL, - DOTLESS_THEH, - DOTLESS_YEH, - DOTLESS_ZAH, - DOTLESS_ZAIN, - EMPTY, - ENGLISH, - ENGLISH_CAPITAL_LETTERS, - ENGLISH_LETTERS, - ENGLISH_NUMBERS, - ENGLISH_PUNCTUATIONS, - ENGLISH_SMALL_LETTERS, - FEH, - GHAIN, - HARAKAT, - JEEM, - KHAH, - NOON, - NUMBERS, - PUNCTUATIONS, - QAF, - SHEEN, - TATWEEL, - TEH, - TEH_MARBUTA, - THAL, - THEH, - YEH, - ZAH, - ZAIN, -) +from maha.cleaners.functions import (reduce_repeated_substring, remove, + remove_all_harakat, + remove_arabic_letter_dots, remove_emails, + remove_english, remove_expressions, + remove_extra_spaces, remove_harakat, + remove_hash_keep_tag, remove_hashtags, + remove_links, remove_mentions, + remove_numbers, remove_punctuations, + remove_strings, remove_tatweel) +from maha.constants import (ALL_HARAKAT, ARABIC, ARABIC_LETTERS, + ARABIC_LIGATURES, ARABIC_NUMBERS, + ARABIC_PUNCTUATIONS, BEH, DAD, DOTLESS_BEH, + DOTLESS_DAD, DOTLESS_FEH, DOTLESS_GHAIN, + DOTLESS_JEEM, DOTLESS_KHAH, DOTLESS_NOON_GHUNNA, + DOTLESS_QAF, DOTLESS_SHEEN, DOTLESS_TEH, + DOTLESS_TEH_MARBUTA, DOTLESS_THAL, DOTLESS_THEH, + DOTLESS_YEH, DOTLESS_ZAH, DOTLESS_ZAIN, EMPTY, + ENGLISH, ENGLISH_CAPITAL_LETTERS, ENGLISH_LETTERS, + ENGLISH_NUMBERS, ENGLISH_PUNCTUATIONS, + ENGLISH_SMALL_LETTERS, FEH, GHAIN, HARAKAT, JEEM, + KHAH, NOON, NUMBERS, PUNCTUATIONS, QAF, SHEEN, + TATWEEL, TEH, TEH_MARBUTA, THAL, THEH, YEH, ZAH, + ZAIN) from tests.utils import list_not_in_string diff --git a/tests/cleaners/test_replace.py b/tests/cleaners/test_replace.py index b3a35ea..619ec40 100644 --- a/tests/cleaners/test_replace.py +++ b/tests/cleaners/test_replace.py @@ -1,23 +1,12 @@ import pytest -from maha.cleaners.functions import ( - arabic_numbers_to_english, - connect_single_letter_word, - replace, - replace_except, - replace_expression, - replace_pairs, -) -from maha.constants import ( - ARABIC_FOUR, - ARABIC_NUMBERS, - ARABIC_ONE, - ARABIC_TWO, - EMPTY, - ENGLISH_CAPITAL_LETTERS, - ENGLISH_NUMBERS, - ENGLISH_SMALL_LETTERS, -) +from maha.cleaners.functions import (arabic_numbers_to_english, + connect_single_letter_word, replace, + replace_except, replace_expression, + replace_pairs) +from maha.constants import (ARABIC_FOUR, ARABIC_NUMBERS, ARABIC_ONE, + ARABIC_TWO, EMPTY, ENGLISH_CAPITAL_LETTERS, + ENGLISH_NUMBERS, ENGLISH_SMALL_LETTERS) from tests.utils import list_not_in_string, list_only_in_string diff --git a/tests/parsers/test_duration.py b/tests/parsers/test_duration.py index 7890c3e..5ba2fa9 100644 --- a/tests/parsers/test_duration.py +++ b/tests/parsers/test_duration.py @@ -7,7 +7,8 @@ from maha.parsers.functions import parse_dimension from maha.parsers.rules.duration import * -from maha.parsers.rules.duration.template import DurationUnit, DurationValue, ValueUnit +from maha.parsers.rules.duration.template import (DurationUnit, DurationValue, + ValueUnit) from maha.parsers.templates import Dimension S = DurationUnit.SECONDS diff --git a/tests/parsers/test_parse.py b/tests/parsers/test_parse.py index 3750dc8..41fac7a 100644 --- a/tests/parsers/test_parse.py +++ b/tests/parsers/test_parse.py @@ -1,14 +1,7 @@ import pytest -from maha.constants import ( - ALEF_SUPERSCRIPT, - ARABIC, - ARABIC_NUMBERS, - BEH, - EMPTY, - FATHA, - KASRA, -) +from maha.constants import (ALEF_SUPERSCRIPT, ARABIC, ARABIC_NUMBERS, BEH, + EMPTY, FATHA, KASRA) from maha.parsers.functions import parse from maha.parsers.templates import Dimension, DimensionType from maha.rexy import Expression, ExpressionGroup diff --git a/tests/processors/test_base_processor.py b/tests/processors/test_base_processor.py index 0a15898..123aaa3 100644 --- a/tests/processors/test_base_processor.py +++ b/tests/processors/test_base_processor.py @@ -2,13 +2,8 @@ import pytest -from maha.constants import ( - ALEF_VARIATIONS, - ARABIC_LETTERS, - ARABIC_LIGATURES, - ENGLISH_LETTERS, - TEH_MARBUTA, -) +from maha.constants import (ALEF_VARIATIONS, ARABIC_LETTERS, ARABIC_LIGATURES, + ENGLISH_LETTERS, TEH_MARBUTA) from maha.expressions import EXPRESSION_HASHTAGS from maha.processors import BaseProcessor from tests.utils import list_not_in_string, list_only_in_string diff --git a/tests/test_deprecation.py b/tests/test_deprecation.py index 1d6c752..a35ad83 100644 --- a/tests/test_deprecation.py +++ b/tests/test_deprecation.py @@ -1,6 +1,7 @@ import pytest -from maha.deprecation import deprecated_default, deprecated_fn, deprecated_param +from maha.deprecation import (deprecated_default, deprecated_fn, + deprecated_param) def _get_warning_msg(recwarn): From b1921ef087df6726b3dfddc931ff05debf14d8fa Mon Sep 17 00:00:00 2001 From: MuhammadAlBarham Date: Fri, 4 Nov 2022 17:50:36 +0200 Subject: [PATCH 9/9] Update ci.yml --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0dd97e7..9ad5bc0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -14,7 +14,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest, macos-latest, windows-latest] - python: ['3.7', '3.8', '3.9', '3.10'] + python: ">=3.7,<3.11" steps: - name: Checkout the repository