From e1bbadca3414ae5263bd17008e2ad6baa0e1ebfd Mon Sep 17 00:00:00 2001 From: Luca Soldaini Date: Mon, 30 Dec 2024 15:06:10 -0800 Subject: [PATCH] fixed style issues --- python/dolma/taggers/language.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/python/dolma/taggers/language.py b/python/dolma/taggers/language.py index a8ba0cf3..b157bc80 100644 --- a/python/dolma/taggers/language.py +++ b/python/dolma/taggers/language.py @@ -4,14 +4,14 @@ @kylel, @soldni """ -from typing import TYPE_CHECKING, List, Tuple, Iterable +from typing import TYPE_CHECKING, Iterable, List, Tuple import necessary import regex from anyascii import anyascii from ..core.data_types import DocResult, Document, Span -from ..core.ft_tagger import BaseFastTextTagger, TextSlice, Prediction +from ..core.ft_tagger import BaseFastTextTagger, Prediction, TextSlice from ..core.registry import TaggerRegistry from ..core.taggers import BaseTagger from ..core.utils import split_paragraphs @@ -32,14 +32,17 @@ with necessary.necessary("lingua", soft=True) as LINGUA_AVAILABLE: if LINGUA_AVAILABLE or TYPE_CHECKING: - from lingua import Language, LanguageDetectorBuilder # pylint: disable=import-error # pyright: ignore + from lingua import ( # pylint: disable=import-error # pyright: ignore + Language, + LanguageDetectorBuilder, + ) class BaseLanguageTagger(BaseTagger): INCLUDE_NEGATIVE = True PREDICT_ON_PARAGRAPHS = False - def predict_text(self, text: str) -> List[Tuple[str, float]]: # pylint: disable=unused-argument + def predict_text(self, text: str) -> List[Tuple[str, float]]: # pylint: disable=unused-argument return [] def make_negative(self, spans: List[Span]) -> List[Span]: @@ -146,7 +149,7 @@ class Cld2EnglishLanguageParagraphTagger(Cld2EnglishLanguageTagger): @TaggerRegistry.add("ft_lang_id_doc_v1") class FastTextAllLanguagesDocumentTagger(BaseLanguageTagger, BaseFastTextTagger): - MODEL_PATH = "https://dolma-artifacts/lang_id_models/fbai/lid.176.bin" + MODEL_PATH = "https://dolma-artifacts.org/lang_id_models/fbai/lid.176.bin" INCLUDE_NEGATIVE = False PREDICT_ON_PARAGRAPHS = False