Skip to content

Commit

Permalink
fixed style issues
Browse files: browse the repository at this point in the history
  • Loading branch information
soldni authored Dec 30, 2024
1 parent a855690 commit e1bbadc
Showing 1 changed file with 8 additions and 5 deletions.
13 changes: 8 additions & 5 deletions python/dolma/taggers/language.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -4,14 +4,14 @@
@kylel, @soldni
"""

from typing import TYPE_CHECKING, List, Tuple, Iterable
from typing import TYPE_CHECKING, Iterable, List, Tuple

import necessary
import regex
from anyascii import anyascii

from ..core.data_types import DocResult, Document, Span
from ..core.ft_tagger import BaseFastTextTagger, TextSlice, Prediction
from ..core.ft_tagger import BaseFastTextTagger, Prediction, TextSlice
from ..core.registry import TaggerRegistry
from ..core.taggers import BaseTagger
from ..core.utils import split_paragraphs
Expand All @@ -32,14 +32,17 @@

with necessary.necessary("lingua", soft=True) as LINGUA_AVAILABLE:
if LINGUA_AVAILABLE or TYPE_CHECKING:
from lingua import Language, LanguageDetectorBuilder # pylint: disable=import-error # pyright: ignore
from lingua import ( # pylint: disable=import-error # pyright: ignore
Language,
LanguageDetectorBuilder,
)


class BaseLanguageTagger(BaseTagger):
INCLUDE_NEGATIVE = True
PREDICT_ON_PARAGRAPHS = False

def predict_text(self, text: str) -> List[Tuple[str, float]]: # pylint: disable=unused-argument
def predict_text(self, text: str) -> List[Tuple[str, float]]: # pylint: disable=unused-argument
return []

def make_negative(self, spans: List[Span]) -> List[Span]:
Expand Down Expand Up @@ -146,7 +149,7 @@ class Cld2EnglishLanguageParagraphTagger(Cld2EnglishLanguageTagger):

@TaggerRegistry.add("ft_lang_id_doc_v1")
class FastTextAllLanguagesDocumentTagger(BaseLanguageTagger, BaseFastTextTagger):
MODEL_PATH = "https://dolma-artifacts/lang_id_models/fbai/lid.176.bin"
MODEL_PATH = "https://dolma-artifacts.org/lang_id_models/fbai/lid.176.bin"
INCLUDE_NEGATIVE = False
PREDICT_ON_PARAGRAPHS = False

Expand Down

0 comments on commit e1bbadc

Please sign in to comment.