Skip to content

Commit

Permalink
release 0.2.0, now depending on text-utils>=0.2.1
Browse files (browse the repository at this point in the history)
  • Loading branch information
bastiscode committed Nov 2, 2023
1 parent cf62a05 commit 287f34e
Show file tree
Hide file tree
Showing 16 changed files with 144 additions and 148 deletions.
6 changes: 3 additions & 3 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
[submodule "third_party/text-correction-utils"]
path = third_party/text-correction-utils
url = https://github.com/bastiscode/text-correction-utils
[submodule "third_party/text-utils"]
path = third_party/text-utils
url = https://github.com/ad-freiburg/text-utils
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ echo "splitthissentenceforme" | wsc
cat "path/to/input/file.txt" | wsc > output.txt

# correct a string passed directly as a command line argument
wsc -c "splitthissentenceforme"
wsc -p "splitthissentenceforme"

# correct a text file line by line and print the corrected lines
wsc -f path/to/input/file.txt
Expand Down
4 changes: 2 additions & 2 deletions configs/eo_lstm_char.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,7 @@ model:
num_classes: 3

train:
mixed_precision: true
mixed_precision_dtype: env(MIXED_PRECISION_DTYPE:fp16)
precision: fp16
clip_grad_norm: env(CLIP_GRAD_NORM:1.0)
num_epochs: env(NUM_EPOCHS:3)
eval_interval: eval(1 / env(EVAL_PER_EPOCH:10))
Expand All @@ -47,6 +46,7 @@ train:
strategy: weighted
shuffle: true
sort: true
max_length: env(MAX_LENGTH:512)
buffer_size: env(BATCH_LIMIT:32)
prefetch_factor: env(PREFETCH_FACTOR:2048)
num_threads: eval(env(THREADS:None) or len(os.sched_getaffinity(0)) // 2)
Expand Down
4 changes: 2 additions & 2 deletions configs/eo_transformer_byt5.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,7 @@ model:
num_classes: 3

train:
mixed_precision: env(MIXED_PRECISION:true)
mixed_precision_dtype: env(MIXED_PRECISION_DTYPE:bfp16)
precision: env(PRECISION:fp16)
clip_grad_norm: env(CLIP_GRAD_NORM:1.0)
num_epochs: env(NUM_EPOCHS:1)
eval_interval: eval(1 / env(EVAL_PER_EPOCH:10))
Expand All @@ -50,6 +49,7 @@ train:
strategy: weighted
shuffle: true
sort: true
max_length: env(MAX_LENGTH:512)
buffer_size: env(BATCH_LIMIT:32)
prefetch_factor: env(PREFETCH_FACTOR:2048)
num_threads: eval(env(THREADS:None) or len(os.sched_getaffinity(0)) // 2)
Expand Down
4 changes: 2 additions & 2 deletions configs/eo_transformer_byt5_scratch.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,7 @@ model:
num_classes: 3

train:
mixed_precision: env(MIXED_PRECISION:true)
mixed_precision_dtype: env(MIXED_PRECISION_DTYPE:bfp16)
precision: env(PRECISION:fp16)
clip_grad_norm: env(CLIP_GRAD_NORM:1.0)
num_epochs: env(NUM_EPOCHS:1)
eval_interval: eval(1 / env(EVAL_PER_EPOCH:10))
Expand All @@ -47,6 +46,7 @@ train:
strategy: weighted
shuffle: true
sort: true
max_length: env(MAX_LENGTH:512)
buffer_size: env(BATCH_LIMIT:32)
prefetch_factor: env(PREFETCH_FACTOR:2048)
num_threads: eval(env(THREADS:None) or len(os.sched_getaffinity(0)) // 2)
Expand Down
4 changes: 2 additions & 2 deletions configs/eo_transformer_byte_v1_like.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,7 @@ model:
num_classes: 3

train:
mixed_precision: env(MIXED_PRECISION:true)
mixed_precision_dtype: env(MIXED_PRECISION_DTYPE:fp16)
precision: env(PRECISION:fp16)
clip_grad_norm: env(CLIP_GRAD_NORM:1.0)
num_epochs: env(NUM_EPOCHS:3)
eval_interval: eval(1 / env(EVAL_PER_EPOCH:10))
Expand All @@ -56,6 +55,7 @@ train:
strategy: weighted
shuffle: true
sort: true
max_length: env(MAX_LENGTH:512)
buffer_size: env(BATCH_LIMIT:32)
prefetch_factor: env(PREFETCH_FACTOR:2048)
num_threads: eval(env(THREADS:None) or len(os.sched_getaffinity(0)) // 2)
Expand Down
4 changes: 2 additions & 2 deletions configs/eo_transformer_char_v1_like.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,7 @@ model:
num_classes: 3

train:
mixed_precision: true
mixed_precision_dtype: env(MIXED_PRECISION_DTYPE:fp16)
precision: env(PRECISION:fp16)
clip_grad_norm: env(CLIP_GRAD_NORM:1.0)
num_epochs: env(NUM_EPOCHS:3)
eval_interval: eval(1 / env(EVAL_PER_EPOCH:10))
Expand All @@ -50,6 +49,7 @@ train:
strategy: weighted
shuffle: true
sort: true
max_length: env(MAX_LENGTH:512)
buffer_size: env(BATCH_LIMIT:32)
prefetch_factor: env(PREFETCH_FACTOR:2048)
num_threads: eval(env(THREADS:None) or len(os.sched_getaffinity(0)) // 2)
Expand Down
5 changes: 2 additions & 3 deletions configs/server.yaml
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
port: 40000
timeout: 10
# precision: fp16
# allow_origin: test.mydomain.com
base_url: env(BASE_URL:/api)
models:
# load a pretrained model by specifying the name
# - eo_large_arxiv
# - name: eo_large_arxiv
# load a model from a local experiment by specifying the
# directory path (you can use special configuration operators,
# e.g. env(ENV_VAR) to load env variables)
- env(EXPERIMENT)
- path: env(EXPERIMENT)
batch_size: env(BATCH_SIZE:16)
# batch_max_tokens: env(BATCH_MAX_TOKENS:8192)
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "whitespace_correction"
version = "0.1.5"
version = "0.2.0"
description = "Correct missing or spurious whitespaces in text."
authors = [
{ name = "Sebastian Walter", email = "[email protected]" }
Expand All @@ -20,7 +20,7 @@ classifiers = [
]

dependencies = [
"text-correction-utils==0.1.4",
"dtpu>=0.2.1",
"transformers>=4.26.0"
]

Expand Down
2 changes: 1 addition & 1 deletion scripts/generate_visualizations.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from whitespace_correction.api import WhitespaceCorrector

from text_correction_utils import hook, logging
from text_utils import hook, logging

from torch import nn
import numpy as np
Expand Down
22 changes: 11 additions & 11 deletions src/whitespace_correction/api/cli.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,27 @@
from io import TextIOWrapper
from typing import Iterator, Optional, Union

from text_correction_utils.api.cli import TextCorrectionCli
from text_correction_utils import data
from text_utils.api.cli import TextProcessingCli
from text_utils import data

from whitespace_correction import version
from whitespace_correction.api.corrector import WhitespaceCorrector
from whitespace_correction.api.server import WhitespaceCorrectionServer


class WhitespaceCorrectionCli(TextCorrectionCli):
text_corrector_cls = WhitespaceCorrector
text_correction_server_cls = WhitespaceCorrectionServer
class WhitespaceCorrectionCli(TextProcessingCli):
text_processor_cls = WhitespaceCorrector
text_processing_server_cls = WhitespaceCorrectionServer

def version(self) -> str:
return version.__version__

def correct_iter(
def process_iter(
self,
corrector: WhitespaceCorrector,
processor: WhitespaceCorrector,
iter: Iterator[data.InferenceData]
) -> Iterator[data.InferenceData]:
yield from corrector.correct_iter(
yield from processor.correct_iter(
((data.text, data.language) for data in iter),
self.args.batch_size,
self.args.batch_max_tokens,
Expand All @@ -31,14 +31,14 @@ def correct_iter(
show_progress=self.args.progress
)

def correct_file(
def process_file(
self,
corrector: WhitespaceCorrector,
processor: WhitespaceCorrector,
path: str,
lang: Optional[str],
out_file: Union[str, TextIOWrapper]
):
corrector.correct_file(
processor.correct_file(
path,
self.args.input_format,
out_file,
Expand Down
Loading

0 comments on commit 287f34e

Please sign in to comment.