Skip to content

Commit

Permalink
Merge pull request zyddnys#747 from popcion/main
Browse files Browse the repository at this point in the history
Fix the bug where unfiltered content after translation is incorrectly displayed as filtered in log, and improve the filtering logic
  • Loading branch information
zyddnys authored Nov 23, 2024
2 parents cc89fe6 + e5b3794 commit 661fd5e
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 14 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -451,7 +451,7 @@ Colorizer: **mc2**
--save-text-file SAVE_TEXT_FILE Like --save-text but with a specified file path.
--filter-text FILTER_TEXT Filter regions by their text with a regex. Example
usage: --filter-text ".*badtext.*"
--pre-dict FILe_PATH Path to the pre-translation dictionary file. One entry per line,
--pre-dict FILE_PATH Path to the pre-translation dictionary file. One entry per line,
Comments can be added with `#` and `//`.
usage: //Example
dog cat #Example
Expand Down
6 changes: 2 additions & 4 deletions README_CN.md
Original file line number Diff line number Diff line change
Expand Up @@ -205,15 +205,13 @@ FIL: Filipino (Tagalog)
--save-text-file SAVE_TEXT_FILE Like --save-text but with a specified file path.
--filter-text FILTER_TEXT Filter regions by their text with a regex. Example
usage: --filter-text ".*badtext.*"
--filter-text FILTER_TEXT Filter regions by their text with a regex. Example
usage: --filter-text ".*badtext.*"
--pre-dict FILe_PATH Path to the pre-translation dictionary file. One entry per line,
--pre-dict FILE_PATH Path to the pre-translation dictionary file. One entry per line,
Comments can be added with `#` and `//`.
usage: //Example
dog cat #Example
abc def
abc
--post-dict file_path Path to the post-translation dictionary file. Same as above.
--post-dict FILE_PATH Path to the post-translation dictionary file. Same as above.
--skip-lang Skip translation if source image is one of the provided languages,
use comma to separate multiple languages. Example: JPN,ENG
--prep-manual Prepare for manual typesetting by outputting blank,
Expand Down
44 changes: 35 additions & 9 deletions manga_translator/manga_translator.py
Original file line number Diff line number Diff line change
Expand Up @@ -620,28 +620,38 @@ async def _run_text_translation(self, ctx: Context):
diff_target_regions = [] # Target language regions with different translation
same_non_target_regions = [] # Non-target language regions with identical translation
diff_non_target_regions = [] # Non-target language regions with different translation
has_target_lang_in_translation_regions = []

for region in ctx.text_regions:
text_equal = region.text.lower().strip() == region.translation.lower().strip()
has_target_lang = False
has_target_lang_in_translation = False

# Target language detection
if ctx.target_lang in ['CHS', 'CHT']: # Chinese
has_target_lang = bool(re.search('[\u4e00-\u9fff]', region.text))
has_target_lang = bool(re.search('[\u4e00-\u9fff]', region.text))
has_target_lang_in_translation = bool(re.search('[\u4e00-\u9fff]', region.translation))
elif ctx.target_lang == 'JPN': # Japanese
has_target_lang = bool(re.search('[\u3040-\u309f\u30a0-\u30ff\u4e00-\u9fff]', region.text))
has_target_lang_in_translation = bool(re.search('[\u3040-\u309f\u30a0-\u30ff\u4e00-\u9fff]', region.translation))
elif ctx.target_lang == 'KOR': # Korean
has_target_lang = bool(re.search('[\uac00-\ud7af\u1100-\u11ff]', region.text))
has_target_lang_in_translation = bool(re.search('[\uac00-\ud7af\u1100-\u11ff]', region.translation))
elif ctx.target_lang == 'ARA': # Arabic
has_target_lang = bool(re.search('[\u0600-\u06ff]', region.text))
has_target_lang_in_translation = bool(re.search('[\u0600-\u06ff]', region.translation))
elif ctx.target_lang == 'THA': # Thai
has_target_lang = bool(re.search('[\u0e00-\u0e7f]', region.text))
has_target_lang_in_translation = bool(re.search('[\u0e00-\u0e7f]', region.translation))
elif ctx.target_lang == 'RUS': # Russian
has_target_lang = bool(re.search('[\u0400-\u04ff]', region.text))
has_target_lang_in_translation = bool(re.search('[\u0400-\u04ff]', region.translation))
elif ctx.target_lang == 'UKR': # Ukrainian
has_target_lang = bool(re.search('[\u0400-\u04ff]', region.text))
has_target_lang_in_translation = bool(re.search('[\u0400-\u04ff]', region.translation))
elif ctx.target_lang == 'IND': # Indonesian
has_target_lang = bool(re.search('[A-Za-z]', region.text))
has_target_lang_in_translation = bool(re.search('[A-Za-z]', region.translation))

# Skip numeric translations and filtered text
if region.translation.isnumeric():
Expand All @@ -655,27 +665,43 @@ async def _run_text_translation(self, ctx: Context):
continue

if has_target_lang:
if text_equal:
logger.info(f'Filtered out: {region.translation}')
logger.info('Reason: Translation identical to original')
if text_equal:
same_target_regions.append(region)
else:
diff_target_regions.append(region)
else:
if text_equal:
logger.info(f'Filtered out: {region.translation}')
logger.info('Reason: Translation identical to original')
same_non_target_regions.append(region)
else:
diff_non_target_regions.append(region)


if has_target_lang_in_translation:
has_target_lang_in_translation_regions.append(region)

# If any different translations exist, retain all target language regions
if diff_target_regions or diff_non_target_regions:
new_text_regions.extend(same_target_regions)
new_text_regions.extend(diff_target_regions)

# Keep all non_target_lang regions with different translations (if translation contains target language characters)
for region in diff_non_target_regions:
if region in has_target_lang_in_translation_regions:
new_text_regions.append(region)
else:
logger.info(f'Filtered out: {region.translation}')
logger.info('Reason: Translation does not contain target language characters')

# Retain all non-target language regions with different translations (It appears empty, it clears all contents.)
new_text_regions.extend(diff_non_target_regions)
# No different translations exist, clear all content.
if not (diff_target_regions or diff_non_target_regions):
for region in same_target_regions:
logger.info(f'Filtered out: {region.translation}')
logger.info('Reason: Translation identical to original -the whole page-')

# Clear non_target_lang_regions with identical translations.
for region in same_non_target_regions:
logger.info(f'Filtered out: {region.translation}')
logger.info('Reason: Translation identical to original -one textine-')


else:
# Process non-special language scenarios using original logic
Expand Down

0 comments on commit 661fd5e

Please sign in to comment.