Skip to content

Commit

Permalink
Merge pull request #5 from niskala5570/sengkang
Browse files Browse the repository at this point in the history
Masalah perkataan bersengkang
  • Loading branch information
niskala5570 authored May 24, 2023
2 parents cee4be3 + 61fdd87 commit 6bd8769
Showing 1 changed file with 19 additions and 16 deletions.
35 changes: 19 additions & 16 deletions alih-kata.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# This code was mostly generated by ChatGPT (I can't code, so I just stitched it together part by part).
# I don't understand much of it myself, but I explain what the code does in README.md.
# Kod ni kebanyakannya ditulis dengan ChatGPT sebab aku malas dan tak reti.
# Jadi aku cantum2 macam lego, kalau nampak cacat dan rajin nak tolong kemaskan, dipersilakan.
# Aku buat kod ni sebab nak percepatkan kerja sahaja, takdelah aku perlu tulis balik Jawi dari awal.
# Untuk perkataan bersengkang, kena buat masukan kamus baru sebab kod tak akan pisahkan(tokenize) Contohnya "Kazuki-kun"
# Ini supaya perkataan berganda seperti "apa-apa, اڤ٢" dapat dialih dengan sempurna

import pysubs2
import os
Expand All @@ -9,7 +12,7 @@
folder_keluar = "Keluar"
folder_kamus = "Kamus"

def muat_katan(file_path):
def muatKamus(file_path):
data = {}
with open(file_path) as file:
lines = file.readlines()
Expand All @@ -22,7 +25,7 @@ def muat_katan(file_path):
data[rumi] = [jawi]
return data

def alih_kata(padanan, translations, chosen_translations, context):
def alihKata(padanan, translations, chosen_translations, context):
katan = padanan.group(0)
if katan.lower() in translations:
translations_list = translations[katan.lower()]
Expand All @@ -43,7 +46,7 @@ def alih_kata(padanan, translations, chosen_translations, context):
return translations_list[0]
return katan

def alih_ayat(ayat, translations, padanan_tanda, chosen_translations):
def alihAyat(ayat, translations, padanan_tanda, chosen_translations):
def gantikan_tanda(padanan):
tanda = padanan.group(0)
if tanda in padanan_tanda:
Expand All @@ -52,21 +55,21 @@ def gantikan_tanda(padanan):

ayat_diterjemah = re.sub(r'(?<!\\)(\\[Nnh])', r' {\1} ', ayat)

ayat_diterjemah = re.sub(r'(?<!{){[^}]+}|[A-Za-z]+', lambda padanan: alih_kata(padanan, translations, chosen_translations, ayat), ayat_diterjemah)
ayat_diterjemah = re.sub(r'(?<!{){[^}]+}|[A-Za-z\-]+', lambda padanan: alihKata(padanan, translations, chosen_translations, ayat), ayat_diterjemah)
ayat_diterjemah = re.sub(r'[?,;,.]', gantikan_tanda, ayat_diterjemah)

ayat_diterjemah = ayat_diterjemah.replace('{\\N}', '\\N').replace('{\\n}', '\\n').replace('{\\h}', '\\h')

return ayat_diterjemah

def baiki_ejaan(teks):
def baikiEjaan(teks):
# Join a one-letter 'د' or 'ک' to the word that follows it by dropping the
# single whitespace character after the letter, e.g. 'د سکوله' -> 'دسکوله'.
# The leading \b restricts the match to a letter that starts a word, so the
# same letter at the end of a longer word is left untouched.
teks = re.sub(r'\bد\s', 'د', teks)
teks = re.sub(r'\bک\s', 'ک', teks)

return teks

def alih_kata_sarikata(file_path, translations, padanan_tanda, padanan_tanggaman_akhiran, padanan_tanggaman_awalan):
def alihKata_Sarikata(file_path, translations, padanan_tanda, padanan_tanggaman_akhiran, padanan_tanggaman_awalan):
sarikata = pysubs2.load(file_path)
tidak_teralih = []
chosen_translations = {} # Dictionary to store chosen translations
Expand All @@ -84,27 +87,27 @@ def alih_kata_sarikata(file_path, translations, padanan_tanda, padanan_tanggaman
extracted_content[placeholder] = '{' + extract + '}'
teks_dialog = teks_dialog.replace('{' + extract + '}', placeholder)

dialog_terjemah = baiki_ejaan(alih_ayat(teks_dialog, translations, padanan_tanda, chosen_translations))
dialog_terjemah = baikiEjaan(alihAyat(teks_dialog, translations, padanan_tanda, chosen_translations))
dialog_terjemah = "{\\fe-1}" + dialog_terjemah

for placeholder, content in extracted_content.items():
dialog_terjemah = dialog_terjemah.replace(placeholder, content)

dialog.text = dialog_terjemah

katan_rumi_jawi = re.findall(r'(?<!{)(?<!\\)(?:\\\\)*(?<!\\[Nnh])\b[A-Za-z]+(?<!\\)(?!})(?<!\\[Nnh])', teks_dialog)
katan_rumi_jawi = re.findall(r'(?<!{)(?<!\\)(?:\\\\)*(?<!\\[Nnh])\b[A-Za-z\-]+(?<!\\)(?!})(?<!\\[Nnh])', teks_dialog)
for katan in katan_rumi_jawi:
if katan.lower() not in translations and not re.search(r'(?<!\\){[^}]+}', teks_dialog):
katan_terjemah = alih_kata(re.search(katan, teks_dialog), translations, chosen_translations, teks_dialog)
katan_terjemah = alihKata(re.search(katan, teks_dialog), translations, chosen_translations, teks_dialog)
if katan_terjemah == katan:
for tanggaman_akhiran in padanan_tanggaman_akhiran:
if katan.lower().endswith(tanggaman_akhiran):
katan_terjemah = alih_kata(re.search(katan[:-len(tanggaman_akhiran)], teks_dialog), translations, chosen_translations, teks_dialog) + padanan_tanggaman_akhiran[tanggaman_akhiran]
katan_terjemah = alihKata(re.search(katan[:-len(tanggaman_akhiran)], teks_dialog), translations, chosen_translations, teks_dialog) + padanan_tanggaman_akhiran[tanggaman_akhiran]
break
if katan_terjemah == katan:
for tanggaman_awalan in padanan_tanggaman_awalan:
if katan.lower().startswith(tanggaman_awalan):
katan_terjemah = padanan_tanggaman_awalan[tanggaman_awalan] + alih_kata(re.search(katan[len(tanggaman_awalan):], teks_dialog), translations, chosen_translations, teks_dialog)
katan_terjemah = padanan_tanggaman_awalan[tanggaman_awalan] + alihKata(re.search(katan[len(tanggaman_awalan):], teks_dialog), translations, chosen_translations, teks_dialog)
break
if katan_terjemah != katan:
dialog_terjemah = dialog_terjemah.replace(katan, katan_terjemah)
Expand All @@ -123,10 +126,10 @@ def alih_kata_sarikata(file_path, translations, padanan_tanda, padanan_tanggaman
for file in os.listdir(folder_kamus):
if file.endswith(".tsv"):
file_path = os.path.join(folder_kamus, file)
katan_kamus = {**katan_kamus, **muat_katan(file_path)}
katan_kamus = {**katan_kamus, **muatKamus(file_path)}

padanan_tanda = {"?": "؟", ";": "⁏", ",": "⹁", ".": "."}
padanan_tanggaman_akhiran = {"lah": "له", "kah": "که", "nya": "ڽ", "kan": "کن", "i": "ي", "ku": "کو", "mu": "مو"}
# Rumi suffix -> Jawi suffix. When a word has no dictionary entry, the caller
# checks whether it ends with one of these keys, translates the remaining stem
# and appends the mapped Jawi form. The "nya" value had lost its Jawi letter,
# leaving an unterminated string literal; it is restored to "ڽ" here.
padanan_tanggaman_akhiran = {"lah": "له", "kah": "که", "kan": "کن", "i": "ي", "ku": "کو", "mu": "مو", "nya": "ڽ"}
padanan_tanggaman_awalan = {"ber": "بر", "mem": "مم", "meng": "مڠ", "se": "س", "tak": "تق", "per": "ڤر"}

select_translation = True
Expand All @@ -138,7 +141,7 @@ def alih_kata_sarikata(file_path, translations, padanan_tanda, padanan_tanggaman
print(f"Mengalih kata: {fail}\n---")
laluan_fail = os.path.join(folder_masuk, fail)
if select_translation:
tidak_teralih_count, tidak_teralih = alih_kata_sarikata(laluan_fail, katan_kamus, padanan_tanda, padanan_tanggaman_akhiran, padanan_tanggaman_awalan)
tidak_teralih_count, tidak_teralih = alihKata_Sarikata(laluan_fail, katan_kamus, padanan_tanda, padanan_tanggaman_akhiran, padanan_tanggaman_awalan)
else:
tidak_teralih_count, tidak_teralih = 0, []
if tidak_teralih_count > 0:
Expand Down

0 comments on commit 6bd8769

Please sign in to comment.