-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
118 lines (85 loc) · 4.24 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import argparse # Import the argparse module
from components.text_aligner import TextAligner
from components.content_preprocessor import ContentPreprocessor
from components.exceptions import ChaosOverflow, RealignmentFailed
def main():
    """Command-line entry point.

    Declares two positional arguments, ``jp_file`` and ``cn_file``, parses
    the command line and passes both paths to :func:`core`.
    """
    cli = argparse.ArgumentParser(
        description="Align Japanese and Chinese text files."
    )
    positional_args = (
        ("jp_file", "The file path of the Japanese text file"),
        ("cn_file", "The file path of the Chinese text file"),
    )
    for arg_name, arg_help in positional_args:
        cli.add_argument(arg_name, help=arg_help)
    options = cli.parse_args()

    # Full pipeline. test() accepts the same two paths and can be swapped in
    # here to skip the realignment step while debugging.
    core(options.jp_file, options.cn_file)
def test(jp_file_path, cn_file_path):
    """Preprocess both files and write them back without realigning.

    Both files are read and run through ``ContentPreprocessor``; a
    ``TextAligner`` is then built from the results, but ``realign_texts`` is
    never called. The first element of every entry in ``aligner.jp_lines``
    and ``aligner.cn_lines`` is joined with newlines and written over the
    corresponding input file.

    Args:
        jp_file_path: Path of the Japanese text file (overwritten in place).
        cn_file_path: Path of the Chinese text file (overwritten in place).
    """
    print(f"Test processing files:\n {jp_file_path}\n {cn_file_path}")

    # Load the raw text first, then preprocess each side.
    raw_jp = read_file(jp_file_path)
    raw_cn = read_file(cn_file_path)
    prepared_jp = ContentPreprocessor(raw_jp).preprocess_content()
    prepared_cn = ContentPreprocessor(raw_cn).preprocess_content()

    # Only construct the aligner; no realignment pass is run here.
    aligner = TextAligner(prepared_jp, prepared_cn)

    print("Proceeding to overwrite the original files with the processed content.")

    # Build both outputs before touching the disk.
    # presumably element 0 of each entry is the line text; verify in TextAligner
    jp_output = '\n'.join(entry[0] for entry in aligner.jp_lines)
    cn_output = '\n'.join(entry[0] for entry in aligner.cn_lines)

    for target_path, text in ((jp_file_path, jp_output), (cn_file_path, cn_output)):
        write_file(target_path, text)

    print("Files have been processed and overwritten.\n\n-----\n")
def core(jp_file_path, cn_file_path):
    """Preprocess, realign and overwrite a Japanese/Chinese file pair.

    Steps, in order: read both files, preprocess each with
    ``ContentPreprocessor``, build a ``TextAligner`` and call
    ``realign_texts``, remove duplicated error-correction lines, then write
    the first element of every aligner entry back to the original paths.

    ``ChaosOverflow`` and ``RealignmentFailed`` are reported and processing
    continues, so the files are still overwritten with whatever the aligner
    holds at that point. Any other exception is reported and re-raised
    before anything is written.

    Args:
        jp_file_path: Path of the Japanese text file (overwritten in place).
        cn_file_path: Path of the Chinese text file (overwritten in place).
    """
    print(f"\nBegin processing files:\n {jp_file_path}\n {cn_file_path}")

    # Load the raw text first, then preprocess each side.
    raw_jp = read_file(jp_file_path)
    raw_cn = read_file(cn_file_path)
    prepared_jp = ContentPreprocessor(raw_jp).preprocess_content()
    prepared_cn = ContentPreprocessor(raw_cn).preprocess_content()

    print("\nPreprocessing completed. Proceeding to align the files.")

    aligner = TextAligner(prepared_jp, prepared_cn)

    try:
        # To exercise the overflow path manually, raise ChaosOverflow(1000, 10) here.
        aligner.realign_texts()
    except ChaosOverflow as overflow:
        print("Realignment process ended due to Chaos Overflow:", overflow)
    except RealignmentFailed as failure:
        print("Realignment process ended due to Exception:", failure)
    except Exception as unexpected:
        print("Realignment process ended due to unforeseen Exception:", unexpected)
        raise unexpected

    print("Removing duplicated error correction lines.")
    aligner.remove_duplicated_error_correction_lines()

    print("Proceeding to overwrite the original files with the processed content.")

    # Build both outputs before touching the disk.
    # presumably element 0 of each entry is the line text; verify in TextAligner
    jp_output = '\n'.join(entry[0] for entry in aligner.jp_lines)
    cn_output = '\n'.join(entry[0] for entry in aligner.cn_lines)

    for target_path, text in ((jp_file_path, jp_output), (cn_file_path, cn_output)):
        write_file(target_path, text)

    print("Files have been processed and overwritten.\n\n-----\n")

    # Reported with +1 applied; NOTE(review): looks like a 0-based index
    # turned into a 1-based line number -- confirm against TextAligner.
    pending_line = aligner.to_fix_line_id
    if pending_line > 0:
        print(f"Line to fix: {pending_line + 1}")
def read_file(file_path):
    """Return the entire content of *file_path*, decoded as UTF-8 text."""
    with open(file_path, mode='r', encoding='utf-8') as source:
        text = source.read()
    return text
def write_file(file_path, content):
    """Replace whatever *file_path* holds with *content*, written as UTF-8."""
    # Dry-run tip: redirect the output instead of clobbering the input, e.g.
    #   file_path = file_path.replace('.txt', '_processed.txt')
    with open(file_path, mode='w', encoding='utf-8') as sink:
        sink.write(content)
def print_list_readable(lst):
    """Print the extra items of every entry that has more than one element.

    For each entry of *lst* with more than one element, one line is printed:
    ``Line N: `` (N is the 1-based position, right-aligned to the width of
    ``len(lst)``), followed by every element after the first, each followed
    by a single space. Entries with zero or one element print nothing.
    """
    width = len(str(len(lst)))  # digits needed for the largest line number
    for line_number, entry in enumerate(lst, start=1):
        if len(entry) <= 1:
            continue
        # print() puts one space after the label and between the items; the
        # custom end supplies the trailing space after the last item.
        print(f"Line {line_number:>{width}}:", *entry[1:], end=" \n")
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()