-
Notifications
You must be signed in to change notification settings - Fork 5
/
DicControls.pm
177 lines (149 loc) · 8.66 KB
/
DicControls.pm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
#!/usr/bin/perl
package DicControls;
use warnings;
use strict;
use DicGlobals;
use Dic2Screen;
use DicPrepare;
use DicConversion;
use DicHelpUtils;
## Controls from module Dic2Screen
# Verbosity of the debug messages, one switch per level.
$isDebug            = 1;
$isDebugVerbose     = 0;
$isDebugVeryVerbose = 0;
# Verbosity of the info messages, one switch per level.
$isInfo            = 1;
$isInfoVerbose     = 0;
$isInfoVeryVerbose = 0;
# Control for sub die2().
# Some errors should kill the program. However, sometimes you just want to convert.
$isRealDead = 1;
## Controls from module DicPrepare

# Controls for cleanseAr
# In cleanseAr, extensive debug messages are shown only for this entry. E.g. '<k>φλέως</k>'
# NOTE(review): no `use utf8` is visible in this file, so this non-ASCII literal is kept as
# raw source bytes rather than decoded characters -- confirm that matches what cleanseAr compares.
$DebugKeyWordCleanseAr = '<k>φλέως</k>';
# Enables or disables the cutting of a line for the pocketbook dictionary in cleanseAr.
# Still experimental, so keep it disabled.
$isCutDoneWithTidyXML = 0;
# Removes all the references to wav-files. They could be encoded in Base64 now.
$isRemoveWaveReferences = 1;
# Maximum article length. If set too large, the old converter will crash
# and the new one will truncate the entry.
$max_article_length = 64_000;
# Maximum line length, in bytes as generated by: length( encode('UTF-8', $line) )
# If set too large, the converter will complain about bad XML syntax and exit.
$max_line_length = 4_000;
# Not all viewers can handle color/grayscale. Removing the color tags reduces the
# article size considerably. Relevant for the pocketbook dictionary.
$remove_color_tags = 0;

# Controls for loadXDXF
# Controls whether to skip converting to XDXF if an XDXF-file is already found prepared.
$isLoadFromPseudoFileName = 0;

# Controls for makeKoreaderReady
# Sometimes koreader wants something extra. E.g. create css- and/or lua-file,
# convert <c color="red"> tags to <span style="color:red;">
$isMakeKoreaderReady                        = 1;
$isMakeKoreaderReady_SpanColor2Style        = 0;
$isMakeKoreaderReady_SpanWidth2Style        = 0;
$isMakeKoreaderReady_SpanStyleWidht2Padding = 0;
$isMakeKoreaderReady_MergeStyles            = 0;
# Adds lines to lua-file.
$isChangeTable2Div4Koreader = 1;

# Controls for reconstructXDXF
# Controls manual input: 0 disables.
# Default settings for manual input of the xdxf tag.
$lang_from = 'eng';
$lang_to   = 'eng';
$format    = '';
# Value 1 demands user input for the full_name tag.
$reformat_full_name = 1;
# Value 1 demands user input for the xdxf tag.
$reformat_xdxf = 1;
## Controls from module DicConversion

# Controls for convertABBYY2XDXF, the conversion of ABBYY-generated HTML.
# Controls creation of an ABBYYWordlist.txt file.
$isABBYYWordlistNeeded = 1;
# Controls creation of a hash-file.
$isABBYYAllCleared = 0;
# Controls the check for an already generated xdxf-file.
$isABBYYConverterReuse = 0;
# Global variable that gets set to 1 if convertABBYY2XDXF returns an xdxf-array.
$isABBYConverted = 0;
# Keywords at which the conversion pauses.
@ABBYYConverterPauseFor = (
    # E.g.,
    # 'égard',
    # 'ète',
);
# Manual overrule. The conversion checks whether a keyword is allowed
# and passes it without further tests.
@ABBYYConverterAllowedKeys = (
    q~corbeille-d’argent~,
    q~crespelé, e~,
    q~cul-rond~,
    q~desquels, desquelles~,
    q~duquel~,
    q~fœhn~,
    q~giboyeux, euse~,
    q~glacial, e, als~,
    q~hydro-. V~,
    q~inaliénablement~,
    q~in aliéné, e~,
    q~laquelle~,
    q~melliflu, e~,
    q~peu chère~,
    q~pick-nick n.m.~,
);

# Controls for convertCVStoXDXF
# Delimiter for CSV files, usually ",", ";" or "\t" (tab).
$CVSDeliminator = ',';

# Controls for convertHTML2XDXF
# In convertHTML2XDXF only debug messages from this entry are shown. E.g. "Gewirr"
$DebugKeyWordConvertHTML2XDXF = 'Gewirr';
$isConvertDiv2SpaninHTML2DXDF = 0;
$isConvertFont2Small          = 0;
$isConvertFont2Span           = 0;
$isConvertMMCFullText2Span    = 1;

# Controls for convertImage2Base64
# Creates a dependency on Imagemagick "convert".
$isConvertGIF2PNG = 0;

# Controls for convertIMG2Text, the conversion of scanned text images to text.
# Manually validate OCRed images.
$isManualValidation = 1;
# If no substitution for the imagestring was made, it can be removed or not.
$isRemoveUnSubstitutedImageString = 1;
# Remove imagestrings without a src file.
$isRemoveUnSourcedImageStrings = 1;

# Controls for generateXDXFTagBased
# Tags below this frequency, e.g. 10 times, are considered lower frequency.
$HigherFrequencyTags = 10;
# And the consequences of that can be toggled, too.
$isDeleteLowerFrequencyTagsinFilterTagsHash = 0;
# <img.../>-tags are removed if the toggle is positive.
$isExcludeImgTags = 1;
# Controls verbosity of the tag functions.
$isgatherSetsVerbose = 0;
# <mbp...> and <body>-tags are removed if the toggle is positive.
$isRemoveMpbAndBodyTags = 0;
# Tags to be skipped if $isSkipKnownStylingTags = 1.
@KnownStylingTags = qw( a i b font );
# <b>-, <i>-tags and such are usually not relevant for structuring lemma/definition pairs.
# However, <font...>-tags sometimes are. So check.
$isSkipKnownStylingTags = 1;
# A tag-set should be at least this percentage to be considered the outer tags for an article.
$MinimumSetPercentage = 80;
# Used in sub gatherSets.
$LowFrequencyCriterium = 100;
## Controls from module DicHelpUtils
# Controls for escapeHTMLString and unEscapeHTMLString; both toggles start enabled.
( $isEscapeHTMLCharacters, $unEscapeHTML ) = ( 1, 1 );
# Controls for convertMobiAltCodes
# Apparently, characters in the range of 1-31 are displayed as alt-codes in mobireader.
$isConvertMobiAltCodes = 0;
## Shortcuts to collections of settings.
# Exactly one of the two selectors has to be set for its preset to be applied.
# If you select both settings, they will be ignored; with neither selected no preset runs.
our $Just4Koreader   = 0;
our $Just4PocketBook = 1;

if ( $Just4Koreader && !$Just4PocketBook ) {
    # --- Preset: Koreader ---
    # Controls for Stardict dictionary creation and Koreader stardict compatibility
    # Turns on Stardict text and binary dictionary creation.
    $isCreateStardictDictionary = 1;
    # Either "h" or "m" or "x".
    $SameTypeSequence = 'h';
    # If the Stardict files give a sametypesequence value, update the initial value.
    $updateSameTypeSequence = 1;
    # Converting takes time.
    $isConvertColorNamestoHexCodePoints = 1;
    # Sometimes koreader wants something extra. E.g. create css- and/or lua-file,
    # convert <c color="red"> tags to <span style="color:red;">
    $isMakeKoreaderReady = 1;

    # Controls for Pocketbook conversion
    # Controls conversion to Pocketbook Dictionary dic-format.
    $isCreatePocketbookDictionary = 0;
    # Not all viewers can handle color/grayscale. Removing the color tags reduces the
    # article size considerably. Relevant for the pocketbook dictionary.
    $remove_color_tags  = 0;
    $max_article_length = 640_000;
    $max_line_length    = 8_000;

    # Controls for recoding or deleting images and sounds.
    # Removes all the references to wav-files. They could be encoded in Base64 now.
    $isRemoveWaveReferences = 1;
    # Some dictionaries contain images. Encoding them as Base64 allows coding them inline.
    # Only implemented with convertHTML2XDXF.
    $isCodeImageBase64 = 0;
    # Creates a dependency on Imagemagick "convert".
    $isConvertGIF2PNG = 0;

    $unEscapeHTML                        = 0;
    $ForceConvertNumberedSequencesToChar = 1;
    $ForceConvertBlockquote2Div          = 0;
    $isEscapeHTMLCharacters              = 0;
}
elsif ( $Just4PocketBook && !$Just4Koreader ) {
    # --- Preset: PocketBook ---
    # Controls for Stardict dictionary creation and Koreader stardict compatibility
    # Turns on Stardict text and binary dictionary creation.
    $isCreateStardictDictionary = 0;
    # Either "h" or "m" or "x".
    $SameTypeSequence = 'h';
    # If the Stardict files give a sametypesequence value, update the initial value.
    $updateSameTypeSequence = 1;
    # Converting takes time and space.
    $isConvertColorNamestoHexCodePoints = 0;
    # Sometimes koreader wants something extra. E.g. create css- and/or lua-file,
    # convert <c color="red"> tags to <span style="color:red;">
    $isMakeKoreaderReady = 0;

    # Controls for Pocketbook conversion
    # Controls conversion to Pocketbook Dictionary dic-format.
    $isCreatePocketbookDictionary = 1;
    # Not all viewers can handle color/grayscale. Removing the color tags reduces the
    # article size considerably. Relevant for the pocketbook dictionary.
    $remove_color_tags  = 1;
    $max_article_length = 64_000;
    $max_line_length    = 4_000;

    # Controls for recoding or deleting images and sounds.
    # Removes all the references to wav-files. They could be encoded in Base64 now.
    $isRemoveWaveReferences = 1;
    # Some dictionaries contain images. Encoding them as Base64 allows coding them inline.
    # Only implemented with convertHTML2XDXF.
    $isCodeImageBase64 = 1;
    # Creates a dependency on Imagemagick "convert".
    $isConvertGIF2PNG = 0;

    $unEscapeHTML                        = 1;
    $ForceConvertNumberedSequencesToChar = 1;
    $ForceConvertBlockquote2Div          = 1;
    $isEscapeHTMLCharacters              = 0;
}

# A module file must end with a true value.
1;