# FileMelt.py
# SETTINGS
# Module-level switches read by the minifier functions and the main program below.
inputFolder = "source" # Folder that is walked for files to process
outputFolder = "docs" # Folder that receives the processed files
deleteFilesMissingInput = True # Delete files in output folder without corresponding input files
printFileStatistics = True # Print the size decrease of each individual file.
removeHtmlComments = True # Remove HTML comments
removeSvgComments = True # Remove svg comments
minifyJsFiles = True # Minify standalone .js files (imperfect in some cases); when False they are copied unchanged
removeConsoleLog = True # Remove JavaScript console log statements.
# Built-in imports
import os
import shutil
import re
import xml.etree.ElementTree
import importlib
import pip
# Install external dependencies
def installPackage(packageName):
    """Make sure ``packageName`` can be imported, installing it with pip if needed.

    The module is imported first; only when that raises ImportError is pip
    invoked to install a distribution of the same name.

    Raises:
        subprocess.CalledProcessError: if the pip command exits with an error.
    """
    try:
        importlib.import_module(packageName)
    except ImportError:
        # pip has no supported in-process API (pip.main is gone from modern
        # releases), so run it as a child process of the current interpreter.
        import subprocess
        import sys
        subprocess.check_call([sys.executable, "-m", "pip", "install", packageName])
        # Refresh the import system's caches *after* installing so the
        # freshly installed package is visible to the next import statement.
        importlib.invalidate_caches()
installPackage('htmlmin')
from htmlmin import minify
installPackage('jsmin')
from jsmin import jsmin
installPackage('csscompressor')
from csscompressor import compress
### Methods
# Shrink the style tag
def minifyStyleTag(htmlString):
    """Return ``htmlString`` with the CSS inside every <style> element compressed.

    Each matched element is re-emitted as a bare ``<style>...</style>`` pair,
    so attributes on the original opening tag are not carried over.
    """
    # DOTALL lets the CSS body span several lines
    styleElement = re.compile(r'<style[^>]*>(.*?)</style>', re.DOTALL)

    def rebuildElement(found):
        # group(1) is the raw CSS between the tags
        shrunkCss = compress(found.group(1))
        return f'<style>{shrunkCss}</style>'

    return styleElement.sub(rebuildElement, htmlString)
# Process script tags
def processScriptTags(match):
    """Rebuild one matched <script> element with its inline code run through jsmin.

    ``match`` is a regex match whose group(0) is the whole
    ``<script ...>...</script>`` element and whose group(1) is the code
    between the tags.

    The ``type`` attribute is kept (normalised to ``type="module"``) only for
    module scripts and removed otherwise. All other attributes of the opening
    tag (``src``, ``defer``, ``id`` ...) are preserved.
    """
    script_content = match.group(1)
    # Inspect the opening tag only, so text inside the script body can never
    # be mistaken for an attribute.
    opening_tag = re.match(r'<script\b([^>]*)>', match.group(0), re.IGNORECASE)
    attributes = opening_tag.group(1) if opening_tag else ''
    # The look-behind keeps attributes such as data-type from being treated as type.
    is_module = re.search(r'(?<![\w-])type\s*=\s*["\']?module["\']?', attributes, re.IGNORECASE) is not None
    # Strip the type attribute whatever its value (quoted, or an unquoted token)
    attributes = re.sub(
        r'\s*(?<![\w-])type\s*=\s*(?:"[^"]*"|\'[^\']*\'|[^\s>]+)',
        '',
        attributes,
        flags=re.IGNORECASE,
    ).strip()
    if is_module:
        # If the <script> tag has type="module", keep it
        attributes = ('type="module" ' + attributes).strip()
    opening = f'<script {attributes}>' if attributes else '<script>'
    return opening + jsmin(script_content) + '</script>'
# Delete console log statements
def removeConsoleLogStatements(html_string):
    """Return ``html_string`` with console.log(...) calls removed from inline scripts.

    Only text between ``<script ...>`` and ``</script>`` is touched. A call is
    removed together with one directly following semicolon. The argument list
    is matched by counting parentheses (skipping quoted text), so nested calls
    such as ``console.log(f(1));`` are removed completely. A call whose
    parentheses never close is left untouched.

    The opening tag's attributes are preserved; an unquoted ``type=module`` is
    rewritten to ``type="module"``.
    """
    # Regular expression pattern to match script tags
    script_pattern = r'<script\b([^>]*)>([\s\S]*?)<\/script>'
    call_start = re.compile(r'console\.log\s*\(')

    def find_call_end(code, start):
        # Return the index just past the ")" closing the "(" that ends at
        # ``start``, or -1 when the parentheses are unbalanced.
        depth = 1
        quote = None
        i = start
        while i < len(code):
            ch = code[i]
            if quote:
                if ch == '\\':
                    i += 1  # skip the escaped character
                elif ch == quote:
                    quote = None
            elif ch in '"\'`':
                quote = ch
            elif ch == '(':
                depth += 1
            elif ch == ')':
                depth -= 1
                if depth == 0:
                    return i + 1
            i += 1
        return -1

    def strip_calls(code):
        kept = []
        pos = 0
        while True:
            found = call_start.search(code, pos)
            if found is None:
                break
            end = find_call_end(code, found.end())
            if end == -1:
                # Unbalanced call: keep the remainder exactly as written
                break
            kept.append(code[pos:found.start()])
            if end < len(code) and code[end] == ';':
                end += 1
            pos = end
        kept.append(code[pos:])
        return ''.join(kept)

    def repl(match):
        # Extract attributes and script content
        attributes = match.group(1).strip()
        script_content = strip_calls(match.group(2))
        # Reconstruct the script tag with attributes (if any)
        if not attributes:
            return f'<script>{script_content}</script>'
        if "type=module" in attributes:
            # str.replace returns a new string; the result must be kept
            attributes = attributes.replace("type=module", "type=\"module\"")
        return f'<script {attributes}>{script_content}</script>'

    # Use re.sub to replace and modify script tags
    return re.sub(script_pattern, repl, html_string)
# Minify html files
def minifyHtml(inputFile, outputFile):
    """Minify the HTML file at ``inputFile`` and write the result to ``outputFile``.

    Double-quoted and backtick-quoted strings are swapped for placeholders
    while the markup is processed and put back at the end, so the processing
    steps in between do not see their contents. ``type="module"`` attributes
    are exempt from that protection so script handling can still recognise
    module scripts.

    Both files are treated as UTF-8. The output file is opened only after all
    processing succeeded.
    """
    with open(inputFile, "r", encoding="utf-8") as inFile:
        htmlContent = inFile.read()

    # Substitute type="module" with placeholders. Each placeholder carries the
    # index of the text it stands for, so it can be resolved wherever it ends up.
    type_module_pattern = re.compile(r'type\s*=\s*(?:"module"|\'module\')')
    type_module_token = re.compile(r'__FILEMELT_TYPE_MODULE_PLACEHOLDER_(\d+)__')
    type_module_placeholders = []

    def hideTypeModule(found):
        type_module_placeholders.append(found.group())
        return f"__FILEMELT_TYPE_MODULE_PLACEHOLDER_{len(type_module_placeholders) - 1}__"

    def showTypeModule(found):
        return type_module_placeholders[int(found.group(1))]

    htmlContent = type_module_pattern.sub(hideTypeModule, htmlContent)

    # Replace all strings with placeholders
    placeholders = []

    def protect(template):
        def replacer(found):
            placeholders.append(found.group())
            return template.format(len(placeholders) - 1)
        return replacer

    stringPattern = r'"(?:\\.|[^"\\])*"'
    multilineStringPattern = r'`[^`]*`'
    htmlContent = re.sub(stringPattern, protect("__FILEMELT_STRING_PLACEHOLDER_{}__"), htmlContent)
    htmlContent = re.sub(multilineStringPattern, protect("__FILEMELT_MULTILINE_STRING_PLACEHOLDER_{}__"), htmlContent)

    # Resubstitute type="module", both in the markup and inside protected
    # strings (a backtick string may have swallowed one).
    htmlContent = type_module_token.sub(showTypeModule, htmlContent)
    placeholders = [type_module_token.sub(showTypeModule, text) for text in placeholders]

    # Process scripts
    htmlContent = re.sub(r'<script[^>]*>([\s\S]*?)<\/script>', processScriptTags, htmlContent)
    # Minify style tag
    htmlContent = minifyStyleTag(htmlContent)
    # Remove HTML comments (DOTALL so comments spanning several lines go too)
    if removeHtmlComments:
        htmlContent = re.sub(r'<!--(.*?)-->', '', htmlContent, flags=re.DOTALL)
    # Minify HTML content (changes type="module" to type=module)
    htmlContent = minify(htmlContent, remove_empty_space=True)
    # Delete JavaScript console log statements
    if removeConsoleLog:
        htmlContent = removeConsoleLogStatements(htmlContent)
    # Remove empty scripts, but keep body-less scripts that load an external file
    htmlContent = re.sub(r'<script(?![^>]*\bsrc\s*=)[^>]*>\s*</script>', '', htmlContent)
    # Restore the original strings. Go from the newest placeholder to the
    # oldest: a backtick string recorded later may itself contain placeholders
    # of double-quoted strings recorded earlier.
    for index in range(len(placeholders) - 1, -1, -1):
        placeholder = placeholders[index]
        htmlContent = htmlContent.replace(f"__FILEMELT_STRING_PLACEHOLDER_{index}__", placeholder)
        htmlContent = htmlContent.replace(f"__FILEMELT_MULTILINE_STRING_PLACEHOLDER_{index}__", placeholder)

    with open(outputFile, "w", encoding="utf-8") as outFile:
        outFile.write(htmlContent)
# Minify the style of an svg
def minifySvgStyleTag(svgInput):
    """Return ``svgInput`` with the CSS inside each <style> element compressed.

    Every matched element is re-emitted as a bare ``<style>...</style>`` pair
    holding its own compressed CSS. Input without a <style> element is
    returned unchanged.
    """
    def compressStyle(styleMatch):
        # Minify the CSS using csscompressor
        return f'<style>{compress(styleMatch.group(1))}</style>'

    # A function replacement is inserted literally; a replacement *string*
    # would have backslashes in the CSS interpreted as regex escapes.
    return re.sub(r'<style.*?>(.*?)</style>', compressStyle, svgInput, flags=re.DOTALL)
# XML svg minification
def xmlSvgMinification(inputSvg):
    """Re-serialise ``inputSvg`` through ElementTree with surrounding whitespace removed.

    Leading/trailing whitespace of every element's text and tail is stripped,
    whitespace between tags is dropped, and the ``ns0:`` prefix that appears
    in the serialised output is removed.

    Raises:
        Exception: with the message "Invalid SVG input" when the input cannot
            be parsed as XML.
    """
    etree = xml.etree.ElementTree
    # Parse the input SVG text
    try:
        svgRoot = etree.fromstring(inputSvg)
    except etree.ParseError:
        raise Exception("Invalid SVG input")

    # Trim whitespace around text nodes; empty/None values stay as they are
    for node in svgRoot.iter():
        if node.text:
            node.text = node.text.strip()
        if node.tail:
            node.tail = node.tail.strip()

    # Back to a string
    serialised = etree.tostring(svgRoot, encoding='utf-8').decode('utf-8')
    # Drop whitespace that sits between two tags
    compact = re.sub(r'>\s+<', '><', serialised)
    # Strip the ns0: prefix from opening tags first ...
    unprefixedTags = re.sub(r'<ns0:(.*?)>', r'<\1>', compact)
    # ... then from everything that is left (closing tags, attributes)
    return unprefixedTags.replace('ns0:', '')
# Minify svg files
def minifySvg(inputFile, outputFile):
    """Minify the SVG file at ``inputFile`` and write the result to ``outputFile``.

    Double-quoted and backtick-quoted strings are replaced by placeholders
    while comments are removed and <style> content is compressed, then put
    back before the final ElementTree pass.

    Both files are treated as UTF-8. The output file is opened only after all
    processing succeeded.
    """
    with open(inputFile, "r", encoding="utf-8") as inFile:
        svgContent = inFile.read()

    # Replace all strings with placeholders
    placeholders = []

    def protect(template):
        def replacer(found):
            placeholders.append(found.group())
            return template.format(len(placeholders) - 1)
        return replacer

    stringPattern = r'"(?:\\.|[^"\\])*"'
    multilineStringPattern = r'`[^`]*`'
    svgContent = re.sub(stringPattern, protect("__FILEMELT_STRING_PLACEHOLDER_{}__"), svgContent)
    svgContent = re.sub(multilineStringPattern, protect("__FILEMELT_MULTILINE_STRING_PLACEHOLDER_{}__"), svgContent)
    # Remove svg comments (DOTALL so comments spanning several lines go too)
    if removeSvgComments:
        svgContent = re.sub(r'<!--(.*?)-->', '', svgContent, flags=re.DOTALL)
    # Minify style tag of svg
    svgContent = minifySvgStyleTag(svgContent)
    # Restore the original strings. Go from the newest placeholder to the
    # oldest: a backtick string recorded later may itself contain placeholders
    # of double-quoted strings recorded earlier.
    for index in range(len(placeholders) - 1, -1, -1):
        placeholder = placeholders[index]
        svgContent = svgContent.replace(f"__FILEMELT_STRING_PLACEHOLDER_{index}__", placeholder)
        svgContent = svgContent.replace(f"__FILEMELT_MULTILINE_STRING_PLACEHOLDER_{index}__", placeholder)
    # Second pass of minification with element tree (built-in)
    svgContent = xmlSvgMinification(svgContent)
    # Remove ns0
    svgContent = svgContent.replace("xmlns:ns0","xmlns")

    with open(outputFile, "w", encoding="utf-8") as outFile:
        outFile.write(svgContent)
# Minify js files
def minifyJs(inputFile, outputFile):
    """Minify the JavaScript file at ``inputFile`` and write the result to ``outputFile``.

    Double-quoted and backtick-quoted strings are replaced by placeholders
    before the code is run through jsmin and restored afterwards.

    Both files are treated as UTF-8. The output file is opened only after all
    processing succeeded.
    """
    with open(inputFile, "r", encoding="utf-8") as inFile:
        jsContent = inFile.read()

    # Replace all strings with placeholders
    placeholders = []

    def protect(template):
        def replacer(found):
            placeholders.append(found.group())
            return template.format(len(placeholders) - 1)
        return replacer

    stringPattern = r'"(?:\\.|[^"\\])*"'
    multilineStringPattern = r'`[^`]*`'
    jsContent = re.sub(stringPattern, protect("__FILEMELT_STRING_PLACEHOLDER_{}__"), jsContent)
    jsContent = re.sub(multilineStringPattern, protect("__FILEMELT_MULTILINE_STRING_PLACEHOLDER_{}__"), jsContent)
    # Remove html format comments in JavaScript
    jsContent = re.sub(r'<!--(.*?)-->', '', jsContent)
    # Minify JavaScript
    jsContent = jsmin(jsContent)
    # Restore the original strings. Go from the newest placeholder to the
    # oldest: a backtick string recorded later may itself contain placeholders
    # of double-quoted strings recorded earlier.
    for index in range(len(placeholders) - 1, -1, -1):
        placeholder = placeholders[index]
        jsContent = jsContent.replace(f"__FILEMELT_STRING_PLACEHOLDER_{index}__", placeholder)
        jsContent = jsContent.replace(f"__FILEMELT_MULTILINE_STRING_PLACEHOLDER_{index}__", placeholder)

    with open(outputFile, "w", encoding="utf-8") as outFile:
        outFile.write(jsContent)
# Minify CSS files
def minifyCss(inputFile, outputFile):
    """Compress the CSS file at ``inputFile`` and write the result to ``outputFile``.

    Both files are treated as UTF-8 instead of the platform's default
    encoding. The output file is opened only after the input was read and
    compressed.
    """
    with open(inputFile, "r", encoding="utf-8") as inFile:
        cssContent = inFile.read()
    minifiedCss = compress(cssContent)
    with open(outputFile, "w", encoding="utf-8") as outFile:
        outFile.write(minifiedCss)
# Print statistics of given file
def getFileStats(inputFilename, inputSize, outputSize):
    """Print the name of a file and how much smaller its processed copy is.

    ``inputSize`` and ``outputSize`` are byte counts. The decrease is printed
    as a percentage of ``inputSize`` rounded to four decimals; for an empty
    input file (size 0) it is reported as 0.0 instead of dividing by zero.
    """
    print(inputFilename)
    print(str(inputSize) + " --> " + str(outputSize) + " bytes")
    if inputSize:
        percentDecrease = 1 - (outputSize / float(inputSize))
    else:
        # Nothing to shrink; avoids ZeroDivisionError
        percentDecrease = 0.0
    print("Size decrease: " + str(round(percentDecrease * 100, 4)) + "%\n")
### Main program
# Create the output directory if it doesn't exist
os.makedirs(outputFolder, exist_ok=True)

# Byte counts
totalInputBytes = 0
totalOutputBytes = 0

# Empty the output folder if files without sources should be removed
if deleteFilesMissingInput:
    for fileName in os.listdir(outputFolder):
        filePath = os.path.join(outputFolder, fileName)
        try:
            if os.path.isfile(filePath):
                os.unlink(filePath)
            elif os.path.isdir(filePath):
                shutil.rmtree(filePath)
        except Exception as e:
            # Best effort: report the failure and carry on with the next entry
            print(f"Failed to delete {filePath}: {e}")

# Search directory and minify files
for root, _, files in os.walk(inputFolder):
    for filename in files:
        inputFile = os.path.join(root, filename)
        # NOTE(review): the output path uses only the file name, so files from
        # subfolders land flat in outputFolder and same-named files overwrite
        # each other - confirm this is intended.
        outputFile = os.path.join(outputFolder, filename)
        if filename.endswith(".css"):
            # Stylesheets go through the CSS compressor, not the JS minifier
            minifyCss(inputFile, outputFile)
        elif filename.endswith(".html"):
            minifyHtml(inputFile, outputFile)
        elif filename.endswith(".js") and minifyJsFiles:
            minifyJs(inputFile, outputFile)
        elif filename.endswith(".svg"):
            minifySvg(inputFile, outputFile)
        else:
            # Copy unsupported files to the output folder
            shutil.copy(inputFile, outputFile)
        # Calculate total bytes
        inputSize = os.path.getsize(inputFile)
        outputSize = os.path.getsize(outputFile)
        totalInputBytes += inputSize
        totalOutputBytes += outputSize
        if printFileStatistics:
            getFileStats(filename, inputSize, outputSize)

print("Total bytes in input folder: " + str(totalInputBytes) + " bytes")
print("Total bytes in output folder: " + str(totalOutputBytes) + " bytes")
if totalInputBytes:
    percentDecrease = 1 - (totalOutputBytes / float(totalInputBytes))
else:
    # Empty (or missing) input folder: nothing was shrunk, and dividing by
    # zero would crash the summary
    percentDecrease = 0.0
print("Total size decrease: " + str(round(percentDecrease * 100, 4)) + "%")