-
Notifications
You must be signed in to change notification settings - Fork 1
/
listHTMLimages.py
41 lines (33 loc) · 1.08 KB
/
listHTMLimages.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
"""
Display a list of all images contained in all HTML files in a folder
"""
import os
from html.parser import HTMLParser
# Author metadata for this module
__author__ = 'Lindsay Ward'
# Folder whose .html files are scanned by main().
# NOTE: hard-coded, machine-specific absolute path - point it at an existing
# folder before running the script elsewhere.
PATH = '/Users/sci-lmw1/GoogleDrive/OffCampus/JCUS/CP1406/CP1406 2016 SP51/_Pracs'
class ImageFinderParser(HTMLParser):
    """
    Custom subclass of HTMLParser with one overridden method, to handle img elements
    """

    def handle_starttag(self, tag, attrs):
        """
        Print the src of every img element, one per line, prefixed by a tab
        Self-closing tags (<img ... />) are also routed here by HTMLParser's
        default handle_startendtag, so they are covered as well
        :param tag: HTML element name as str (already lower-cased by HTMLParser)
        :param attrs: attributes as list of tuples (attribute, value)
        :return: None
        """
        if tag != 'img':
            return
        for name, value in attrs:
            # HTMLParser reports a valueless attribute (<img src>) as None;
            # skip that and an empty src="" since neither names an image
            if name == 'src' and value:
                print('\t', value)
def main(path=None):
    """
    Print the name of each .html file in a folder, followed by the src of
    every img element found in that file
    :param path: folder to scan; when None the module-level PATH is used
    :return: None
    """
    if path is None:
        path = PATH
    # os.listdir returns entries in arbitrary order; sort for stable output
    for filename in sorted(os.listdir(path)):
        if not filename.endswith('.html'):
            continue
        print(filename)
        # use a fresh parser per file so that unparsed leftovers from one file
        # (e.g. a tag cut off at end-of-file) cannot leak into the next one
        parser = ImageFinderParser()
        # join with the folder instead of os.chdir, leaving the process working
        # directory untouched; undecodable bytes are replaced rather than
        # letting a single badly-encoded file abort the whole listing
        with open(os.path.join(path, filename), encoding='utf8', errors='replace') as f:
            parser.feed(f.read())
        # flush anything still buffered in the parser as end-of-input
        parser.close()


if __name__ == '__main__':
    main()