-
Notifications
You must be signed in to change notification settings - Fork 0
/
wuyun.py
65 lines (55 loc) · 1.36 KB
/
wuyun.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#!/usr/bin/env python
# coding=utf-8
import traceback
from bs4 import BeautifulSoup
import requests
import save_pdf
# Base URL of the site; get_article_links() prepends it to every href it collects.
url_referer = "https://wooyun.js.org/"
def get_article_links(url):
    """Collect article URLs from the index page at ``url``.

    The page is downloaded with ``requests`` and parsed with BeautifulSoup
    using the ``lxml`` parser.  Every ``<a>`` that is a direct child of an
    element with class ``link`` contributes one entry.

    Each href is printed as it is found.  The returned entries are built by
    prefixing the UTF-8 encoded href with the module-level ``url_referer``
    (not with the ``url`` argument).

    :param url: address of the page to scan for article links.
    :return: list of article URLs, in document order.
    """
    page = requests.get(url).text
    anchors = BeautifulSoup(page, "lxml").select(".link > a")

    collected = []
    for anchor in anchors:
        href = anchor["href"]
        print(href)
        collected.append(url_referer + href.encode("utf-8"))
    return collected
# File main() reads the pending article links from (one per line) and
# rewrites as links are saved.
filename = "log/400.txt"
def main():
    """Save every article listed in ``filename`` as a PDF, resumably.

    ``filename`` holds one article URL per line.  Each URL is passed to
    ``save_pdf.save_pdf_by_url`` (selector list ``["body"]``, output
    directory ``./anquan/``).  After every successful save the URL is
    dropped from the pending list and ``filename`` is rewritten, so an
    interrupted run can be restarted without redoing finished articles.

    A URL whose save raises is left in ``filename`` for a later run, and
    the URL plus the traceback are appended to ``log/anquan_err.log``
    between ``[start]`` / ``[end]`` markers.

    Errors reading ``filename`` or writing the error log propagate to the
    caller.
    """
    # NOTE: the pending list could instead be built with
    # get_article_links(url_referer); here it is read from disk.
    with open(filename, "r") as fd:
        # Blank entries (e.g. produced by a trailing newline) are not URLs;
        # drop them instead of handing them to the PDF saver.
        article_links = [
            line.strip() for line in fd.read().split("\n") if line.strip()
        ]

    # Iterate over a snapshot: removing items from the list that is being
    # iterated makes the loop skip the entry following each removed one.
    for article_link in list(article_links):
        try:
            print(article_link)
            save_pdf.save_pdf_by_url(
                article_link, ["body"], directory="./anquan/"
            )
            article_links.remove(article_link)
            # Checkpoint the remaining work after every success.
            with open(filename, "w") as fd:
                fd.write("\n".join(article_links))
        except Exception:
            # Best effort: record the failure and carry on with the next
            # link; the failed link stays in the pending list.
            with open("log/anquan_err.log", "a") as fhandle:
                fhandle.write("[start]\n")
                fhandle.write(article_link + "\n")
                fhandle.write(traceback.format_exc())
                fhandle.write("[end]\n")

    # Final write so the file reflects the pending list even when no save
    # succeeded (e.g. only blank lines were dropped).
    with open(filename, "w") as fd:
        fd.write("\n".join(article_links))
# Run the download loop only when this file is executed as a script.
if __name__ == "__main__":
    main()