-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdemopachogn.py
52 lines (44 loc) · 1.43 KB
/
demopachogn.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# main.py
# Imports for web server
import os
from flask import Flask
# Imports for web scraping
from requests_html import AsyncHTMLSession
import asyncio
import pyppeteer
# Flask application object; the @app.route handlers in this file register on it.
app = Flask(__name__)
async def get_page(url):
    """Fetch *url* and return its HTML after JavaScript rendering.

    A headless Chrome is launched through pyppeteer and attached to a fresh
    ``AsyncHTMLSession`` so that ``arender()`` renders with that browser
    instead of the one requests_html would launch on its own.

    Args:
        url: Address of the page to fetch.

    Returns:
        The ``html`` attribute of the response, after ``arender()`` has run.

    Raises:
        Whatever the browser launch, the HTTP request or the rendering
        raises; the session and browser are closed before it propagates.
    """
    # No event loop is created here: this coroutine already runs inside a
    # loop, and a second loop installed with set_event_loop() would never be
    # used or closed.
    session = AsyncHTMLSession()
    try:
        print("Launching browser...")
        session._browser = await pyppeteer.launch({
            # Alternative binary name: 'google-chrome-stable'
            'executablePath': 'google-chrome-unstable',
            'ignoreHTTPSErrors': True,
            'dumpio': True,
            'headless': True,
            # Presumably disabled because signal handlers can only be
            # installed from the main thread, while the web server may call
            # this from a worker thread -- confirm before re-enabling.
            'handleSIGINT': False,
            'handleSIGTERM': False,
            'handleSIGHUP': False,
        })
        print("Launched browser...")
        resp_page = await session.get(url)
        print("Got response from page...")
        await resp_page.html.arender()
        print("Rendered page...")
        # The HTML object (not the raw response content) is what callers use.
        return resp_page.html
    finally:
        # Closes the attached browser (if it was launched) and the session,
        # so a Chrome process is not left behind after every call.
        await session.close()
@app.route("/test/<path:url>")
def get_page_name(url):
    """Return the text of the ``<title>`` element of the page at *url*.

    Args:
        url: Address to fetch, taken from the request path after ``/test/``.

    Returns:
        The title text on success, or a plain-text message when the page
        could not be retrieved or contains no ``<title>`` element.
    """
    # NOTE(security): `url` comes straight from the request path and is
    # fetched server-side by a real browser. Restrict the allowed hosts
    # before exposing this endpoint to untrusted clients (SSRF risk).
    print("Got request to collect ", url)
    try:
        page_html = asyncio.run(get_page(url))
    except Exception:
        # `Exception` rather than a bare except, so KeyboardInterrupt and
        # SystemExit still propagate; the traceback is logged, not dropped.
        app.logger.exception("Failed to retrieve %s", url)
        return "Error retrieving match content from URL"
    titles = page_html.find('title')
    if not titles:
        # A page without a <title> used to raise IndexError here.
        return "No title found at URL"
    return titles[0].text
@app.route("/")
def get_toscrape_name():
    """Serve the site root with the page title of the toscrape.com demo site."""
    demo_url = "http://toscrape.com"
    return get_page_name(demo_url)
if __name__ == "__main__":
    # The server listens on every interface, so the interactive debugger
    # (which lets a client execute arbitrary Python) must not be on by
    # default. Set FLASK_DEBUG=1 to opt in during local development.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").lower() in ("1", "true", "yes")
    # PORT is read from the environment, falling back to 8080.
    app.run(debug=debug_enabled, host="0.0.0.0", port=int(os.environ.get("PORT", 8080)))