-
Notifications
You must be signed in to change notification settings - Fork 0
/
example.py
34 lines (27 loc) · 1.09 KB
/
example.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# -*- coding: utf-8 -*-
import scrapy
# class ExampleSpider(scrapy.Spider):
# name = 'quotes_spider'
# allowed_domains = ['quotes.toscrape.com']
# start_urls = ['http://quotes.toscrape.com/']
#
# def parse(self, response):
# quotes = response.xpath("//div[@class='quote']//span[@class='text']/text()").extract()
# author = response.xpath("//div[@class='quote']//small[@class='author']/text()").extract()
#
# print("Type is: ", type(quotes))
# print("Type is:", type(author))
#
# yield {'quotes': quotes,
# 'author': author}
class ExampleSpider(scrapy.Spider):
    """Spider that gathers link targets found inside caption blocks.

    It requests the single article URL listed in ``start_urls`` and emits
    one item holding every ``href`` located under ``div.caption-text``.
    """

    name = 'quotes_spider'
    allowed_domains = ['arstechnica.com']
    start_urls = ['https://arstechnica.com/gaming/2019/08/ewan-mcgregor-confirms-he-will-return-as-obi-wan-for-new-star-wars-series/']

    def parse(self, response):
        """Print the response, then yield the caption link hrefs.

        The yielded item is a dict with a single ``'quotes'`` key whose
        value is the list of extracted ``href`` strings (empty when the
        XPath matches nothing).
        """
        print(response)

        # Select the href attribute of every anchor nested in a caption div.
        caption_anchors = response.xpath("//div[@class='caption-text']//a/@href")
        hrefs = caption_anchors.getall()

        print(hrefs)
        yield {'quotes': hrefs}
from scrapy import cmdline

# Start the crawl only when this file is run directly as a script.
# Without the guard, merely importing this module (for example when Scrapy
# loads it as a spider module while resolving `scrapy crawl`) would invoke
# the command line again from inside the import.
if __name__ == "__main__":
    cmdline.execute("scrapy crawl quotes_spider".split())