index.html

---
layout: default
title: A Fast and Powerful Scraping and Web Crawling Framework
---
{% comment %}
  Release metadata pulled from site.data.scrapy.
  `stable` is read later in this template (`stable.rtd` in the documentation link).
  `oldstable` and `devel` are not referenced in this file; presumably they are
  consumed by the included partials. TODO confirm before removing.
{% endcomment %}
{% assign stable = site.data.scrapy.stable %}
{% assign oldstable = site.data.scrapy.oldstable %}
{% assign devel = site.data.scrapy.development %}

<!-- Hero row: logo, tagline and maintainer credits in "block-left"; the download-button partial in "block-right". -->
<div class="container">

  <div class="first-row">

    <div class="block-left">
      <div id="scrapy-logo"></div>
      <p>An open source and collaborative framework for extracting the data you need from websites.
      </p>
      <p>In a fast, simple, yet extensible way.</p>

      <!-- External links use the "_blank" keyword (with underscore) so each opens its own tab;
           a plain "blank" would target one shared window named "blank". -->
      <p class="maintained-by">
        Maintained by
        <a href="https://www.zyte.com/" target="_blank" rel="noopener">
          Zyte
        </a>
        and
        <a href="https://github.com/scrapy/scrapy/graphs/contributors" target="_blank" rel="noopener">
          many other contributors
        </a>
      </p>

      {% include badges-bar.html %}
    </div>

    <div class="block-right">
      {% include download-button.html %}
    </div>

  </div>
</div>

<!-- Terminal-styled walkthrough in two boxes: build and run a spider, then deploy it.
     Content inside each <pre> is whitespace-sensitive and must stay flush with the left margin. -->
<div class="second-row">
  <!-- Box 1: install Scrapy, write myspider.py through a heredoc, run it. -->
  <div class="container code-box-line">
    <div class="code-box">
      <div class="box-header">
        <p>Terminal<span class="close-btn">&bull;</span></p>
      </div>
      <div class="box-code tab-page active-page">
        <pre>
<span class="prompt" onselectstart="return false"><i class="fa fa-dollar"></i></span> pip install scrapy
<span class="prompt" onselectstart="return false"><i class="fa fa-dollar"></i></span> cat > myspider.py &lt;&lt;EOF
{% highlight python %}
import scrapy

class BlogSpider(scrapy.Spider):
    name = 'blogspider'
    start_urls = ['https://www.zyte.com/blog/']

    def parse(self, response):
        for title in response.css('.oxy-post-title'):
            yield {'title': title.css('::text').get()}

        for next_page in response.css('a.next'):
            yield response.follow(next_page, self.parse)
{% endhighlight %}EOF
<span class="prompt" onselectstart="return false"><i class="fa fa-dollar"></i></span> scrapy runspider myspider.py
</pre>
      </div>
    </div>

    <div class="code-subs"><p>Build and run your<br /><span class="highlight">web spiders</span></p></div>

  </div>

  <!-- Box 2: install shub, log in, deploy, schedule the spider and fetch the scraped items. -->
  <div class="container code-box-line">
    <div class="code-box">
      <div class="box-header">
        <p>Terminal<span class="close-btn">&bull;</span></p>
      </div>
      <div class="box-code tab-page active-page">
        <pre>
<span class="prompt" onselectstart="return false"><i class="fa fa-dollar"></i></span> pip install shub
<span class="prompt" onselectstart="return false"><i class="fa fa-dollar"></i></span> shub login
<span class="comments">Insert your Zyte Scrapy Cloud API Key: <span class="placeholder">&lt;API_KEY&gt;</span></span>

<span class="comments"># Deploy the spider to Zyte Scrapy Cloud</span>
<span class="prompt" onselectstart="return false"><i class="fa fa-dollar"></i></span> shub deploy

<span class="comments"># Schedule the spider for execution</span>
<span class="prompt" onselectstart="return false"><i class="fa fa-dollar"></i></span> shub schedule blogspider <span class="comments">
Spider blogspider scheduled, watch it running here:
https://app.zyte.com/p/26731/job/1/8</span>

<span class="comments"># Retrieve the scraped data</span>
<span class="prompt" onselectstart="return false"><i class="fa fa-dollar"></i></span> shub items 26731/1/8
{% highlight python %}
{"title": "Improved Frontera: Web Crawling at Scale with Python 3 Support"}
{"title": "How to Crawl the Web Politely with Scrapy"}
...
{% endhighlight %}</pre>
      </div>
    </div>

    <div class="code-subs"><p>Deploy them to<br /><a href="https://www.zyte.com/scrapy-cloud/" title=""><span class="highlight">Zyte Scrapy Cloud</span></a></p>
    <p class="sub-sub">or use <a href="https://github.com/scrapy/scrapyd" title="Scrapyd"><span class="highlight">Scrapyd</span></a> to host the spiders on your own server</p></div>
  </div>

</div>

<!-- Feature highlights: three blocks, each with an icon, a heading and a one-line description. -->
<div class="container">

  <div class="third-row">
    <!-- The icons are decorative (the heading carries the meaning), so they are hidden from assistive technology. -->
    <div class="block-01">
      <i class="fa fa-bolt fa-4x" aria-hidden="true"> </i>
      <h3>Fast and powerful</h3>
      <p>write the rules to extract the data and let Scrapy do the rest</p>
    </div>
    <div class="block-02">
      <i class="fa fa-puzzle-piece fa-4x" aria-hidden="true"> </i>
      <h3>Easily extensible</h3>
      <p>extensible by design, plug new functionality easily without having to touch the core</p>
    </div>
    <div class="block-03">
      <i class="fa fa-cubes fa-4x" aria-hidden="true"> </i>
      <h3>Portable, Python</h3>
      <p>written in Python and runs on Linux, Windows, Mac and BSD</p>
    </div>
  </div>
</div>

<!-- Community figures and further-reading links.
     The counts are hard-coded in this template, so they only change when the file is edited. -->
<div class="fourth-row">
  <div class="container">
    <div class="block-left">
      <h2>Healthy community</h2>
      <ul>
        <li>- 43,100 stars, 9,600 forks and 1,800 watchers on <a href="https://github.com/scrapy/scrapy">GitHub</a></li>
        <li>- 5,500 followers on <a href="https://twitter.com/ScrapyProject">Twitter</a></li>
        <li>- 18,000 questions on <a href="https://stackoverflow.com/tags/scrapy/info">StackOverflow</a></li>
      </ul>
    </div>
    <div class="block-right">
      <h2>Want to know more?</h2>
      <ul>
        <!-- `stable.rtd` selects the documentation version segment of the URL. -->
        <li><a href="https://docs.scrapy.org/en/{{ stable.rtd }}/intro/overview.html">- Discover Scrapy at a glance</a></li>
        <li><a href="../companies/">- Meet the companies using Scrapy</a></li>
      </ul>

    </div>
  </div>
</div>