Skip to content

Commit

Permalink
Add new grabber rules
Browse files Browse the repository at this point in the history
  • Loading branch information
Frédéric Guillot committed Sep 19, 2013
1 parent b099922 commit 16acc9e
Show file tree
Hide file tree
Showing 7 changed files with 77 additions and 3 deletions.
11 changes: 8 additions & 3 deletions README.markdown
Original file line number Diff line number Diff line change
Expand Up @@ -335,20 +335,25 @@ When the content scraper is enabled, everything will be slower because for each

- *.blog.lemonde.fr
- *.blog.nytimes.com
- *.nytimes.php
- *.nytimes.com
- *.slate.com
- *.theguardian.com
- *.wikipedia.org
- *.wired.com
- *.wsj.com
- github.com
- lifehacker.com
- plus.google.com
- rue89.com
- smallhousebliss.com
- techcrunch.com
- www.bbc.co.uk
- www.businessweek.com
- www.cnn.com
- www.egscomics.com
- www.forbes.com
- www.lemonde.fr
- www.lepoint.fr
- www.npr.org
- www.numerama.com
- www.slate.fr
- www.theguardian.com
- www.slate.fr
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,6 @@
'//div[@id="article-wrapper"]',
),
'strip' => array(
'//*[contains(@class, "promo")]',
),
);
19 changes: 19 additions & 0 deletions lib/PicoFeed/Rules/.wired.com.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
<?php
/*
 * Grabber rule set for wired.com (file name ".wired.com.php").
 *
 * The file defines no symbols; it only returns an array with three entries:
 *   - test_url: a sample article URL on the site.
 *   - body:     list of XPath expressions. Presumably they select the nodes
 *               kept as the article content -- confirm against the grabber.
 *   - strip:    list of XPath expressions. Presumably they select the nodes
 *               removed from that content -- confirm against the grabber.
 *
 * NOTE(review): the leading dot in the file name looks like a "any subdomain"
 * convention -- verify against the code that loads the rule files.
 */
return array(
'test_url' => 'http://www.wired.com/gamelife/2013/09/ouya-free-the-games/',
'body' => array(
// Exact attribute comparison: matches only when the class attribute is
// exactly "entry", not when "entry" is one of several classes.
'//div[@class="entry"]',
),
'strip' => array(
'//script',
'//style',
// Any element, whatever its tag, with this exact id.
'//*[@id="linker_widget"]',
// contains() is a substring test on the whole class attribute, so "bio"
// also matches class names that merely include those letters.
'//*[contains(@class, "bio")]',
'//*[contains(@class, "entry-footer")]',
'//*[contains(@class, "mobify_backtotop_link")]',
'//*[contains(@class, "gallery-navigation")]',
'//*[contains(@class, "gallery-thumbnail")]',
// Images whose src contains "1x1" (presumably tracking pixels -- confirm).
'//img[contains(@src, "1x1")]',
// Links whose href contains "creativecommons".
'//a[contains(@href, "creativecommons")]',
),
);
7 changes: 7 additions & 0 deletions lib/PicoFeed/Rules/plus.google.com.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<?php
/*
 * Grabber rule set for plus.google.com.
 *
 * The file defines no symbols; it only returns an array with two entries:
 *   - test_url: a sample post URL on the site.
 *   - body:     list of XPath expressions. Presumably they select the nodes
 *               kept as the post content -- confirm against the grabber.
 *
 * No 'strip' entry is defined for this site.
 */
return array(
'test_url' => 'https://plus.google.com/+LarryPage/posts/Lh8SKC6sED1',
'body' => array(
// Direct <div> children of a div with role="article" whose class attribute
// contains the substring "eE".
// NOTE(review): "eE" looks like a generated class name and the substring
// test is case-sensitive -- verify the selector still matches the live markup.
'//div[@role="article"]/div[contains(@class, "eE")]',
),
);
11 changes: 11 additions & 0 deletions lib/PicoFeed/Rules/www.businessweek.com.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<?php
/*
 * Grabber rule set for www.businessweek.com.
 *
 * The file defines no symbols; it only returns an array with three entries:
 *   - test_url: a sample article URL on the site.
 *   - body:     list of XPath expressions. Presumably they select the nodes
 *               kept as the article content -- confirm against the grabber.
 *   - strip:    list of XPath expressions. Presumably they select the nodes
 *               removed from that content -- confirm against the grabber.
 */
return array(
'test_url' => 'http://www.businessweek.com/articles/2013-09-18/elon-musks-hyperloop-will-work-says-some-very-smart-software',
'body' => array(
// Two separate containers are listed: the lead graphic and the article text.
// NOTE(review): whether list order determines output order depends on the
// consumer -- confirm before reordering.
'//div[@id="lead_graphic"]',
'//div[@id="article_body"]',
),
'strip' => array(
// Substring test on the class attribute, so any class name that includes
// "related_item" matches.
'//*[contains(@class, "related_item")]',
),
);
16 changes: 16 additions & 0 deletions lib/PicoFeed/Rules/www.lepoint.fr.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
<?php
/*
 * Grabber rule set for www.lepoint.fr.
 *
 * The file defines no symbols; it only returns an array with three entries:
 *   - test_url: a sample article URL on the site.
 *   - body:     list of XPath expressions. Presumably they select the nodes
 *               kept as the article content -- confirm against the grabber.
 *   - strip:    list of XPath expressions. Presumably they select the nodes
 *               removed from that content -- confirm against the grabber.
 */
return array(
'test_url' => 'http://www.lepoint.fr/c-est-arrive-aujourd-hui/19-septembre-1783-pour-la-premiere-fois-un-mouton-un-canard-et-un-coq-s-envoient-en-l-air-devant-louis-xvi-18-09-2012-1507704_494.php',
'body' => array(
// Every <article> element in the document, at any depth.
'//article',
),
'strip' => array(
'//script',
'//style',
// The class tests below are substring matches on the whole class attribute.
'//*[contains(@class, "info_article")]',
'//*[contains(@class, "fildariane_titre")]',
'//*[contains(@class, "entete2_article")]',
'//*[contains(@class, "signature_article")]',
// Any element whose id contains "share" anywhere in its value.
'//*[contains(@id, "share")]',
)
);
15 changes: 15 additions & 0 deletions lib/PicoFeed/Rules/www.npr.org.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
<?php
/*
 * Grabber rule set for www.npr.org.
 *
 * The file defines no symbols; it only returns an array with three entries:
 *   - test_url: a sample article URL on the site.
 *   - body:     list of XPath expressions. Presumably they select the nodes
 *               kept as the article content -- confirm against the grabber.
 *   - strip:    list of XPath expressions. Presumably they select the nodes
 *               removed from that content -- confirm against the grabber.
 */
return array(
'test_url' => 'http://www.npr.org/blogs/thesalt/2013/09/17/223345977/auto-brewery-syndrome-apparently-you-can-make-beer-in-your-gut',
'body' => array(
'//div[@id="storytext"]',
),
'strip' => array(
'//script',
'//style',
// The next three compare the whole class attribute for equality, so
// "bucket img" matches only that exact string (same order, single space,
// no additional classes).
'//*[@class="bucket img"]',
'//*[@class="creditwrap"]',
'//*[@class="captionwrap"]',
// Substring test instead: matches any class attribute containing "enlargebtn".
'//*[contains(@class, "enlargebtn")]',
),
);

0 comments on commit 16acc9e

Please sign in to comment.