Skip to content

Commit

Permalink
Bug fix for RSS2 detection
Browse files Browse the repository at this point in the history
  • Loading branch information
Frédéric Guillot committed Dec 16, 2013
1 parent d0b7e9c commit 1d3f58c
Show file tree
Hide file tree
Showing 4 changed files with 1,125 additions and 2 deletions.
14 changes: 12 additions & 2 deletions lib/PicoFeed/Filter.php
Original file line number Diff line number Diff line change
Expand Up @@ -460,10 +460,20 @@ public function removeHTMLTags($data)

/**
 * Strip the XML declaration and any leading xml-stylesheet processing
 * instructions from a document.
 *
 * Each stripped instruction is removed together with everything that
 * precedes it, and the whitespace that follows it is trimmed, so the
 * returned string starts at whatever comes next in the document.
 *
 * An instruction without a closing "?>" is left untouched, and
 * xml-stylesheet instructions are only considered when they start within
 * the first 200 bytes of the (remaining) data, so the same literal text
 * appearing deeper in the document is never used as a cut point.
 *
 * @param  string  $data  Raw document content
 * @return string         Content without the leading XML instructions
 */
public static function stripXmlTag($data)
{
    // XML declaration: cut right after the "?>" that closes it.
    $pos = strpos($data, '<?xml ');

    if ($pos !== false) {

        // Look for the terminator from the declaration itself so that an
        // unrelated "?>" located before it is not used by mistake.
        $end = strpos($data, '?>', $pos);

        // Without a terminator there is nothing well-defined to remove;
        // "false + 2" would otherwise silently chop the first two bytes.
        if ($end !== false) {
            $data = ltrim(substr($data, $end + 2));
        }
    }

    // A document may declare several stylesheets: remove them one by one.
    while (true) {

        $pos = strpos($data, '<?xml-stylesheet ');

        // Only instructions located at the very beginning are stripped.
        if ($pos === false || $pos >= 200) {
            break;
        }

        $end = strpos($data, '?>', $pos);

        // Unterminated instruction: leave the data as it is.
        if ($end === false) {
            break;
        }

        $data = ltrim(substr($data, $end + 2));
    }

    return $data;
}

Expand Down
43 changes: 43 additions & 0 deletions tests/FilterTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,49 @@

class FilterTest extends PHPUnit_Framework_TestCase
{
/**
 * Runs Filter::stripXmlTag() over the fixture files and checks the
 * beginning of each returned string against the expected opening tag.
 */
public function testStripXmlTag()
{
    // Fixtures checked against the raw result (no trimming applied here).
    $rawExpectations = array(
        'tests/fixtures/ezrss.it' => '<!DOC',
        'tests/fixtures/fulltextrss.xml' => '<rss',
        'tests/fixtures/sametmax.xml' => '<rss',
        'tests/fixtures/grotte_barbu.xml' => '<rss',
        'tests/fixtures/ibash.ru.xml' => '<rss',
        'tests/fixtures/pcinpact.xml' => '<rss',
        'tests/fixtures/resorts.xml' => '<rss',
        'tests/fixtures/rue89.xml' => '<rss',
        'tests/fixtures/cercle.psy.xml' => '<rss',
        'tests/fixtures/lagrange.xml' => '<feed',
    );

    foreach ($rawExpectations as $fixture => $prefix) {
        $stripped = Filter::stripXmlTag(file_get_contents($fixture));
        $this->assertEquals($prefix, substr($stripped, 0, strlen($prefix)));
    }

    // Fixtures whose result is trimmed before looking at the first tag.
    $trimmedAtomFixtures = array(
        'tests/fixtures/atom.xml',
        'tests/fixtures/atomsample.xml',
    );

    foreach ($trimmedAtomFixtures as $fixture) {
        $stripped = trim(Filter::stripXmlTag(file_get_contents($fixture)));
        $this->assertEquals('<feed', substr($stripped, 0, 5));
    }

    // RDF fixture: the result is trimmed, cut to 8 bytes, then trimmed again.
    $stripped = trim(Filter::stripXmlTag(file_get_contents('tests/fixtures/planete-jquery.xml')));
    $this->assertEquals('<rdf:RDF', trim(substr($stripped, 0, 8)));
}


public function testRelativeScheme()
{
$f = new Filter('<a href="//linuxfr.org">link</a>', 'http://blabla');
Expand Down
3 changes: 3 additions & 0 deletions tests/ReaderTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ public function testDownloadWithCache()

public function testDetectFormat()
{
$reader = new Reader(file_get_contents('tests/fixtures/sametmax.xml'));
$this->assertInstanceOf('PicoFeed\Parsers\Rss20', $reader->getParser());

$reader = new Reader(file_get_contents('tests/fixtures/rss_0.92.xml'));
$this->assertInstanceOf('PicoFeed\Parsers\Rss92', $reader->getParser());

Expand Down
Loading

0 comments on commit 1d3f58c

Please sign in to comment.