From 813152ee13c9e5e6b9a5128641917bffd540e1f9 Mon Sep 17 00:00:00 2001 From: repat Date: Thu, 5 Sep 2019 01:32:22 -0700 Subject: [PATCH] Initial commit --- .gitignore | 3 + LICENSE | 22 ++ README.md | 61 ++++ composer.json | 30 ++ phpunit.xml.dist | 29 ++ src/Repat/CrawlQueue/RedisCrawlQueue.php | 133 ++++++++ tests/RedisCrawlQueueTest.php | 157 ++++++++++ tests/TestCase.php | 76 +++++ tests/server/.gitignore | 2 + tests/server/package-lock.json | 383 +++++++++++++++++++++++ tests/server/package.json | 15 + tests/server/server.js | 106 +++++++ tests/server/start_server.sh | 12 + 13 files changed, 1029 insertions(+) create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 README.md create mode 100644 composer.json create mode 100644 phpunit.xml.dist create mode 100644 src/Repat/CrawlQueue/RedisCrawlQueue.php create mode 100644 tests/RedisCrawlQueueTest.php create mode 100644 tests/TestCase.php create mode 100644 tests/server/.gitignore create mode 100644 tests/server/package-lock.json create mode 100644 tests/server/package.json create mode 100644 tests/server/server.js create mode 100755 tests/server/start_server.sh diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..073e37a --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +build +composer.lock +vendor diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..57b811c --- /dev/null +++ b/LICENSE @@ -0,0 +1,22 @@ +[MIT LICENSE] + +Copyright (c) 2019 repat, https://repat.de + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be 
+included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..d8ac73c --- /dev/null +++ b/README.md @@ -0,0 +1,61 @@ +# spatie-crawler-redis +[![Latest Version on Packagist](https://img.shields.io/packagist/v/repat/spatie-crawler-redis.svg?style=flat-square)](https://packagist.org/packages/repat/spatie-crawler-redis) +[![Total Downloads](https://img.shields.io/packagist/dt/repat/spatie-crawler-redis.svg?style=flat-square)](https://packagist.org/packages/repat/spatie-crawler-redis) + +**spatie-crawler-redis** is an alternative CrawlQueue implementing the `Spatie\Crawler\CrawlQueue\CrawlQueue` interface using Redis Hashes. + +## Installation +`$ composer require repat/spatie-crawler-redis` + +## Example +Create a `Predis\Client` beforehand if you need options, such as selecting a database. If you don't pass a client, a new one without options will be used. Predis assumes `127.0.0.1`, `6379` and `0` as default host, port and database. You can also pass a custom prefix, otherwise `uniqid()` will be used. + +```php +// see https://github.com/nrk/predis for options +$options = [ + 'database' => 7, +]; + +$prefix = uniqid() . ':'; // same as passing no prefix + +$redisClient = new \Predis\Client($options); + +// ... 
+->setCrawlQueue(new RedisCrawlQueue($redisClient, $prefix)) + +// uses new \Predis\Client without options +->setCrawlQueue(new RedisCrawlQueue()) +``` + +## TODO +* `phpredis` support + +## Testing +> Thanks spatie for the tests. These are the instructions: + +To run the tests you'll have to start the included node based server first in a separate terminal window. + +```bash +cd tests/server +npm install +./start_server.sh +``` + +With the server running, you can start testing. +```bash +vendor/bin/phpunit +``` + +## License +* MIT, see [LICENSE](https://github.com/repat/spatie-crawler-redis/blob/master/LICENSE) + +## Version +* Version 0.1 + +## Contact +#### repat +* Homepage: https://repat.de +* e-mail: repat@repat.de +* Twitter: [@repat123](https://twitter.com/repat123 "repat123 on twitter") + +[![Flattr this git repo](http://api.flattr.com/button/flattr-badge-large.png)](https://flattr.com/submit/auto?user_id=repat&url=https://github.com/repat/spatie-crawler-redis&title=spatie-crawler-redis&language=&tags=github&category=software) diff --git a/composer.json b/composer.json new file mode 100644 index 0000000..f817215 --- /dev/null +++ b/composer.json @@ -0,0 +1,30 @@ +{ + "name": "repat/spatie-crawler-redis", + "description": "Redis CrawlQueue for spatie/crawler", + "keywords": ["spatie", "crawler", "redis", "crawlqueue", "predis"], + "homepage": "https://repat.de", + "license": "MIT", + "version" : "0.1", + "authors": [ + {"name": "repat", "email": "repat@repat.de"} + ], + "require": { + "php": ">=7.1", + "spatie/crawler": "^4.6", + "predis/predis": "^1.1" + }, + "require-dev": { + "phpunit/phpunit": "^7.0", + "larapack/dd": "^1.1" + }, + "autoload": { + "psr-4": { + "Repat\\CrawlQueue\\": "src/Repat/CrawlQueue" + } + }, + "autoload-dev": { + "psr-4": { + "Spatie\\Crawler\\Test\\": "tests" + } + } +} diff --git a/phpunit.xml.dist b/phpunit.xml.dist new file mode 100644 index 0000000..1f0d1c0 --- /dev/null +++ b/phpunit.xml.dist @@ -0,0 +1,29 @@ + + + + + 
tests + + + + + src/ + + + + + + + + + + diff --git a/src/Repat/CrawlQueue/RedisCrawlQueue.php b/src/Repat/CrawlQueue/RedisCrawlQueue.php new file mode 100644 index 0000000..a356d7e --- /dev/null +++ b/src/Repat/CrawlQueue/RedisCrawlQueue.php @@ -0,0 +1,133 @@ +redis = $redis; + if (is_null($redis)) { + $this->redis = new Client(); + } + + $this->prefix = $prefix ?? uniqid() . ':'; + + // make sure prefix has a colon at the end + if (substr($this->prefix, -1) !== ':') { + $this->prefix .= ':'; + } + } + + public function __destruct() + { + $keys = $this->redis->hkeys(self::URLS); + foreach ($keys as $key) { + // only fields carrying this instance's prefix belong to this queue; drop them from both hashes so no pending entries are orphaned + if (substr($key, 0, strlen($this->prefix)) === $this->prefix) { + $this->redis->hdel(self::URLS, $key); + $this->redis->hdel(self::PENDING_URLS, $key); + } + } + } + + public function add(CrawlUrl $url) : CrawlQueue + { + $urlString = (string) $url->url; + // has() treats a plain string as an already-prefixed id, so pass the CrawlUrl itself to get the prefixed lookup + if (!$this->has($url)) { + $url->setId($this->prefix . $urlString); + + $this->redis->hset(self::URLS, $this->prefix . $urlString, serialize($url)); + $this->redis->hset(self::PENDING_URLS, $this->prefix . $urlString, serialize($url)); + } + + return $this; + } + + public function has($crawlUrl) : bool + { + if ($crawlUrl instanceof CrawlUrl) { + $url = $this->prefix . (string) $crawlUrl->url; + } elseif ($crawlUrl instanceof UriInterface) { + $url = $this->prefix . 
(string) $crawlUrl; + } elseif (is_string($crawlUrl)) { + $url = $crawlUrl; + } else { + throw InvalidUrl::unexpectedType($crawlUrl); + } + + return (bool) $this->redis->hexists(self::URLS, $url); + } + + public function hasPendingUrls() : bool + { + return (bool) $this->redis->hlen(self::PENDING_URLS); + } + + public function getUrlById($id) : CrawlUrl + { + if (!$this->has($id)) { + throw new UrlNotFoundByIndex("Crawl url {$id} not found in hashes."); + } + return unserialize($this->redis->hget(self::URLS, $id)); + } + + public function getFirstPendingUrl() : ?CrawlUrl + { + $keys = $this->redis->hkeys(self::PENDING_URLS); + + foreach ($keys as $key) { + return unserialize($this->redis->hget(self::PENDING_URLS, $key)); + } + + return null; + } + + public function hasAlreadyBeenProcessed(CrawlUrl $url) : bool + { + $url = (string) $url->url; + + if ($this->redis->hexists(self::PENDING_URLS, $this->prefix . $url)) { + return false; + } + + if ($this->redis->hexists(self::URLS, $this->prefix . $url)) { + return true; + } + + return false; + } + + public function markAsProcessed(CrawlUrl $crawlUrl) + { + $this->redis->hdel(self::PENDING_URLS, $this->prefix . 
(string) $crawlUrl->url); + } +} diff --git a/tests/RedisCrawlQueueTest.php b/tests/RedisCrawlQueueTest.php new file mode 100644 index 0000000..ea3ea3f --- /dev/null +++ b/tests/RedisCrawlQueueTest.php @@ -0,0 +1,157 @@ +client = new Client(['database' => $dbNr]); + + // try to find an empty DB + if ($this->client->dbsize() === 0) { + break; + } + } + + $this->crawlQueue = new RedisCrawlQueue($this->client); + } + + /** @test */ + public function an_url_can_be_added() + { + $this->client->flushdb(); + + $crawlUrl = $this->createCrawlUrl('https://example.com'); + $this->crawlQueue->add($crawlUrl); + + $this->assertEquals($crawlUrl, $this->crawlQueue->getFirstPendingUrl()); + + $this->client->flushdb(); + } + + /** @test */ + public function it_can_determine_if_there_are_pending_urls() + { + $this->client->flushdb(); + + $this->assertFalse($this->crawlQueue->hasPendingUrls()); + + $this->crawlQueue->add($this->createCrawlUrl('https://example.com')); + + $this->assertTrue($this->crawlQueue->hasPendingUrls()); + + $this->client->flushdb(); + } + + /** @test */ + public function it_can_get_an_url_at_the_specified_index() + { + $this->client->flushdb(); + + $url1 = $this->createCrawlUrl('https://example1.com/'); + $url2 = $this->createCrawlUrl('https://example2.com/'); + + $this->crawlQueue->add($url1); + $this->crawlQueue->add($url2); + + $this->assertEquals( + 'https://example1.com/', + (string) $this->crawlQueue->getUrlById($url1->getId())->url + ); + $this->assertEquals( + 'https://example2.com/', + (string) $this->crawlQueue->getUrlById($url2->getId())->url + ); + + $this->client->flushdb(); + } + + /** @test */ + public function it_can_determine_if_has_a_given_url() + { + $this->client->flushdb(); + + $crawlUrl = $this->createCrawlUrl('https://example1.com/'); + + $this->assertFalse($this->crawlQueue->has($crawlUrl)); + + $this->crawlQueue->add($crawlUrl); + + $this->assertTrue($this->crawlQueue->has($crawlUrl)); + + $this->client->flushdb(); + } + + /** @test */ + 
public function it_can_mark_an_url_as_processed() + { + $this->client->flushdb(); + + $crawlUrl = $this->createCrawlUrl('https://example1.com/'); + + $this->assertFalse($this->crawlQueue->hasAlreadyBeenProcessed($crawlUrl)); + + $this->crawlQueue->add($crawlUrl); + + $this->assertFalse($this->crawlQueue->hasAlreadyBeenProcessed($crawlUrl)); + + $this->crawlQueue->markAsProcessed($crawlUrl); + + $this->assertTrue($this->crawlQueue->hasAlreadyBeenProcessed($crawlUrl)); + + $this->client->flushdb(); + } + + /** @test */ + public function it_can_remove_all_processed_urls_from_the_pending_urls() + { + $this->client->flushdb(); + + $crawlUrl1 = $this->createCrawlUrl('https://example1.com/'); + $crawlUrl2 = $this->createCrawlUrl('https://example2.com/'); + + $this->crawlQueue + ->add($crawlUrl1) + ->add($crawlUrl2); + + $this->crawlQueue->markAsProcessed($crawlUrl1); + + $pendingUrlCount = 0; + + while ($url = $this->crawlQueue->getFirstPendingUrl()) { + $pendingUrlCount++; + $this->crawlQueue->markAsProcessed($url); + } + + $this->assertEquals(1, $pendingUrlCount); + + $this->client->flushdb(); + } + + protected function createCrawlUrl(string $url): CrawlUrl + { + return CrawlUrl::create(new Uri($url)); + } +} diff --git a/tests/TestCase.php b/tests/TestCase.php new file mode 100644 index 0000000..833a044 --- /dev/null +++ b/tests/TestCase.php @@ -0,0 +1,76 @@ +markTestSkipped('The testserver is not running.'); + } + } + + protected function getLogContents(): string + { + return file_get_contents(static::$logPath); + } + + protected function assertCrawledOnce($urls) + { + $logContent = $this->getLogContents(); + + foreach ($urls as $url) { + $logMessage = "hasBeenCrawled: {$url['url']}"; + + if (isset($url['foundOn'])) { + $logMessage .= " - found on {$url['foundOn']}"; + } + + $logMessage .= PHP_EOL; + + $this->assertEquals(1, substr_count($logContent, $logMessage), "Did not find {$logMessage} exactly one time in the log but ".substr_count($logContent, $logMessage)." 
times. Contents of log\n{$logContent}"); + } + } + + protected function assertNotCrawled($urls) + { + $logContent = $this->getLogContents(); + + foreach ($urls as $url) { + $logMessage = "hasBeenCrawled: {$url['url']}"; + + if (isset($url['foundOn'])) { + $logMessage .= " - found on {$url['foundOn']}"; + } + + $logMessage .= PHP_EOL; + + $this->assertEquals(0, substr_count($logContent, $logMessage), "Did find {$logMessage} in the log"); + } + } + + protected function assertCrawledUrlCount(int $count) + { + $logContent = file_get_contents(static::$logPath); + + $actualCount = substr_count($logContent, 'hasBeenCrawled'); + + $this->assertEquals($count, $actualCount, "Crawled `{$actualCount}` urls instead of the expected {$count}"); + } + + public function resetLog() + { + static::$logPath = __DIR__.'/temp/crawledUrls.txt'; + + file_put_contents(static::$logPath, 'start log'.PHP_EOL); + } +} diff --git a/tests/server/.gitignore b/tests/server/.gitignore new file mode 100644 index 0000000..167ab9f --- /dev/null +++ b/tests/server/.gitignore @@ -0,0 +1,2 @@ +node_modules/ +yarn.lock \ No newline at end of file diff --git a/tests/server/package-lock.json b/tests/server/package-lock.json new file mode 100644 index 0000000..f6cd920 --- /dev/null +++ b/tests/server/package-lock.json @@ -0,0 +1,383 @@ +{ + "name": "server", + "version": "1.0.0", + "lockfileVersion": 1, + "requires": true, + "dependencies": { + "accepts": { + "version": "1.3.4", + "resolved": "https://registry.npmjs.org/accepts/-/accepts-1.3.4.tgz", + "integrity": "sha1-hiRnWMfdbSGmR0/whKR0DsBesh8=", + "requires": { + "mime-types": "~2.1.16", + "negotiator": "0.6.1" + } + }, + "array-flatten": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/array-flatten/-/array-flatten-1.1.1.tgz", + "integrity": "sha1-ml9pkFGx5wczKPKgCJaLZOopVdI=" + }, + "body-parser": { + "version": "1.18.2", + "resolved": "https://registry.npmjs.org/body-parser/-/body-parser-1.18.2.tgz", + "integrity": 
"sha1-h2eKGdhLR9hZuDGZvVm84iKxBFQ=", + "requires": { + "bytes": "3.0.0", + "content-type": "~1.0.4", + "debug": "2.6.9", + "depd": "~1.1.1", + "http-errors": "~1.6.2", + "iconv-lite": "0.4.19", + "on-finished": "~2.3.0", + "qs": "6.5.1", + "raw-body": "2.3.2", + "type-is": "~1.6.15" + } + }, + "bytes": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/bytes/-/bytes-3.0.0.tgz", + "integrity": "sha1-0ygVQE1olpn4Wk6k+odV3ROpYEg=" + }, + "content-disposition": { + "version": "0.5.2", + "resolved": "https://registry.npmjs.org/content-disposition/-/content-disposition-0.5.2.tgz", + "integrity": "sha1-DPaLud318r55YcOoUXjLhdunjLQ=" + }, + "content-type": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/content-type/-/content-type-1.0.4.tgz", + "integrity": "sha512-hIP3EEPs8tB9AT1L+NUqtwOAps4mk2Zob89MWXMHjHWg9milF/j4osnnQLXBCBFBk/tvIG/tUc9mOUJiPBhPXA==" + }, + "cookie": { + "version": "0.3.1", + "resolved": "https://registry.npmjs.org/cookie/-/cookie-0.3.1.tgz", + "integrity": "sha1-5+Ch+e9DtMi6klxcWpboBtFoc7s=" + }, + "cookie-signature": { + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/cookie-signature/-/cookie-signature-1.0.6.tgz", + "integrity": "sha1-4wOogrNCzD7oylE6eZmXNNqzriw=" + }, + "debug": { + "version": "2.6.9", + "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz", + "integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==", + "requires": { + "ms": "2.0.0" + } + }, + "depd": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/depd/-/depd-1.1.2.tgz", + "integrity": "sha1-m81S4UwJd2PnSbJ0xDRu0uVgtak=" + }, + "destroy": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/destroy/-/destroy-1.0.4.tgz", + "integrity": "sha1-l4hXRCxEdJ5CBmE+N5RiBYJqvYA=" + }, + "ee-first": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/ee-first/-/ee-first-1.1.1.tgz", + "integrity": "sha1-WQxhFWsK4vTwJVcyoViyZrxWsh0=" + }, + 
"encodeurl": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/encodeurl/-/encodeurl-1.0.2.tgz", + "integrity": "sha1-rT/0yG7C0CkyL1oCw6mmBslbP1k=" + }, + "escape-html": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/escape-html/-/escape-html-1.0.3.tgz", + "integrity": "sha1-Aljq5NPQwJdN4cFpGI7wBR0dGYg=" + }, + "etag": { + "version": "1.8.1", + "resolved": "https://registry.npmjs.org/etag/-/etag-1.8.1.tgz", + "integrity": "sha1-Qa4u62XvpiJorr/qg6x9eSmbCIc=" + }, + "express": { + "version": "4.16.2", + "resolved": "https://registry.npmjs.org/express/-/express-4.16.2.tgz", + "integrity": "sha1-41xt/i1kt9ygpc1PIXgb4ymeB2w=", + "requires": { + "accepts": "~1.3.4", + "array-flatten": "1.1.1", + "body-parser": "1.18.2", + "content-disposition": "0.5.2", + "content-type": "~1.0.4", + "cookie": "0.3.1", + "cookie-signature": "1.0.6", + "debug": "2.6.9", + "depd": "~1.1.1", + "encodeurl": "~1.0.1", + "escape-html": "~1.0.3", + "etag": "~1.8.1", + "finalhandler": "1.1.0", + "fresh": "0.5.2", + "merge-descriptors": "1.0.1", + "methods": "~1.1.2", + "on-finished": "~2.3.0", + "parseurl": "~1.3.2", + "path-to-regexp": "0.1.7", + "proxy-addr": "~2.0.2", + "qs": "6.5.1", + "range-parser": "~1.2.0", + "safe-buffer": "5.1.1", + "send": "0.16.1", + "serve-static": "1.13.1", + "setprototypeof": "1.1.0", + "statuses": "~1.3.1", + "type-is": "~1.6.15", + "utils-merge": "1.0.1", + "vary": "~1.1.2" + }, + "dependencies": { + "setprototypeof": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/setprototypeof/-/setprototypeof-1.1.0.tgz", + "integrity": "sha512-BvE/TwpZX4FXExxOxZyRGQQv651MSwmWKZGqvmPcRIjDqWub67kTKuIMx43cZZrS/cBBzwBcNDWoFxt2XEFIpQ==" + }, + "statuses": { + "version": "1.3.1", + "resolved": "https://registry.npmjs.org/statuses/-/statuses-1.3.1.tgz", + "integrity": "sha1-+vUbnrdKrvOzrPStX2Gr8ky3uT4=" + } + } + }, + "finalhandler": { + "version": "1.1.0", + "resolved": 
"https://registry.npmjs.org/finalhandler/-/finalhandler-1.1.0.tgz", + "integrity": "sha1-zgtoVbRYU+eRsvzGgARtiCU91/U=", + "requires": { + "debug": "2.6.9", + "encodeurl": "~1.0.1", + "escape-html": "~1.0.3", + "on-finished": "~2.3.0", + "parseurl": "~1.3.2", + "statuses": "~1.3.1", + "unpipe": "~1.0.0" + }, + "dependencies": { + "statuses": { + "version": "1.3.1", + "resolved": "https://registry.npmjs.org/statuses/-/statuses-1.3.1.tgz", + "integrity": "sha1-+vUbnrdKrvOzrPStX2Gr8ky3uT4=" + } + } + }, + "forwarded": { + "version": "0.1.2", + "resolved": "https://registry.npmjs.org/forwarded/-/forwarded-0.1.2.tgz", + "integrity": "sha1-mMI9qxF1ZXuMBXPozszZGw/xjIQ=" + }, + "fresh": { + "version": "0.5.2", + "resolved": "https://registry.npmjs.org/fresh/-/fresh-0.5.2.tgz", + "integrity": "sha1-PYyt2Q2XZWn6g1qx+OSyOhBWBac=" + }, + "http-errors": { + "version": "1.6.2", + "resolved": "https://registry.npmjs.org/http-errors/-/http-errors-1.6.2.tgz", + "integrity": "sha1-CgAsyFcHGSp+eUbO7cERVfYOxzY=", + "requires": { + "depd": "1.1.1", + "inherits": "2.0.3", + "setprototypeof": "1.0.3", + "statuses": ">= 1.3.1 < 2" + }, + "dependencies": { + "depd": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/depd/-/depd-1.1.1.tgz", + "integrity": "sha1-V4O04cRZ8G+lyif5kfPQbnoxA1k=" + } + } + }, + "iconv-lite": { + "version": "0.4.19", + "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.19.tgz", + "integrity": "sha512-oTZqweIP51xaGPI4uPa56/Pri/480R+mo7SeU+YETByQNhDG55ycFyNLIgta9vXhILrxXDmF7ZGhqZIcuN0gJQ==" + }, + "inherits": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.3.tgz", + "integrity": "sha1-Yzwsg+PaQqUC9SRmAiSA9CCCYd4=" + }, + "ipaddr.js": { + "version": "1.6.0", + "resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-1.6.0.tgz", + "integrity": "sha1-4/o1e3c9phnybpXwSdBVxyeW+Gs=" + }, + "media-typer": { + "version": "0.3.0", + "resolved": 
"https://registry.npmjs.org/media-typer/-/media-typer-0.3.0.tgz", + "integrity": "sha1-hxDXrwqmJvj/+hzgAWhUUmMlV0g=" + }, + "merge-descriptors": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/merge-descriptors/-/merge-descriptors-1.0.1.tgz", + "integrity": "sha1-sAqqVW3YtEVoFQ7J0blT8/kMu2E=" + }, + "methods": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/methods/-/methods-1.1.2.tgz", + "integrity": "sha1-VSmk1nZUE07cxSZmVoNbD4Ua/O4=" + }, + "mime": { + "version": "1.4.1", + "resolved": "https://registry.npmjs.org/mime/-/mime-1.4.1.tgz", + "integrity": "sha512-KI1+qOZu5DcW6wayYHSzR/tXKCDC5Om4s1z2QJjDULzLcmf3DvzS7oluY4HCTrc+9FiKmWUgeNLg7W3uIQvxtQ==" + }, + "mime-db": { + "version": "1.33.0", + "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.33.0.tgz", + "integrity": "sha512-BHJ/EKruNIqJf/QahvxwQZXKygOQ256myeN/Ew+THcAa5q+PjyTTMMeNQC4DZw5AwfvelsUrA6B67NKMqXDbzQ==" + }, + "mime-types": { + "version": "2.1.18", + "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.18.tgz", + "integrity": "sha512-lc/aahn+t4/SWV/qcmumYjymLsWfN3ELhpmVuUFjgsORruuZPVSwAQryq+HHGvO/SI2KVX26bx+En+zhM8g8hQ==", + "requires": { + "mime-db": "~1.33.0" + } + }, + "ms": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", + "integrity": "sha1-VgiurfwAvmwpAd9fmGF4jeDVl8g=" + }, + "negotiator": { + "version": "0.6.1", + "resolved": "https://registry.npmjs.org/negotiator/-/negotiator-0.6.1.tgz", + "integrity": "sha1-KzJxhOiZIQEXeyhWP7XnECrNDKk=" + }, + "on-finished": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/on-finished/-/on-finished-2.3.0.tgz", + "integrity": "sha1-IPEzZIGwg811M3mSoWlxqi2QaUc=", + "requires": { + "ee-first": "1.1.1" + } + }, + "parseurl": { + "version": "1.3.2", + "resolved": "https://registry.npmjs.org/parseurl/-/parseurl-1.3.2.tgz", + "integrity": "sha1-/CidTtiZMRlGDBViUyYs3I3mW/M=" + }, + "path-to-regexp": { + "version": "0.1.7", + "resolved": 
"https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-0.1.7.tgz", + "integrity": "sha1-32BBeABfUi8V60SQ5yR6G/qmf4w=" + }, + "proxy-addr": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/proxy-addr/-/proxy-addr-2.0.3.tgz", + "integrity": "sha512-jQTChiCJteusULxjBp8+jftSQE5Obdl3k4cnmLA6WXtK6XFuWRnvVL7aCiBqaLPM8c4ph0S4tKna8XvmIwEnXQ==", + "requires": { + "forwarded": "~0.1.2", + "ipaddr.js": "1.6.0" + } + }, + "qs": { + "version": "6.5.1", + "resolved": "https://registry.npmjs.org/qs/-/qs-6.5.1.tgz", + "integrity": "sha512-eRzhrN1WSINYCDCbrz796z37LOe3m5tmW7RQf6oBntukAG1nmovJvhnwHHRMAfeoItc1m2Hk02WER2aQ/iqs+A==" + }, + "range-parser": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/range-parser/-/range-parser-1.2.0.tgz", + "integrity": "sha1-9JvmtIeJTdxA3MlKMi9hEJLgDV4=" + }, + "raw-body": { + "version": "2.3.2", + "resolved": "https://registry.npmjs.org/raw-body/-/raw-body-2.3.2.tgz", + "integrity": "sha1-vNYMd9Prk83gBQKVw/N5OJvIj4k=", + "requires": { + "bytes": "3.0.0", + "http-errors": "1.6.2", + "iconv-lite": "0.4.19", + "unpipe": "1.0.0" + } + }, + "safe-buffer": { + "version": "5.1.1", + "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.1.tgz", + "integrity": "sha512-kKvNJn6Mm93gAczWVJg7wH+wGYWNrDHdWvpUmHyEsgCtIwwo3bqPtV4tR5tuPaUhTOo/kvhVwd8XwwOllGYkbg==" + }, + "send": { + "version": "0.16.1", + "resolved": "https://registry.npmjs.org/send/-/send-0.16.1.tgz", + "integrity": "sha512-ElCLJdJIKPk6ux/Hocwhk7NFHpI3pVm/IZOYWqUmoxcgeyM+MpxHHKhb8QmlJDX1pU6WrgaHBkVNm73Sv7uc2A==", + "requires": { + "debug": "2.6.9", + "depd": "~1.1.1", + "destroy": "~1.0.4", + "encodeurl": "~1.0.1", + "escape-html": "~1.0.3", + "etag": "~1.8.1", + "fresh": "0.5.2", + "http-errors": "~1.6.2", + "mime": "1.4.1", + "ms": "2.0.0", + "on-finished": "~2.3.0", + "range-parser": "~1.2.0", + "statuses": "~1.3.1" + }, + "dependencies": { + "statuses": { + "version": "1.3.1", + "resolved": 
"https://registry.npmjs.org/statuses/-/statuses-1.3.1.tgz", + "integrity": "sha1-+vUbnrdKrvOzrPStX2Gr8ky3uT4=" + } + } + }, + "serve-static": { + "version": "1.13.1", + "resolved": "https://registry.npmjs.org/serve-static/-/serve-static-1.13.1.tgz", + "integrity": "sha512-hSMUZrsPa/I09VYFJwa627JJkNs0NrfL1Uzuup+GqHfToR2KcsXFymXSV90hoyw3M+msjFuQly+YzIH/q0MGlQ==", + "requires": { + "encodeurl": "~1.0.1", + "escape-html": "~1.0.3", + "parseurl": "~1.3.2", + "send": "0.16.1" + } + }, + "setprototypeof": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/setprototypeof/-/setprototypeof-1.0.3.tgz", + "integrity": "sha1-ZlZ+NwQ+608E2RvWWMDL77VbjgQ=" + }, + "statuses": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/statuses/-/statuses-1.4.0.tgz", + "integrity": "sha512-zhSCtt8v2NDrRlPQpCNtw/heZLtfUDqxBM1udqikb/Hbk52LK4nQSwr10u77iopCW5LsyHpuXS0GnEc48mLeew==" + }, + "type-is": { + "version": "1.6.16", + "resolved": "https://registry.npmjs.org/type-is/-/type-is-1.6.16.tgz", + "integrity": "sha512-HRkVv/5qY2G6I8iab9cI7v1bOIdhm94dVjQCPFElW9W+3GeDOSHmy2EBYe4VTApuzolPcmgFTN3ftVJRKR2J9Q==", + "requires": { + "media-typer": "0.3.0", + "mime-types": "~2.1.18" + } + }, + "unpipe": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/unpipe/-/unpipe-1.0.0.tgz", + "integrity": "sha1-sr9O6FFKrmFltIF4KdIbLvSZBOw=" + }, + "utils-merge": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/utils-merge/-/utils-merge-1.0.1.tgz", + "integrity": "sha1-n5VxD1CiZ5R7LMwSR0HBAoQn5xM=" + }, + "vary": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/vary/-/vary-1.1.2.tgz", + "integrity": "sha1-IpnwLG3tMNSllhsLn3RSShj2NPw=" + } + } +} diff --git a/tests/server/package.json b/tests/server/package.json new file mode 100644 index 0000000..8f41dd6 --- /dev/null +++ b/tests/server/package.json @@ -0,0 +1,15 @@ +{ + "name": "server", + "version": "1.0.0", + "description": "Test server for Laravel Uptime Monitor", + "main": 
"index.js", + "scripts": { + "test": "echo \"Error: no test specified\" && exit 1" + }, + "author": "", + "license": "MIT", + "dependencies": { + "body-parser": "^1.18.2", + "express": "^4.16.2" + } +} diff --git a/tests/server/server.js b/tests/server/server.js new file mode 100644 index 0000000..eac7d56 --- /dev/null +++ b/tests/server/server.js @@ -0,0 +1,106 @@ +"use strict"; + +let app = require('express')(); + +app.get('/', function (request, response) { + response.end('txt disallowedmeta disallowedheader disallowedLink1Link2Link4EmailTelephoneNo followDisallow Custom User Agent'); +}); + +app.get('/link1', function (request, response) { + response.end('You are on link1External Link'); +}); + +app.get('/javascript', function (request, response) { + response.end('This page can only be reached if JavaScript is being executed'); +}); + +app.get('/link1-next', function (request, response) { + response.end('You are on link1-next. Next page of link1'); +}); + +app.get('/link1-prev', function (request, response) { + response.end('You are on link1-prev. 
Previous page of link1'); +}); + +app.get('/nofollow', function (request, response) { + response.end('This page should not be crawled'); +}); + +app.get('/link2', function (request, response) { + response.end('You are on link2Link3SubdomainSubdomain2'); +}); + +app.get('/link3', function (request, response) { + response.end('You are on link3not exists'); +}); + +app.get('/dir/link4', function (request, response) { + response.end('You are on /dir/link4link 5'); +}); + +app.get('/dir/link5', function (request, response) { + response.end('You are on /dir/link5link 6'); +}); + +app.get('/dir/subdir/link6', function (request, response) { + response.end('You are on /dir/subdir/link6link 1'); +}); + +app.get('/invalid-url', function (request, response) { + response.end('There is an invalid url'); +}); + +app.get('/txt-disallow', function (request, response) { + response.end('Not allowed'); +}); + +app.get('/txt-disallow-custom-user-agent', function (request, response) { + response.end('Not allowed for Custom User Agent'); +}); + +app.get('/meta-follow', function (request, response) { + response.end('\n\nNo follow'); +}); + +app.get('/meta-nofollow', function (request, response) { + response.end('\n\nno follow it'); +}); + +app.get('/dir1/internal-redirect-entry/', function (request, response) { + response.end('trapped trap-start'); +}); + +app.get('/dir1/internal-redirect/trap/', function (request, response) { + response.redirect(301, '/dir1/internal-redirect-entry/'); +}); + +app.get('/dir1/loop-generator/internal-redirect/trapped/', function (request, response) { + response.end('It should be crawled once'); +}); + +app.get('/meta-nofollow-target', function (request, response) { + response.end('No followable'); +}); + +app.get('/header-disallow', function (request, response) { + response.set({'X-Robots-Tag': '*: noindex'}); + + response.end('disallow by header'); +}); + +app.get('/robots.txt', function (req, res) { + var html = 'User-agent: *\n' + + 'Disallow: 
/txt-disallow\n' + + 'User-agent: my-agent\n' + + 'Disallow: /txt-disallow\n' + + 'Disallow: /txt-disallow-custom-user-agent'; + + res.end(html); +}); + +let server = app.listen(8080, function () { + const host = 'localhost'; + const port = server.address().port; + + console.log('Testing server listening at http://%s:%s', host, port); +}); diff --git a/tests/server/start_server.sh b/tests/server/start_server.sh new file mode 100755 index 0000000..fd39276 --- /dev/null +++ b/tests/server/start_server.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash + +if [ -z "${TRAVIS_JOB_ID}" ]; then + # not running under travis, stay in foreground until stopped + node server.js +else + cd tests/server || exit 1 + + npm install + # running under travis, daemonize; the builtin `true` keeps the step green without relying on /bin/true existing + (node server.js &) || true +fi