From 0a124c693ef1bd2bebcf6560494487b7272fbfc9 Mon Sep 17 00:00:00 2001 From: Dmitrij Vasilcenko Date: Sun, 9 Apr 2017 20:39:15 +0300 Subject: [PATCH 1/6] crawler init --- .idea/workspace.xml | 322 ++++ composer.lock | 71 +- src/AppBundle/AppBundle.php | 2 +- .../Controller/DefaultController.php | 49 +- web/test.txt | 1647 +++++++++++++++++ 5 files changed, 2050 insertions(+), 41 deletions(-) create mode 100644 .idea/workspace.xml create mode 100644 web/test.txt diff --git a/.idea/workspace.xml b/.idea/workspace.xml new file mode 100644 index 0000000..26b632b --- /dev/null +++ b/.idea/workspace.xml @@ -0,0 +1,322 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true + DEFINITION_ORDER + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + project + + + true + + + + DIRECTORY + + false + + + + + + + + + 1491753729633 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/composer.lock b/composer.lock index c3cb73c..a2a7bd1 100644 --- a/composer.lock +++ b/composer.lock @@ -67,35 +67,35 @@ }, { "name": "doctrine/annotations", - "version": "v1.2.7", + "version": "v1.4.0", "source": { "type": "git", "url": "https://github.com/doctrine/annotations.git", - "reference": "f25c8aab83e0c3e976fd7d19875f198ccf2f7535" + "reference": "54cacc9b81758b14e3ce750f205a393d52339e97" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/doctrine/annotations/zipball/f25c8aab83e0c3e976fd7d19875f198ccf2f7535", - "reference": "f25c8aab83e0c3e976fd7d19875f198ccf2f7535", + "url": "https://api.github.com/repos/doctrine/annotations/zipball/54cacc9b81758b14e3ce750f205a393d52339e97", + "reference": "54cacc9b81758b14e3ce750f205a393d52339e97", "shasum": "" }, "require": { "doctrine/lexer": "1.*", - "php": ">=5.3.2" + "php": "^5.6 || ^7.0" }, "require-dev": { "doctrine/cache": "1.*", - "phpunit/phpunit": "4.*" + "phpunit/phpunit": "^5.7" }, "type": "library", "extra": { "branch-alias": { - "dev-master": "1.3.x-dev" + "dev-master": "1.4.x-dev" } }, "autoload": { - "psr-0": { - "Doctrine\\Common\\Annotations\\": "lib/" + "psr-4": { + "Doctrine\\Common\\Annotations\\": "lib/Doctrine/Common/Annotations" } }, "notification-url": "https://packagist.org/downloads/", @@ -131,7 +131,7 @@ "docblock", "parser" ], - "time": "2015-08-31T12:32:49+00:00" + "time": "2017-02-24T16:22:25+00:00" }, { "name": "doctrine/cache", @@ -205,28 +205,29 @@ }, { "name": "doctrine/collections", - "version": "v1.3.0", + "version": "v1.4.0", "source": { "type": "git", "url": "https://github.com/doctrine/collections.git", - "reference": "6c1e4eef75f310ea1b3e30945e9f06e652128b8a" + "reference": "1a4fb7e902202c33cce8c55989b945612943c2ba" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/doctrine/collections/zipball/6c1e4eef75f310ea1b3e30945e9f06e652128b8a", - "reference": "6c1e4eef75f310ea1b3e30945e9f06e652128b8a", + "url": "https://api.github.com/repos/doctrine/collections/zipball/1a4fb7e902202c33cce8c55989b945612943c2ba", + "reference": "1a4fb7e902202c33cce8c55989b945612943c2ba", "shasum": "" }, "require": { - "php": ">=5.3.2" + "php": "^5.6 || ^7.0" }, "require-dev": { - "phpunit/phpunit": "~4.0" + "doctrine/coding-standard": "~0.1@dev", + "phpunit/phpunit": "^5.7" }, "type": "library", "extra": { "branch-alias": { - "dev-master": "1.2.x-dev" + "dev-master": "1.3.x-dev" } }, "autoload": { @@ -267,20 +268,20 @@ "collections", "iterator" ], - "time": "2015-04-14T22:21:58+00:00" + "time": "2017-01-03T10:49:41+00:00" }, { "name": "doctrine/common", - "version": "v2.6.2", + "version": "v2.7.2", "source": { "type": "git", "url": "https://github.com/doctrine/common.git", - "reference": "7bce00698899aa2c06fe7365c76e4d78ddb15fa3" + "reference": "930297026c8009a567ac051fd545bf6124150347" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/doctrine/common/zipball/7bce00698899aa2c06fe7365c76e4d78ddb15fa3", - "reference": "7bce00698899aa2c06fe7365c76e4d78ddb15fa3", + "url": "https://api.github.com/repos/doctrine/common/zipball/930297026c8009a567ac051fd545bf6124150347", + "reference": "930297026c8009a567ac051fd545bf6124150347", "shasum": "" }, "require": { @@ -289,10 +290,10 @@ "doctrine/collections": "1.*", "doctrine/inflector": "1.*", "doctrine/lexer": "1.*", - "php": "~5.5|~7.0" + "php": "~5.6|~7.0" }, "require-dev": { - "phpunit/phpunit": "~4.8|~5.0" + "phpunit/phpunit": "^5.4.6" }, "type": "library", "extra": { @@ -340,7 +341,7 @@ "persistence", "spl" ], - "time": "2016-11-30T16:50:46+00:00" + "time": "2017-01-13T14:02:13+00:00" }, { "name": "doctrine/dbal", @@ -1976,20 +1977,21 @@ }, { "name": "twig/twig", - "version": "v1.33.0", + "version": "v2.3.0", "source": { "type": "git", "url": "https://github.com/twigphp/Twig.git", - "reference": "05cf49921b13f6f01d3cfdf9018cfa7a8086fd5a" + "reference": "9718186a5df85a4f7917e78d3ffcabc204c75d25" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/twigphp/Twig/zipball/05cf49921b13f6f01d3cfdf9018cfa7a8086fd5a", - "reference": "05cf49921b13f6f01d3cfdf9018cfa7a8086fd5a", + "url": "https://api.github.com/repos/twigphp/Twig/zipball/9718186a5df85a4f7917e78d3ffcabc204c75d25", + "reference": "9718186a5df85a4f7917e78d3ffcabc204c75d25", "shasum": "" }, "require": { - "php": ">=5.2.7" + "php": "^7.0", + "symfony/polyfill-mbstring": "~1.0" }, "require-dev": { "psr/container": "^1.0", @@ -1999,7 +2001,7 @@ "type": "library", "extra": { "branch-alias": { - "dev-master": "1.33-dev" + "dev-master": "2.3-dev" } }, "autoload": { @@ -2034,7 +2036,7 @@ "keywords": [ "templating" ], - "time": "2017-03-22T15:40:09+00:00" + "time": "2017-03-22T15:41:51+00:00" } ], "packages-dev": [ @@ -2163,8 +2165,5 @@ "platform": { "php": ">=5.5.9" }, - "platform-dev": [], - "platform-overrides": { - "php": "5.5.9" - } + "platform-dev": [] } diff --git a/src/AppBundle/AppBundle.php b/src/AppBundle/AppBundle.php index 05123b6..f70c361 100644 --- a/src/AppBundle/AppBundle.php +++ b/src/AppBundle/AppBundle.php @@ -6,4 +6,4 @@ class AppBundle extends Bundle { -} +} \ No newline at end of file diff --git a/src/AppBundle/Controller/DefaultController.php b/src/AppBundle/Controller/DefaultController.php index 5216afe..450c320 100644 --- a/src/AppBundle/Controller/DefaultController.php +++ b/src/AppBundle/Controller/DefaultController.php @@ -4,7 +4,9 @@ use Sensio\Bundle\FrameworkExtraBundle\Configuration\Route; use Symfony\Bundle\FrameworkBundle\Controller\Controller; +use Symfony\Component\HttpFoundation\JsonResponse; use Symfony\Component\HttpFoundation\Request; +use Symfony\Component\DomCrawler\Crawler; class DefaultController extends Controller { @@ -13,9 +15,48 @@ class DefaultController extends Controller */ public function indexAction(Request $request) { - // replace this example code with whatever you need - return $this->render('default/index.html.twig', [ - 'base_dir' => realpath($this->getParameter('kernel.root_dir').'/..').DIRECTORY_SEPARATOR, - ]); + //set_time_limit(60); + $hasItems = true; + $cars = []; + while ( $hasItems ) { + $pageNumber = 1; + $url = "https://autoplius.lt/skelbimai/naudoti-automobiliai?make_id=99&page_nr=" . $pageNumber; + $html = $this->getHtml($url); + //$html = file_get_contents('test.txt'); + + $hasItems = false; + $crawler = new Crawler($html); + $crawler = $crawler->filter('.item-section'); + + foreach ($crawler as $domRow) { + $hasItems = true; + $row = new Crawler($domRow); + $title = $row->filter('.title-list a')->text(); + $price = trim($row->filter('.price-list')->text()); + $date = $row->filter('.param-list span[title="Pagaminimo data"]')->text(); + $car = [$title, $price, $date]; + $cars[] = $car; + } + + $pageNumber++; + sleep(1); + + // page 5 not exist so cralwer will stop + if ($pageNumber == 5) { + break; + } + + } + return new JsonResponse($cars); + } + + public function getHtml($url) { + $curl = curl_init($url); + curl_setopt($curl, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36"); + curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1); + $html = curl_exec($curl); + curl_close($curl); + return $html; } + } diff --git a/web/test.txt b/web/test.txt new file mode 100644 index 0000000..362a9a8 --- /dev/null +++ b/web/test.txt @@ -0,0 +1,1647 @@ + +

+
+Audi Naudoti automobiliai | Autoplius.lt
+
+
+
+
+
+        
+        
+        
+        
+        
+    
+	
+	
+					
+					
+					
+					
+					
+					
+
+
+            
+                    
+            
+            
+
+            
+            
+
+		
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ +
+
+
+ +
+
+
+ + + + + + + + + + +
+ + + + + + + + + + + + + + + + +
+ + +
+ +
+
Audi modeliai
Paieškos tikslinimasi
Rezultatų siaurinimasi

+ + +
+ +
+ + + + +
+ + + +
+ + + + + + Audi Naudoti automobiliai logo

Audi Naudoti automobiliai

(3194)
Jus domina Audi parduodami naudoti automobiliai? Parduodamų Audi naudotų automobilių skelbimai pateikiami sąraše. Populiariausios parduodamų Audi naudotų automobilių markės pateikiamos kairėje pusėje. Parduodamų Audi naudotų automobilių kainos, nuotraukos ir kita informacija nuolatos atnaujinama. Naudotų Audi automobilių pardavimas autoplius.lt portale - geriausias sprendimas!
+ + +
+ +
+
+ +
+
+
+
+
+
+ + + + + + + + + + + + + +
+ +
+
+ +
+
+
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ +
+ +
+
+ +
+ + + + +
    + + +
+
+ + + + + + + + + + + + + + + +
+
+ + + + + + +
+ +
+
+ +
+ + + +
+
+ + + + + + + + + + +
+
+
+ Svetainėje naudojami slapukai, kurie padeda užtikrinti jums teikiamų paslaugų kokybę. Tęsdami naršymą, jūs sutinkate su autoplius.lt slapukų politika. Daugiau apie slapukus. +
+
X
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + \ No newline at end of file From d3a10ab7a1ab281b56776b9f787d6cc5f163da7c Mon Sep 17 00:00:00 2001 From: Dmitrij Vasilcenko Date: Mon, 10 Apr 2017 20:54:46 +0300 Subject: [PATCH 2/6] variable mini fix --- .idea/autoinsanity.iml | 20 ++++++ .idea/modules.xml | 8 +++ .idea/vcs.xml | 6 ++ .idea/workspace.xml | 63 +++++++++++++------ .../Controller/DefaultController.php | 9 ++- 5 files changed, 83 insertions(+), 23 deletions(-) create mode 100644 .idea/autoinsanity.iml create mode 100644 .idea/modules.xml create mode 100644 .idea/vcs.xml diff --git a/.idea/autoinsanity.iml b/.idea/autoinsanity.iml new file mode 100644 index 0000000..384d376 --- /dev/null +++ b/.idea/autoinsanity.iml @@ -0,0 +1,20 @@ + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..3b35e49 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..94a25f7 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/.idea/workspace.xml b/.idea/workspace.xml index 26b632b..46fbf3d 100644 --- a/.idea/workspace.xml +++ b/.idea/workspace.xml @@ -1,7 +1,10 @@ - + + + + - + @@ -25,8 +28,8 @@ - - + + @@ -37,7 +40,7 @@ - @@ -218,6 +221,12 @@ + project + + + + + @@ -251,12 +260,13 @@ - @@ -264,20 +274,20 @@ - - + + - + - + - + - - + + @@ -292,11 +302,28 @@ + + + + + + + + + + + + + + + + + + - @@ -310,8 +337,8 @@ - - + + diff --git a/src/AppBundle/Controller/DefaultController.php b/src/AppBundle/Controller/DefaultController.php index 450c320..3cb5fd0 100644 --- a/src/AppBundle/Controller/DefaultController.php +++ b/src/AppBundle/Controller/DefaultController.php @@ -18,11 +18,11 @@ public function indexAction(Request $request) //set_time_limit(60); $hasItems = true; $cars = []; + $pageNumber = 1; while ( $hasItems ) { - $pageNumber = 1; $url = "https://autoplius.lt/skelbimai/naudoti-automobiliai?make_id=99&page_nr=" . $pageNumber; $html = $this->getHtml($url); - //$html = file_get_contents('test.txt'); +// $html = file_get_contents('test.txt'); $hasItems = false; $crawler = new Crawler($html); @@ -39,10 +39,9 @@ public function indexAction(Request $request) } $pageNumber++; - sleep(1); + sleep(2); - // page 5 not exist so cralwer will stop - if ($pageNumber == 5) { + if ($pageNumber > 3) { break; } From e8c2e5a208be2082612e1a2c418a9c60eaa5f067 Mon Sep 17 00:00:00 2001 From: Dmitrij Vasilcenko Date: Tue, 11 Apr 2017 16:26:45 +0300 Subject: [PATCH 3/6] deleting .idea files --- .gitignore | 1 + .idea/workspace.xml | 129 +++++++----------- .../Controller/DefaultController.php | 9 +- var/SymfonyRequirements.php | 0 var/cache/.gitkeep | 0 var/logs/.gitkeep | 0 var/sessions/.gitkeep | 0 7 files changed, 60 insertions(+), 79 deletions(-) mode change 100644 => 100755 var/SymfonyRequirements.php mode change 100644 => 100755 var/cache/.gitkeep mode change 100644 => 100755 var/logs/.gitkeep mode change 100644 => 100755 var/sessions/.gitkeep diff --git a/.gitignore b/.gitignore index 93821ad..1b23e27 100644 --- a/.gitignore +++ b/.gitignore @@ -14,3 +14,4 @@ !var/SymfonyRequirements.php /vendor/ /web/bundles/ +/.idea/* \ No newline at end of file diff --git a/.idea/workspace.xml b/.idea/workspace.xml index 46fbf3d..94e2512 100644 --- a/.idea/workspace.xml +++ b/.idea/workspace.xml @@ -2,8 +2,13 @@ + + + + + - + - - + + @@ -37,6 +42,16 @@ + + + + + + + + + + @@ -48,6 +63,7 @@ @@ -92,74 +108,6 @@ \ No newline at end of file diff --git a/src/AppBundle/Controller/DefaultController.php b/src/AppBundle/Controller/DefaultController.php index 3cb5fd0..6f0b627 100644 --- a/src/AppBundle/Controller/DefaultController.php +++ b/src/AppBundle/Controller/DefaultController.php @@ -34,7 +34,9 @@ public function indexAction(Request $request) $title = $row->filter('.title-list a')->text(); $price = trim($row->filter('.price-list')->text()); $date = $row->filter('.param-list span[title="Pagaminimo data"]')->text(); - $car = [$title, $price, $date]; + $car = ['title' => $title, + 'price' => $price, + 'date' => $date]; $cars[] = $car; } @@ -46,7 +48,10 @@ public function indexAction(Request $request) } } - return new JsonResponse($cars); + $response = new JsonResponse($cars); + $response->setEncodingOptions(JSON_PRETTY_PRINT); + + return $response; } public function getHtml($url) { diff --git a/var/SymfonyRequirements.php b/var/SymfonyRequirements.php old mode 100644 new mode 100755 diff --git a/var/cache/.gitkeep b/var/cache/.gitkeep old mode 100644 new mode 100755 diff --git a/var/logs/.gitkeep b/var/logs/.gitkeep old mode 100644 new mode 100755 diff --git a/var/sessions/.gitkeep b/var/sessions/.gitkeep old mode 100644 new mode 100755 From b3668ba7f91479034bc6c0e67f49d8a69a0b9289 Mon Sep 17 00:00:00 2001 From: Dmitrij Vasilcenko Date: Tue, 11 Apr 2017 16:31:50 +0300 Subject: [PATCH 4/6] ignoring .idea --- .gitignore | 2 +- .idea/autoinsanity.iml | 20 --- .idea/modules.xml | 8 - .idea/vcs.xml | 6 - .idea/workspace.xml | 324 ----------------------------------------- 5 files changed, 1 insertion(+), 359 deletions(-) delete mode 100644 .idea/autoinsanity.iml delete mode 100644 .idea/modules.xml delete mode 100644 .idea/vcs.xml delete mode 100644 .idea/workspace.xml diff --git a/.gitignore b/.gitignore index 1b23e27..f4e8b9e 100644 --- a/.gitignore +++ b/.gitignore @@ -14,4 +14,4 @@ !var/SymfonyRequirements.php /vendor/ /web/bundles/ -/.idea/* \ No newline at end of file +.idea \ No newline at end of file diff --git a/.idea/autoinsanity.iml b/.idea/autoinsanity.iml deleted file mode 100644 index 384d376..0000000 --- a/.idea/autoinsanity.iml +++ /dev/null @@ -1,20 +0,0 @@ - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml deleted file mode 100644 index 3b35e49..0000000 --- a/.idea/modules.xml +++ /dev/null @@ -1,8 +0,0 @@ - - - - - - - - \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml deleted file mode 100644 index 94a25f7..0000000 --- a/.idea/vcs.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - - - \ No newline at end of file diff --git a/.idea/workspace.xml b/.idea/workspace.xml deleted file mode 100644 index 94e2512..0000000 --- a/.idea/workspace.xml +++ /dev/null @@ -1,324 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - true - DEFINITION_ORDER - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - project - - - - - - - - - - - - - - - - project - - - true - - - - DIRECTORY - - false - - - - - - - - - 1491753729633 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file From 8d7bc1cc85eba41a7a6f16b2f153102c12d2d246 Mon Sep 17 00:00:00 2001 From: marttis21 Date: Sun, 16 Apr 2017 17:14:21 +0300 Subject: [PATCH 5/6] fixed and updated default controller --- .../Controller/DefaultController.php | 27 ++----------------- 1 file changed, 2 insertions(+), 25 deletions(-) diff --git a/src/AppBundle/Controller/DefaultController.php b/src/AppBundle/Controller/DefaultController.php index 97e7411..86b0b5a 100644 --- a/src/AppBundle/Controller/DefaultController.php +++ b/src/AppBundle/Controller/DefaultController.php @@ -17,31 +17,8 @@ public function indexAction() { // display database entries $entityManager = $this->get('doctrine.orm.default_entity_manager'); - /*$repository = $entityManager->getRepository('AppBundle:Vehicle'); - $items = $repository->findAll();*/ - - - /* - * - * - */ - - $repository = $entityManager->getRepository('AppBundle:Model'); - $brand = new Brand(); - $brand->setName('ddddddd'); - $model = new Model(); - $model->setName('eedddee'); - - $brand->addModel($model); - $model->setBrand($brand); - - $entityManager->persist($model); - $entityManager->persist($brand); - - $entityManager->flush(); - - var_dump($brand); - //var_dump($model); + $repository = $entityManager->getRepository('AppBundle:Vehicle'); + $items = $repository->findAll(); return $this->render('AppBundle:default:list_items.html.twig', [ 'items' => $items, From 1ee15cdd2bd50d63ce09020adc75f94acd1d5773 Mon Sep 17 00:00:00 2001 From: marttis21 Date: Sun, 16 Apr 2017 17:17:19 +0300 Subject: [PATCH 6/6] removed unused parameters in config file --- app/config/config_dev.yml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/app/config/config_dev.yml b/app/config/config_dev.yml index 2f3bbdd..a616d33 100644 --- a/app/config/config_dev.yml +++ b/app/config/config_dev.yml @@ -32,9 +32,3 @@ monolog: #swiftmailer: # delivery_addresses: ['me@example.com'] - -khepin_yaml_fixtures: - # directory: Resources/FixtureData - resources: - - AppBundle/models - - AppBundle