<?php

// Basic page scraper helpers (scraper.php, introduced by commit cfe959e).
// Adapted from http://www.jacobward.co.uk/working-with-the-scraped-data-part-2/

/**
 * Downloads a URL with cURL and returns the response body.
 *
 * Redirects are followed (at most 10) and the referer is set automatically
 * on each redirect. The connection phase and the whole transfer are each
 * capped at 120 seconds.
 *
 * NOTE(review): the function header and the first option key were missing
 * from the text this file was rebuilt from. The name `curl`, the `$url`
 * parameter and CURLOPT_RETURNTRANSFER are reconstructed from the surviving
 * comments and the tutorial referenced above -- confirm against the original.
 *
 * @param string $url Address of the page to download.
 *
 * @return string|false The response body, or false when the handle cannot be
 *                      created or the transfer fails (the value curl_exec()
 *                      reports).
 */
function curl($url)
{
    $options = [
        CURLOPT_RETURNTRANSFER => true, // Return the page from curl_exec() instead of printing it
        CURLOPT_FOLLOWLOCATION => true, // Follow 'Location' HTTP headers
        CURLOPT_AUTOREFERER    => true, // Set the referer automatically when following a redirect
        CURLOPT_CONNECTTIMEOUT => 120,  // Seconds to wait while establishing the connection
        CURLOPT_TIMEOUT        => 120,  // Seconds the whole request is allowed to take
        CURLOPT_MAXREDIRS      => 10,   // Maximum number of redirections to follow
        CURLOPT_USERAGENT      => "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.1a2pre) Gecko/2008073000 Shredder/3.0a2pre ThunderBrowse/3.2.1.8",
        CURLOPT_URL            => $url, // Page to request
    ];

    $ch = curl_init();
    if ($ch === false) {
        // Without a handle there is nothing to configure or execute.
        return false;
    }

    curl_setopt_array($ch, $options);
    $data = curl_exec($ch);
    curl_close($ch);

    return $data;
}

/**
 * Returns the text found between two delimiters.
 *
 * Both delimiters are matched case-insensitively. The first occurrence of
 * $start is used, then the first occurrence of $end after it; neither
 * delimiter is included in the result.
 *
 * @param string|false|null $data  Text to search; false/null (for example a
 *                                 failed download) is treated as empty text.
 * @param string            $start Delimiter that precedes the wanted text.
 *                                 An empty string means "from the beginning".
 * @param string            $end   Delimiter that follows the wanted text.
 *
 * @return string The text between the delimiters, or '' when either
 *                delimiter is not found or $end is empty.
 */
function scrape_between($data, $start, $end)
{
    // Normalise up front so a false/null input cannot leak into the string
    // functions below, whose handling of such values differs across PHP versions.
    $data = (string) $data;
    $start = (string) $start;
    $end = (string) $end;

    $from = 0;
    if ($start !== '') {
        $startPos = stripos($data, $start);
        if ($startPos === false) {
            return '';
        }
        // Skip over the delimiter itself.
        $from = $startPos + strlen($start);
    }

    if ($end === '') {
        // An empty end delimiter matches immediately, leaving nothing in between.
        return '';
    }

    $endPos = stripos($data, $end, $from);
    if ($endPos === false) {
        return '';
    }

    return substr($data, $from, $endPos - $from);
}