-
Notifications
You must be signed in to change notification settings - Fork 3
/
class.curl.php
299 lines (270 loc) · 9.95 KB
/
class.curl.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
<?php
/**
*
* Simple cURL class to fetch a web page
* Properly redirects even with safe mode and basedir restrictions
* Can provide simple post options to a page
*
* Load class
* Initiate as
* - $fetch_data = new cURL_fetch_web_data();
* - optionally pass an array of cURL options and redirect count
* - cURL_fetch_web_data(cURL options array, Max redirects);
* - $fetch_data = new cURL_fetch_web_data(array(CURLOPT_SSL_VERIFYPEER => 1), 5);
*
* Make the call
* - $fetch_data->get_url_data('http://www.simplemachines.org'); // fetch a page
* - $fetch_data->get_url_data('http://www.simplemachines.org', array('user' => 'name', 'password' => 'password')); // post to a page
* - $fetch_data->get_url_data('http://www.simplemachines.org', 'parameter1&parameter2&parameter3'); // post to a page
*
*
* Get the data
* - $fetch_data->result('body'); // just the page content
* - $fetch_data->result(); // an array of results, body, header, http result codes
* - $fetch_data->result_raw(); // show all results of all calls (in the event of a redirect)
* - $fetch_data->result_raw(0); // show all results of call x
*
* @package SMF
* @author Simple Machines http://www.simplemachines.org
* @copyright 2011 Simple Machines
* @license http://www.simplemachines.org/about/smf/license.php BSD
*
* @version 2.1 Alpha 1
*/
class curl_fetch_web_data
{
	/**
	 * Baseline cURL options for every request; user supplied options override these.
	 *
	 * NOTE(review): CURLOPT_SSL_VERIFYPEER and CURLOPT_SSL_VERIFYHOST are switched off
	 * here, so HTTPS certificates are not validated unless the caller turns them back on
	 * through the constructor options, e.g. array(CURLOPT_SSL_VERIFYPEER => 1).
	 *
	 * @var array
	 */
	private $default_options = array(
		CURLOPT_RETURNTRANSFER => 1, // Get returned value as a string (don't output it)
		CURLOPT_HEADER => 1, // We need the headers to do our own redirect
		CURLOPT_FOLLOWLOCATION => 0, // Don't follow, we will do it ourselves so safe mode and open_basedir will dig it
		CURLOPT_USERAGENT => 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:11.0) Gecko Firefox/11.0', // set a normal looking useragent
		CURLOPT_CONNECTTIMEOUT => 15, // Don't wait forever on a connection
		CURLOPT_TIMEOUT => 90, // A page should load in this amount of time
		CURLOPT_MAXREDIRS => 5, // stop after this many redirects
		CURLOPT_ENCODING => 'gzip,deflate', // accept gzip and decode it
		CURLOPT_SSL_VERIFYPEER => 0, // stop cURL from verifying the peer's certificate
		CURLOPT_SSL_VERIFYHOST => 0, // stop cURL from verifying the peer's host
		CURLOPT_POST => 0, // no post data unless its passed
	);

	// These were previously created on the fly (and therefore publicly readable),
	// so they are declared public to stay compatible with any existing readers.

	/** @var int maximum number of redirects we are willing to follow */
	public $max_redirect = 3;

	/** @var array cURL options supplied by the caller */
	public $user_options = array();

	/** @var string|null url encoded POST body for the current call, null when not posting */
	public $post_data = null;

	/** @var array the merged cURL options used for the current request */
	public $options = array();

	/** @var int counter of the redirect hop we are on, starts at 1 for a fresh call */
	public $current_redirect = 1;

	/** @var array one entry (url, code, error, headers, body) per request made in the current call */
	public $response = array();

	/** @var array headers of the most recent request, names and values lowercased */
	public $headers = array();

	/** @var array headers of the most recent request with the values in their original case */
	private $raw_headers = array();

	/**
	 * Start the curl object
	 * - allow for user override values
	 *
	 * @param array $options cURL options as an array (CURLOPT_* => value)
	 * @param int $max_redirect use to override the default of 3
	 */
	public function __construct($options = array(), $max_redirect = 3)
	{
		// Initialize class variables
		$this->max_redirect = intval($max_redirect);
		$this->user_options = $options;
	}

	/**
	 * Main calling function,
	 * - will request the page data from a given $url
	 * - optionally will post data to the page form if post data is supplied
	 * - passed arrays will be converted to a post string joined with &'s
	 * - calls set_options to set the curl opts array values based on the defaults and user input
	 *
	 * @param string $url the site we are going to fetch
	 * @param array|string $post_data any post data as form name => value, or a ready made post string
	 * @return curl_fetch_web_data this object, so ->result() can be chained
	 */
	public function get_url_data($url, $post_data = array())
	{
		// Forget any POST data from an earlier call so a plain fetch does not re-send it
		$this->post_data = null;

		// POSTing some data perhaps?
		if (!empty($post_data) && is_array($post_data))
			$this->post_data = $this->build_post_data($post_data);
		elseif (!empty($post_data))
			$this->post_data = trim($post_data);

		// set the options and get it
		$this->set_options();
		$this->curl_request(str_replace(' ', '%20', $url));

		return $this;
	}

	/**
	 * Makes the actual cURL call
	 * - stores responses (url, code, error, headers, body) in the response array
	 * - detects 301, 302, 307 codes and will redirect to the given response header location
	 *
	 * @param string $url site to fetch
	 * @param boolean $redirect flag to indicate if this was a redirect request or not
	 * @return boolean false when no url was given, true once a request has been made and stored
	 */
	private function curl_request($url, $redirect = false)
	{
		// we do have a url I hope
		if ($url == '')
			return false;

		$this->options[CURLOPT_URL] = $url;

		// if we have not already been redirected, set it up so we can if needed
		if (!$redirect)
		{
			$this->current_redirect = 1;
			$this->response = array();
		}

		// header_callback fills these in, start empty so nothing leaks in from a previous request
		$this->headers = array();
		$this->raw_headers = array();

		// Initialize the curl object and make the call
		$cr = curl_init();
		curl_setopt_array($cr, $this->options);
		$curl_content = curl_exec($cr);

		// Get what was returned, then close this request
		$curl_info = curl_getinfo($cr);
		$curl_error = curl_error($cr);
		curl_close($cr);

		$url = $curl_info['url']; // Last effective URL
		$http_code = (int) $curl_info['http_code']; // Last HTTP code

		// The transfer holds headers + body (CURLOPT_HEADER), only the part after the headers is the body
		if ($curl_error === '' && is_string($curl_content))
			$body = (string) substr($curl_content, $curl_info['header_size']);
		else
			$body = false;

		// store this 'loops' data, someone may want all of these :O
		$this->response[] = array(
			'url' => $url,
			'code' => $http_code,
			'error' => $curl_error !== '' ? $curl_error : false,
			'headers' => !empty($this->headers) ? $this->headers : false,
			'body' => $body,
		);

		// Use the untouched header value, lowercasing a location would break case sensitive paths
		$location = isset($this->raw_headers['location']) ? $this->raw_headers['location'] : '';

		// If this a redirect with a location header and we have not given up, then do it again
		if (in_array($http_code, array(301, 302, 307), true) && $location !== '' && $this->current_redirect <= $this->max_redirect)
		{
			$this->current_redirect++;
			$this->redirect($this->get_redirect_url($url, $location), $url);
		}

		return true;
	}

	/**
	 * Used if being redirected to ensure we have a fully qualified address
	 * - missing scheme / host / port are taken from the url that issued the redirect
	 * - a relative path is resolved against the directory of the previous url
	 * - fragments and user:pass parts are not carried over
	 *
	 * @param string $last_url where we went to
	 * @param string $new_url where we were redirected to
	 * @return string new url location
	 */
	private function get_redirect_url($last_url = '', $new_url = '')
	{
		// Get the elements for these urls, parse_url gives false on seriously malformed input
		$last = parse_url($last_url);
		$new = parse_url($new_url);
		if (!is_array($last))
			$last = array();
		if (!is_array($new))
			$new = array();

		// redirect headers are often incomplete or relative so we need to make sure they are fully qualified
		if (isset($new['scheme']))
			$scheme = $new['scheme'];
		else
			$scheme = isset($last['scheme']) ? $last['scheme'] : 'http';

		$last_path = isset($last['path']) && $last['path'] !== '' ? $last['path'] : '/';

		if (isset($new['host']))
		{
			// A new host brings its own port and path, nothing of the old location applies
			$host = $new['host'];
			$port = isset($new['port']) ? $new['port'] : '';
			$path = isset($new['path']) && $new['path'] !== '' ? $new['path'] : '/';
		}
		else
		{
			$host = isset($last['host']) ? $last['host'] : '';
			$port = isset($last['port']) ? $last['port'] : '';

			if (!isset($new['path']) || $new['path'] === '')
				$path = $last_path;
			elseif ($new['path'][0] === '/')
				$path = $new['path'];
			else
			{
				// Relative path, swap out the last segment of the previous path
				$slash = strrpos($last_path, '/');
				$path = ($slash === false ? '/' : substr($last_path, 0, $slash + 1)) . $new['path'];
			}
		}

		$query = isset($new['query']) ? $new['query'] : '';

		// Build the new URL that was in the http header
		return $scheme . '://' . $host . ($port !== '' ? ':' . $port : '') . $path . ($query !== '' ? '?' . $query : '');
	}

	/**
	 * Used to return the results to the calling program
	 * - called as ->result() will return the full final array
	 * - called as ->result('body') to just return the page source of the result
	 * - an unknown area name returns the full final array
	 *
	 * @param string $area used to return an area such as body, headers, error
	 * @return mixed the requested area, the full final array, or false if no request has been made
	 */
	public function result($area = '')
	{
		// Nothing fetched yet (or no url was given), so there is nothing to hand back
		if (empty($this->response))
			return false;

		$final = $this->response[count($this->response) - 1];

		// just return a specified area or the entire result?
		if ($area == '' || !isset($final[$area]))
			return $final;

		return $final[$area];
	}

	/**
	 * Will return all results from all loops (redirects)
	 * - Can be call as ->result_raw(x) where x is a specific loop results.
	 * - Call as ->result_raw() for everything.
	 * - x is clamped to the range of stored results
	 *
	 * @param int|string $response_number
	 * @return mixed all results, a single result, or false if one was asked for and none exist
	 */
	public function result_raw($response_number = '')
	{
		if (!is_numeric($response_number))
			return $this->response;

		if (empty($this->response))
			return false;

		// Keep the index inside what we actually have
		$index = max(0, min((int) $response_number, count($this->response) - 1));

		return $this->response[$index];
	}

	/**
	 * Takes supplied POST data and url encodes it
	 * - forms the data (for post) in to a string var=xyz&var2=abc&var3=123
	 * - blanks values with a leading @ since we don't support sending files (uploading)
	 * - non scalar values (arrays, objects, null) are sent as empty values
	 *
	 * @param array|string $post_data
	 * @return string the encoded post string, non array input is returned untouched
	 */
	private function build_post_data($post_data)
	{
		if (!is_array($post_data))
			return $post_data;

		$postvars = array();

		// build the post data, drop ones with leading @'s since those can be used to send files, we don't support that.
		foreach ($post_data as $name => $value)
		{
			$value = is_scalar($value) ? (string) $value : '';
			if ($value !== '' && $value[0] === '@')
				$value = '';

			$postvars[] = urlencode((string) $name) . '=' . urlencode($value);
		}

		return implode('&', $postvars);
	}

	/**
	 * Sets the final cURL options for the current call
	 * - overwrites our default values with user supplied ones or appends new user ones to what we have
	 * - sets the callback function now that $this is existing
	 *
	 */
	private function set_options()
	{
		// Callback to parse the returned headers, if any
		$this->default_options[CURLOPT_HEADERFUNCTION] = array($this, 'header_callback');

		// Any user options to account for, array_replace keeps the numeric CURLOPT_* keys intact
		if (is_array($this->user_options))
			$this->options = array_replace($this->default_options, $this->user_options);
		else
			$this->options = $this->default_options;

		// POST data options, here we don't allow any override
		if (isset($this->post_data))
		{
			$this->options[CURLOPT_POST] = 1;
			$this->options[CURLOPT_POSTFIELDS] = $this->post_data;
		}
	}

	/**
	 * Called to initiate a redirect from a 301, 302 or 307 header
	 * - resets the cURL options for the loop, sets the referrer flag
	 *
	 * @param string $target_url
	 * @param string $referer_url
	 */
	private function redirect($target_url, $referer_url)
	{
		// no no I last saw that over there ... really, 301, 302, 307
		$this->set_options();
		$this->options[CURLOPT_REFERER] = $referer_url;
		$this->curl_request($target_url, true);
	}

	/**
	 * Callback function to parse returned headers
	 * - lowercases names and values in ->headers to make it consistent
	 * - keeps the value in its original case in ->raw_headers for the redirect logic
	 *
	 * @param mixed $cr the cURL handle, unused
	 * @param string $header a single raw header line
	 * @return int the length of the line received
	 */
	private function header_callback($cr, $header)
	{
		// Whitespace after the colon is optional, so split on the colon alone
		$parts = explode(':', trim($header), 2);

		// set proper headers only (the status line and the blank separator have no name: value form)
		if (isset($parts[1]) && trim($parts[0]) !== '')
		{
			$name = strtolower(trim($parts[0]));
			$value = trim($parts[1]);

			$this->headers[$name] = strtolower($value);
			$this->raw_headers[$name] = $value;
		}

		// return the length of what was passed unless you want a Failed writing header error ;)
		return strlen($header);
	}
}
?>