-
Notifications
You must be signed in to change notification settings - Fork 19
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
8 changed files
with
1,060 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,196 @@ | ||
<?php | ||
|
||
namespace Baseapp\Library; | ||
|
||
/**
 * Sitemap Library
 *
 * Collects URL entries into a <urlset> DOM document and renders it either as
 * plain XML or as a gzip-compressed string. Also offers static helpers for
 * pinging search engines, encoding URLs and formatting dates.
 *
 * @package     base-app
 * @category    Library
 * @version     2.0
 */
class Sitemap
{

    /**
     * @var \DOMDocument The document the sitemap is built in
     */
    protected $_xml;

    /**
     * @var \DOMElement The <urlset> root element every URL is appended to
     */
    protected $_root;

    /**
     * @var boolean Enable gzip compression
     */
    public $gzip = false;

    /**
     * @var integer Compression level
     */
    public $compression = 7;

    /**
     * Setup the XML document
     *
     * Every key found in the "sitemap" section of the shared config service is
     * copied onto this instance (e.g. gzip, compression) before the empty
     * <urlset> document is created.
     */
    public function __construct()
    {
        // Load sitemap config from config.ini (the shared service is fetched once)
        $settings = \Phalcon\DI::getDefault()->getShared('config');

        if (isset($settings->sitemap) && $config = $settings->sitemap) {
            foreach ($config as $key => $value) {
                $this->$key = $value;
            }
        }

        // XML document
        $this->_xml = new \DOMDocument('1.0', 'UTF-8');

        // Attributes
        $this->_xml->formatOutput = true;

        // Root element
        $this->_root = $this->_xml->createElement('urlset');

        // Append to XML document
        $this->_xml->appendChild($this->_root);
    }

    /**
     * Add a URL entry to the sitemap
     *
     * @param Sitemap\URL $object Entry that can create its own DOM element
     */
    public function add(Sitemap\URL $object)
    {
        $url = $object->create();

        // Decorate the urlset
        $object->root($this->_root);

        // The element was created in another document, so it has to be
        // imported (deep copy) before it can be appended to our root.
        $this->_root->appendChild($this->_xml->importNode($url, true));
    }

    /**
     * Ping web services
     *
     * Each configured ping entry is used as a sprintf() format that receives
     * the sitemap URL; all requests are executed in parallel.
     *
     * @param string $sitemap Full website path to sitemap
     * @return array|null Service key with the HTTP response code as the value,
     *                    or null when no ping URLs are configured.
     */
    public static function ping($sitemap)
    {
        $config = \Phalcon\DI::getDefault()->getShared('config');

        if (!isset($config->sitemap->ping)) {
            return null;
        }

        // URLs to ping
        $ping = $config->sitemap->ping;

        // Main handle
        $master = curl_multi_init();

        $handles = array();

        // Create handles for each URL and add them to the main handle.
        foreach ($ping as $key => $val) {
            $handles[$key] = curl_init(sprintf($val, $sitemap));

            curl_setopt($handles[$key], CURLOPT_FOLLOWLOCATION, true);
            curl_setopt($handles[$key], CURLOPT_RETURNTRANSFER, true);
            curl_setopt($handles[$key], CURLOPT_USERAGENT, 'Mozilla/5.0 (X11; U; Linux x86_64; en-GB; rv:1.9.2.3) Gecko/20100423 Ubuntu/10.04 (lucid) Firefox/3.6.3');

            curl_multi_add_handle($master, $handles[$key]);
        }

        do {
            $status = curl_multi_exec($master, $still_running);

            // Wait for socket activity instead of spinning at 100% CPU. When
            // select itself fails (-1) back off briefly before trying again.
            if ($status === CURLM_OK && $still_running > 0 && curl_multi_select($master, 1.0) === -1) {
                usleep(1000);
            }
        } while ($still_running > 0);

        $info = array();

        // Build an array of the execution information. Iterating the handles
        // (rather than array_keys() on the config value) also works when the
        // ping list is a traversable config object instead of a plain array.
        foreach ($handles as $key => $handle) {
            $info[$key] = curl_getinfo($handle, CURLINFO_HTTP_CODE);

            // Detach and release the handle while we're here.
            curl_multi_remove_handle($master, $handle);

            // Handles are objects freed automatically from PHP 8 on; older
            // versions need an explicit close to release the resource.
            if (PHP_VERSION_ID < 80000) {
                curl_close($handle);
            }
        }

        // and finally close the master handle.
        curl_multi_close($master);

        return $info;
    }

    /**
     * Encode a string for use inside the sitemap
     *
     * Special characters are converted to entities, the result is
     * percent-encoded, and reserved URL characters are then restored.
     *
     * @access public
     * @param string $string
     * @return string
     */
    public static function encode($string)
    {
        $string = htmlspecialchars($string, ENT_QUOTES, 'UTF-8');

        // This is a rather ugly hack. Basically urlencode and rawurlencode use RFC 1738
        // encoding. This brings it up to date (RFC 3986); The newer RFC has a different
        // set of reserved characters. Credit goes to davis dot peixoto at gmail dot com
        // God bless PHP comments.
        $entities = array('%21', '%2A', '%27', '%28', '%29', '%3B', '%3A', '%40',
            '%26', '%3D', '%2B', '%24', '%2C', '%2F', '%3F', '%23', '%5B', '%5D');

        $replacements = array('!', '*', "'", "(", ")", ";", ":", "@", "&", "=", "+",
            "$", ",", "/", "?", "#", "[", "]");

        $string = str_replace($entities, $replacements, rawurlencode($string));

        // htmlspecialchars() writes single quotes as the numeric entity;
        // swap it for the named XML entity.
        return str_replace('&#039;', '&apos;', $string);
    }

    /**
     * Format a unix timestamp into W3C Datetime
     *
     * @access public
     * @see http://www.w3.org/TR/NOTE-datetime
     * @param string $unix Unixtimestamp
     * @return string W3C Datetime
     * @throws \InvalidArgumentException When $unix is not numeric or exceeds PHP_INT_MAX
     */
    public static function date_format($unix)
    {
        if (is_numeric($unix) && $unix <= PHP_INT_MAX) {
            // Explicit cast so numeric strings and floats are accepted by date()
            return date('Y-m-d\TH:i:sP', (int) $unix);
        }

        throw new \InvalidArgumentException('Must be a unix timestamp');
    }

    /**
     * Render the sitemap
     *
     * @return string Either an XML document or a gzipped file
     */
    public function render()
    {
        // Default uncompressed
        $response = $this->_xml->saveXML();

        if ($this->gzip) {
            // Try and gzip the file before we send it off.
            try {
                $response = gzencode($response, $this->compression);
            } catch (\ErrorException $e) {
                // Fully qualified: this file is namespaced, so an unqualified
                // ErrorException would never match the global class.
                \Baseapp\Bootstrap::exception($e);
            }
        }

        return $response;
    }

    /**
     * @return string XML output.
     */
    public function __toString()
    {
        return $this->render();
    }

}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,139 @@ | ||
<?php | ||
|
||
namespace Baseapp\Library\Sitemap; | ||
|
||
/**
 * Code search sitemap extension
 *
 * Collects the optional codesearch attributes of a URL entry and turns them
 * into a <codesearch:codesearch> element.
 */
class Code implements \Baseapp\Library\Sitemap\SitemapInterface
{

    /**
     * @var array Attribute values keyed by element name; null means "not set"
     */
    private $_attributes = array(
        'filetype' => null,
        'license' => null,
        'filename' => null,
        'packageurl' => null,
        'packagemap' => null
    );

    /**
     * @var array Accepted license short names (lower case)
     */
    protected $_licenses = array(
        'aladdin', 'artistic', 'apache', 'apple', 'bsd', 'cpl', 'gpl', 'lgpl', 'disclaimer',
        'ibm', 'lucent', 'mit', 'mozilla', 'nasa', 'python', 'qpl', 'sleepycat', 'zope'
    );

    /**
     * @var array Accepted archive file name suffixes, including the leading dot
     */
    protected $_archives = array(
        '.tar', '.tar.z', '.tar.gz', '.tgz', '.tar.bz2', '.tbz', '.tbz2', '.zip'
    );

    /**
     * @param string $type Case-insensitive. The value "archive" indicates that
     * the file is an archive file. For source code files, the value defines the
     * the source code language. Examples include "C", "Python", "C#", "Java", "Vim".
     * For source code language, the Short Name, as specified in the list of supported
     * languages, must be used. The value must be printable ASCII characters, and
     * no white space is allowed.
     *
     * @see http://www.google.com/support/webmasters/bin/answer.py?answer=75252
     * @return $this
     * @throws \InvalidArgumentException When the type contains other characters
     */
    public function set_file_type($type)
    {
        $type = (string) $type;

        if (!preg_match('/^[a-z][a-z0-9+#]*$/i', $type)) {
            throw new \InvalidArgumentException('Type must only contain a-z, 0-9, + and #');
        }

        $this->_attributes['filetype'] = $type;

        return $this;
    }

    /**
     * @param string $license Case-insensitive. The name of the software license.
     * For archive files, this indicates the default license for files in the archive.
     * Examples include "GPL", "BSD", "Python", "disclaimer". You must use the Short
     * Name, as specified in the list of supported licenses.
     *
     * @see http://www.google.com/support/webmasters/bin/answer.py?answer=75256
     * @return $this
     * @throws \InvalidArgumentException When the license is not in the supported list
     */
    public function set_license($license)
    {
        $license = (string) $license;

        // The supported list is lower case; normalise before comparing so the
        // documented examples ("GPL", "BSD") are accepted.
        if (!in_array(strtolower($license), $this->_licenses, true)) {
            throw new \InvalidArgumentException('Invalid license type. See http://www.google.com/support/webmasters/bin/answer.py?answer=75256 for details');
        }

        $this->_attributes['license'] = $license;

        return $this;
    }

    /**
     * @param string $file_name The name of the actual file. This is useful if the
     * URL ends in something like download.php?id=1234 instead of the actual filename.
     * The name can contain any character except "/". If the file is an archive file,
     * it will be indexed only if it has one of the supported archive suffixes.
     *
     * @see http://www.google.com/support/webmasters/bin/answer.py?answer=75259
     * @return $this
     * @throws \InvalidArgumentException When the file type is "archive" and the
     *                                   name has no supported archive suffix
     */
    public function set_file_name($file_name)
    {
        $file_name = (string) $file_name;

        // The file type is case-insensitive, so compare it in lower case.
        $is_archive = strtolower((string) $this->_attributes['filetype']) === 'archive';

        if ($is_archive && !$this->_has_archive_suffix($file_name)) {
            throw new \InvalidArgumentException('Not a valid archive type');
        }

        $this->_attributes['filename'] = basename($file_name);

        return $this;
    }

    /**
     * @param string $package_type For use only when the value of codesearch:filetype
     * is not "archive". The URL truncated at the top-level directory for the package.
     * For example, the file http://path/Foo/1.23/bar/file.c could have the package URL
     * http://path/Foo/1.23. All files in a package should have the same packageurl.
     * This tells us which files belong together.
     *
     * @return $this
     */
    public function set_package_url($package_type)
    {
        $this->_attributes['packageurl'] = $package_type;

        return $this;
    }

    /**
     * @param string $package_map Case-sensitive. For use only when codesearch:filetype
     * is "archive". The name of the packagemap file inside the archive. Just like a
     * Sitemap is a list of files on a web site, a packagemap is a list of files in
     * a package.
     *
     * @see http://www.google.com/help/codesearch_packagemap.html
     * @return $this
     */
    public function set_package_map($package_map)
    {
        $this->_attributes['packagemap'] = $package_map;

        return $this;
    }

    /**
     * Build the <codesearch:codesearch> element from the attributes that were set
     *
     * @return \DOMElement
     */
    public function create()
    {
        // Here we need to create a new DOMDocument. This is so we can re-import the
        // DOMElement at the other end.
        $document = new \DOMDocument;

        // Codesearch element
        $code = $document->createElement('codesearch:codesearch');

        // Append attributes
        foreach ($this->_attributes as $name => $value) {
            if (null === $value) {
                continue;
            }

            $child = $document->createElement('codesearch:' . $name);
            $text = (string) $value;

            // A text node escapes characters such as "&" (common in URLs),
            // which the value argument of createElement() does not.
            if ($text !== '') {
                $child->appendChild($document->createTextNode($text));
            }

            $code->appendChild($child);
        }

        return $code;
    }

    /**
     * Declare the codesearch namespace on the sitemap root element
     *
     * @param \DOMElement $root
     */
    public function root(\DOMElement & $root)
    {
        $root->setAttributeNS('http://www.w3.org/2000/xmlns/', 'xmlns:codesearch', 'http://www.google.com/codesearch/schemas/sitemap/1.0');
    }

    /**
     * Check whether a file name ends with one of the supported archive suffixes
     *
     * pathinfo() cannot be used for this: it returns the extension without the
     * leading dot and only the last segment of suffixes such as ".tar.gz".
     *
     * @param string $file_name
     * @return boolean
     */
    private function _has_archive_suffix($file_name)
    {
        foreach ($this->_archives as $suffix) {
            if (substr($file_name, -strlen($suffix)) === $suffix) {
                return true;
            }
        }

        return false;
    }

}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
<?php | ||
|
||
namespace Baseapp\Library\Sitemap; | ||
|
||
/**
 * Geo sitemap extension
 *
 * Holds the format of a geo content URL and turns it into a <geo:geo> element.
 */
class Geo implements \Baseapp\Library\Sitemap\SitemapInterface
{

    /**
     * @var string|null The format that was set, null until set_format() is called
     */
    protected $_format = null;

    /**
     * @var array Accepted formats (lower case)
     */
    protected $_allowed_formats = array(
        'kml', 'kmz', 'georss'
    );

    /**
     * @param string $format Case-insensitive. Specifies the format of the geo content.
     * Examples include "kml" and "georss". Only supported formats will be indexed.
     *
     * @see http://www.google.com/support/webmasters/bin/answer.py?answer=94556
     * @return $this
     * @throws \InvalidArgumentException When the format is not supported
     */
    public function set_format($format)
    {
        $format = (string) $format;

        // The allowed list is lower case; normalise before comparing so the
        // check is case-insensitive as documented.
        if (!in_array(strtolower($format), $this->_allowed_formats, true)) {
            throw new \InvalidArgumentException('The format must either be kml, kmz or georss');
        }

        $this->_format = $format;

        return $this;
    }

    /**
     * Build the <geo:geo> element containing the format
     *
     * @return \DOMElement
     */
    public function create()
    {
        // Here we need to create a new DOMDocument. This is so we can re-import the
        // DOMElement at the other end.
        $document = new \DOMDocument;

        // Geo element
        $geo = $document->createElement('geo:geo');

        // Add format (cast so an unset format yields an empty element)
        $geo->appendChild($document->createElement('geo:format', (string) $this->_format));

        return $geo;
    }

    /**
     * Declare the geo namespace on the sitemap root element
     *
     * @param \DOMElement $root
     */
    public function root(\DOMElement & $root)
    {
        $root->setAttributeNS('http://www.w3.org/2000/xmlns/', 'xmlns:geo', 'http://www.google.com/geo/schemas/sitemap/1.0');
    }

}
Oops, something went wrong.