Skip to content

Commit

Permalink
Library, Sitemap generator
Browse files Browse the repository at this point in the history
  • Loading branch information
mruz committed Jul 25, 2014
1 parent 907bfec commit 066f38d
Show file tree
Hide file tree
Showing 8 changed files with 1,060 additions and 0 deletions.
196 changes: 196 additions & 0 deletions app/common/library/Sitemap.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
<?php

namespace Baseapp\Library;

/**
* Sitemap Library
*
* @package base-app
* @category Library
* @version 2.0
*/
class Sitemap
{

    /**
     * @var \DOMDocument Document the sitemap is assembled in
     */
    protected $_xml;

    /**
     * @var \DOMElement The <urlset> root element that URLs are appended to
     */
    protected $_root;

    /**
     * @var boolean Enable gzip compression
     */
    public $gzip = false;

    /**
     * @var integer Compression level
     */
    public $compression = 7;

    /**
     * Setup the XML document
     *
     * Options found under the "sitemap" key of the shared "config" service
     * are copied onto the properties of the same name before the document
     * is created.
     */
    public function __construct()
    {
        // Load sitemap config from config.ini
        $config = \Phalcon\DI::getDefault()->getShared('config');

        if (isset($config->sitemap) && $config->sitemap) {
            foreach ($config->sitemap as $key => $value) {
                // Only configure properties the class (or a subclass) declares.
                // Keys such as "ping" are read by ping() straight from the
                // config and must not become dynamic properties.
                if (property_exists($this, $key)) {
                    $this->$key = $value;
                }
            }
        }

        // XML document
        $this->_xml = new \DOMDocument('1.0', 'UTF-8');

        // Attributes
        $this->_xml->formatOutput = true;

        // Root element
        $this->_root = $this->_xml->createElement('urlset');

        // Append to XML document
        $this->_xml->appendChild($this->_root);
    }

    /**
     * Append a URL entry to the sitemap
     *
     * @param Sitemap\URL $object Entry to add; it builds its own element and
     *                            gets the chance to decorate the root element.
     */
    public function add(Sitemap\URL $object)
    {
        $url = $object->create();

        // Decorate the urlset
        $object->root($this->_root);

        // The element was built in another document, so it has to be imported
        // (deep copy) before it can be appended to the root element.
        $this->_root->appendChild($this->_xml->importNode($url, true));
    }

    /**
     * Ping web services
     *
     * Every entry of the "sitemap.ping" config is used as a sprintf() format
     * that receives the sitemap location; all requests run in parallel.
     *
     * @param string $sitemap Full website path to sitemap
     * @return array|null Service key with the HTTP response code as the value,
     *                    or null when no ping services are configured.
     */
    public static function ping($sitemap)
    {
        $config = \Phalcon\DI::getDefault()->getShared('config');

        if (!isset($config->sitemap->ping)) {
            return null;
        }

        // URLs to ping
        $ping = $config->sitemap->ping;

        // Main handle
        $master = curl_multi_init();

        $handles = array();

        // Create handles for each URL and add them to the main handle.
        foreach ($ping as $key => $val) {
            $handles[$key] = curl_init(sprintf($val, $sitemap));

            curl_setopt($handles[$key], CURLOPT_FOLLOWLOCATION, true);
            curl_setopt($handles[$key], CURLOPT_RETURNTRANSFER, true);
            curl_setopt($handles[$key], CURLOPT_USERAGENT, 'Mozilla/5.0 (X11; U; Linux x86_64; en-GB; rv:1.9.2.3) Gecko/20100423 Ubuntu/10.04 (lucid) Firefox/3.6.3');

            curl_multi_add_handle($master, $handles[$key]);
        }

        // Run the transfers. Waiting in curl_multi_select() keeps the loop
        // from spinning at full CPU while the requests are in flight.
        $running = 0;

        do {
            $status = curl_multi_exec($master, $running);

            if ($status === CURLM_OK && $running > 0 && curl_multi_select($master, 1.0) === -1) {
                // select() may fail immediately; back off briefly instead of
                // busy-looping.
                usleep(100000);
            }
        } while ($status === CURLM_CALL_MULTI_PERFORM || ($status === CURLM_OK && $running > 0));

        $info = array();

        // Build an array of the execution information. Iterating the handles
        // (rather than array_keys() of the config node) also works when the
        // config node is an object instead of a plain array.
        foreach ($handles as $key => $handle) {
            $info[$key] = curl_getinfo($handle, CURLINFO_HTTP_CODE);

            // Detach the handle from the multi handle while we're here.
            curl_multi_remove_handle($master, $handle);
        }

        // and finally close the master handle.
        curl_multi_close($master);

        return $info;
    }

    /**
     * Escape a string for use as a URL inside the sitemap XML
     *
     * HTML special characters are converted to entities first, then the
     * result is percent-encoded while the RFC 3986 reserved characters are
     * left (or put back) unencoded.
     *
     * @access public
     * @param string $string
     * @return string
     */
    public static function encode($string)
    {
        $string = htmlspecialchars($string, ENT_QUOTES, 'UTF-8');

        // This is a rather ugly hack. Basically urlencode and rawurlencode use RFC 1738
        // encoding. This brings it up to date (RFC 3986); The newer RFC has a different
        // set of reserved characters. Credit goes to davis dot peixoto at gmail dot com
        // God bless PHP comments.
        $entities = array('%21', '%2A', '%27', '%28', '%29', '%3B', '%3A', '%40',
            '%26', '%3D', '%2B', '%24', '%2C', '%2F', '%3F', '%23', '%5B', '%5D');

        $replacements = array('!', '*', "'", "(", ")", ";", ":", "@", "&", "=", "+",
            "$", ",", "/", "?", "#", "[", "]");

        $string = str_replace($entities, $replacements, rawurlencode($string));

        // htmlspecialchars() emits the numeric entity for a single quote;
        // swap it for the named XML entity.
        return str_replace('&#039;', '&apos;', $string);
    }

    /**
     * Format a unix timestamp into W3C Datetime
     *
     * @access public
     * @see http://www.w3.org/TR/NOTE-datetime
     * @param string|integer $unix Unixtimestamp
     * @return string W3C Datetime
     * @throws \InvalidArgumentException When the value is not numeric or is
     *                                   larger than PHP_INT_MAX.
     */
    public static function date_format($unix)
    {
        if (is_numeric($unix) && $unix <= PHP_INT_MAX) {
            // Hand date() a real integer: numeric strings and floats are
            // normalised here instead of relying on implicit coercion.
            return date('Y-m-d\TH:i:sP', (int) ($unix + 0));
        }

        throw new \InvalidArgumentException('Must be a unix timestamp');
    }

    /**
     * Serialise the sitemap
     *
     * When gzip is enabled but compression fails, the uncompressed XML is
     * returned so that the result is always a string.
     *
     * @return string Either an XML document or a gzipped file
     */
    public function render()
    {
        // Default uncompressed
        $response = $this->_xml->saveXML();

        if ($this->gzip) {
            // Keep the level inside the range zlib accepts (-1 = default, 0-9);
            // the value may come from configuration.
            $level = max(-1, min(9, (int) $this->compression));

            // Try and gzip the file before we send it off.
            try {
                $compressed = gzencode($response, $level);

                // gzencode() signals failure with false; keep the plain XML then.
                if (false !== $compressed) {
                    $response = $compressed;
                }
            } catch (\ErrorException $e) {
                // Fully qualified: inside this namespace a bare ErrorException
                // would refer to a class that does not exist and never match.
                \Baseapp\Bootstrap::exception($e);
            }
        }

        return $response;
    }

    /**
     * @return string XML output.
     */
    public function __toString()
    {
        return $this->render();
    }

}
139 changes: 139 additions & 0 deletions app/common/library/Sitemap/Code.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
<?php

namespace Baseapp\Library\Sitemap;

class Code implements \Baseapp\Library\Sitemap\SitemapInterface
{

    /**
     * @var array Child element name => value; null entries are left out of the output
     */
    private $_attributes = array(
        'filetype' => null,
        'license' => null,
        'filename' => null,
        'packageurl' => null,
        'packagemap' => null
    );

    /**
     * @var array Accepted license short names (lower case)
     */
    protected $_licenses = array(
        'aladdin', 'artistic', 'apache', 'apple', 'bsd', 'cpl', 'gpl', 'lgpl', 'disclaimer',
        'ibm', 'lucent', 'mit', 'mozilla', 'nasa', 'python', 'qpl', 'sleepycat', 'zope'
    );

    /**
     * @var array Accepted archive file name suffixes (lower case, with leading dot)
     */
    protected $_archives = array(
        '.tar', '.tar.z', '.tar.gz', '.tgz', '.tar.bz2', '.tbz', '.tbz2', '.zip'
    );

    /**
     * @param string $type Case-insensitive. The value "archive" indicates that
     * the file is an archive file. For source code files, the value defines the
     * the source code language. Examples include "C", "Python", "C#", "Java", "Vim".
     * For source code language, the Short Name, as specified in the list of supported
     * languages, must be used. The value must be printable ASCII characters, and
     * no white space is allowed.
     *
     * @see http://www.google.com/support/webmasters/bin/answer.py?answer=75252
     * @return Code
     * @throws \InvalidArgumentException When the type contains other characters
     */
    public function set_file_type($type)
    {
        $type = (string) $type;

        // \z instead of $ so that a trailing newline is not accepted.
        if (!preg_match('/^[a-z][a-z0-9+#]*\z/i', $type)) {
            throw new \InvalidArgumentException('Type must only contain a-z, 0-9, + and #');
        }

        $this->_attributes['filetype'] = $type;

        return $this;
    }

    /**
     * @param string $license Case-insensitive. The name of the software license.
     * For archive files, this indicates the default license for files in the archive.
     * Examples include "GPL", "BSD", "Python", "disclaimer". You must use the Short
     * Name, as specified in the list of supported licenses.
     *
     * @see http://www.google.com/support/webmasters/bin/answer.py?answer=75256
     * @return Code
     * @throws \InvalidArgumentException When the license is not in the supported list
     */
    public function set_license($license)
    {
        $license = (string) $license;

        // The list is lower case; compare case-insensitively so that the
        // documented spellings ("GPL", "BSD", ...) are accepted.
        if (!in_array(strtolower($license), $this->_licenses, true)) {
            throw new \InvalidArgumentException('Invalid license type. See http://www.google.com/support/webmasters/bin/answer.py?answer=75256 for details');
        }

        $this->_attributes['license'] = $license;

        return $this;
    }

    /**
     * @param string $file_name The name of the actual file. This is useful if the
     * URL ends in something like download.php?id=1234 instead of the actual filename.
     * The name can contain any character except "/". If the file is an archive file,
     * it will be indexed only if it has one of the supported archive suffixes.
     *
     * The suffix is only validated when the file type has already been set
     * to "archive" before this method is called.
     *
     * @see http://www.google.com/support/webmasters/bin/answer.py?answer=75259
     * @return Code
     * @throws \InvalidArgumentException When an archive has an unsupported suffix
     */
    public function set_file_name($file_name)
    {
        $file_name = basename((string) $file_name);

        // The file type is case-insensitive, so "Archive" counts as well.
        $is_archive = strcasecmp((string) $this->_attributes['filetype'], 'archive') === 0;

        if ($is_archive && !$this->_has_archive_suffix($file_name)) {
            throw new \InvalidArgumentException('Not a valid archive type');
        }

        $this->_attributes['filename'] = $file_name;

        return $this;
    }

    /**
     * @param string $package_type For use only when the value of codesearch:filetype
     * is not "archive". The URL truncated at the top-level directory for the package.
     * For example, the file http://path/Foo/1.23/bar/file.c could have the package URL
     * http://path/Foo/1.23. All files in a package should have the same packageurl.
     * This tells us which files belong together.
     *
     * @return Code
     */
    public function set_package_url($package_type)
    {
        $this->_attributes['packageurl'] = $package_type;

        return $this;
    }

    /**
     * @param string $package_map Case-sensitive. For use only when codesearch:filetype
     * is "archive". The name of the packagemap file inside the archive. Just like a
     * Sitemap is a list of files on a web site, a packagemap is a list of files in
     * a package.
     *
     * @see http://www.google.com/help/codesearch_packagemap.html
     * @return Code
     */
    public function set_package_map($package_map)
    {
        $this->_attributes['packagemap'] = $package_map;

        return $this;
    }

    /**
     * Build the codesearch element with one child per attribute that was set
     *
     * @return \DOMElement
     */
    public function create()
    {
        // Here we need to create a new DOMDocument. This is so we can re-import the
        // DOMElement at the other end.
        $document = new \DOMDocument;

        // Codesearch element
        $code = $document->createElement('codesearch:codesearch');

        // Append attributes
        foreach ($this->_attributes as $name => $value) {
            if (null !== $value) {
                $code->appendChild(
                    $document->createElement('codesearch:' . $name, $this->_escape_ampersands($value))
                );
            }
        }

        return $code;
    }

    /**
     * Declare the codesearch namespace on the sitemap root element
     *
     * @param \DOMElement $root
     */
    public function root(\DOMElement & $root)
    {
        $root->setAttributeNS('http://www.w3.org/2000/xmlns/', 'xmlns:codesearch', 'http://www.google.com/codesearch/schemas/sitemap/1.0');
    }

    /**
     * Tell whether a file name ends in one of the supported archive suffixes.
     *
     * pathinfo() cannot be used here: it returns the extension without the
     * leading dot and only its last segment ("gz" for "x.tar.gz").
     *
     * @param string $file_name
     * @return boolean
     */
    private function _has_archive_suffix($file_name)
    {
        $file_name = strtolower($file_name);

        foreach ($this->_archives as $suffix) {
            if (substr($file_name, -strlen($suffix)) === $suffix) {
                return true;
            }
        }

        return false;
    }

    /**
     * Escape bare ampersands so createElement() accepts the value.
     *
     * createElement() does not escape its value and treats "&" as the start
     * of an entity reference; a raw "&" (common in URLs) triggers a warning
     * and truncates the text. Sequences that already look like an entity
     * reference are left alone so pre-escaped values keep working.
     *
     * @param string $value
     * @return string
     */
    private function _escape_ampersands($value)
    {
        return preg_replace('/&(?!(?:#[0-9]+|#x[0-9a-f]+|[a-z_][a-z0-9._-]*);)/i', '&amp;', (string) $value);
    }

}
50 changes: 50 additions & 0 deletions app/common/library/Sitemap/Geo.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
<?php

namespace Baseapp\Library\Sitemap;

class Geo implements \Baseapp\Library\Sitemap\SitemapInterface
{

    /**
     * @var string|null Format of the geo content, null until set_format() is called
     */
    protected $_format = null;

    /**
     * @var array Accepted formats (lower case)
     */
    protected $_allowed_formats = array(
        'kml', 'kmz', 'georss'
    );

    /**
     * @param string $format Case-insensitive. Specifies the format of the geo content.
     * Examples include "kml" and "georss". Only supported formats will be indexed.
     *
     * @see http://www.google.com/support/webmasters/bin/answer.py?answer=94556
     * @return Geo
     * @throws \InvalidArgumentException When the format is not supported
     */
    public function set_format($format)
    {
        $format = (string) $format;

        // The list is lower case; compare case-insensitively as documented.
        if (in_array(strtolower($format), $this->_allowed_formats, true)) {
            $this->_format = $format;

            return $this;
        }

        throw new \InvalidArgumentException('The format must either be kml, kmz or georss');
    }

    /**
     * Build the geo element containing the format
     *
     * The format element is left empty when no format has been set.
     *
     * @return \DOMElement
     */
    public function create()
    {
        // Here we need to create a new DOMDocument. This is so we can re-import the
        // DOMElement at the other end.
        $document = new \DOMDocument;

        // Geo element
        $geo = $document->createElement('geo:geo');

        // Add format; the text node is only added when a format is set, so
        // null is never handed to the DOM API.
        $format = $document->createElement('geo:format');

        if (null !== $this->_format) {
            $format->appendChild($document->createTextNode((string) $this->_format));
        }

        $geo->appendChild($format);

        return $geo;
    }

    /**
     * Declare the geo namespace on the sitemap root element
     *
     * @param \DOMElement $root
     */
    public function root(\DOMElement & $root)
    {
        $root->setAttributeNS('http://www.w3.org/2000/xmlns/', 'xmlns:geo', 'http://www.google.com/geo/schemas/sitemap/1.0');
    }

}
Loading

0 comments on commit 066f38d

Please sign in to comment.