diff --git a/.autotest b/.autotest deleted file mode 100644 index 2ac8192f..00000000 --- a/.autotest +++ /dev/null @@ -1,36 +0,0 @@ -class Autotest - ## - # Convert a path in a string, s, into a class name, changing - # underscores to CamelCase, etc. - - def path_to_classname(s) - sep = File::SEPARATOR - f = s.sub(/^test#{sep}/, '').sub(/\.rb$/, '').split(sep) - f = f.map { |path| path.split(/_|(\d+)/).map { |seg| seg.capitalize }.join } - f = f.map { |path| path =~ /Test$/ ? path : "#{path}Test" } - f.join('::') - end -end - -Autotest.add_hook :initialize do |at| - unless ARGV.empty? - if ARGV[0] == '-d' - at.find_directories = ARGV[1..-1].dup - else - at.find_directories = [] - at.extra_files = ARGV.dup - end - end - - # doesn't seem to work - # at.clear_mappings - - at.add_mapping(/^lib\/.*\.rb$/) do |filename, _| - possible = File.basename(filename, 'rb').gsub '_', '_?' - files_matching %r%^test/.*#{possible}_test\.rb$% - end - - at.add_mapping(/^test.*\/.*test\.rb$/) do |filename, _| - filename - end -end \ No newline at end of file diff --git a/.circleci/config.yml b/.circleci/config.yml new file mode 100644 index 00000000..1a50f90b --- /dev/null +++ b/.circleci/config.yml @@ -0,0 +1,88 @@ +version: 2.1 + +jobs: + gem-test: + parameters: + ruby-version: + type: string + docker: + # See https://github.com/CircleCI-Public/cimg-ruby + - image: cimg/ruby:<< parameters.ruby-version >> + steps: + - checkout + - run: + name: Install dependencies + command: | + bundle install --path vendor/bundle --jobs=2 + - run: + name: Run tests + command: | + bundle exec rspec --profile 10 --format RspecJunitFormatter --out ./test-results/rspec/results.xml --format progress + - store_test_results: + path: test-results + + integration-test: + parameters: + ruby-version: + type: string + rails-version: + type: string + docker: + # See https://github.com/CircleCI-Public/cimg-ruby + - image: cimg/ruby:<< parameters.ruby-version >> + working_directory: ~/sitemap_generator/integration 
+ steps: + - checkout: + path: ~/sitemap_generator + - run: + name: Install sqlite3 when on Ruby 2.5 + command: | + if [[ "$RUBY_VERSION" =~ 2\.5 ]]; then + sudo apt-get update && sudo apt-get install -y sqlite3 libsqlite3-dev + fi + - run: + name: Install Rails dependencies + environment: + BUNDLE_GEMFILE: "./gemfiles/rails_<< parameters.rails-version >>.gemfile" + command: | + bundle config set --local path 'vendor/bundle' + bundle install --jobs=2 + - run: + name: Run integration tests + environment: + BUNDLE_GEMFILE: "./gemfiles/rails_<< parameters.rails-version >>.gemfile" + command: | + bundle exec rspec --profile 10 --format RspecJunitFormatter --out ./test-results/rspec/results.xml --format progress + - store_test_results: + path: test-results + +workflows: + test: + jobs: + - gem-test: + # See https://circleci.com/blog/circleci-matrix-jobs/ + matrix: + parameters: + ruby-version: ["3.1", "3.0", "2.7", "2.5"] + name: gem-test-ruby-<< matrix.ruby-version >> + - integration-test: + # See https://circleci.com/blog/circleci-matrix-jobs/ + # See https://www.fastruby.io/blog/ruby/rails/versions/compatibility-table.html for Ruby and Rails compatibility + matrix: + parameters: + ruby-version: ["3.1", "3.0", "2.7", "2.5"] + rails-version: ["5_2", "6_0", "6_1", "7_0"] + exclude: + - ruby-version: "2.5" + rails-version: "7_0" + - ruby-version: "2.7" + rails-version: "5_2" + - ruby-version: "3.0" + rails-version: "5_2" + - ruby-version: "3.0" + rails-version: "6_0" + - ruby-version: "3.1" + rails-version: "5_2" + - ruby-version: "3.1" + rails-version: "6_0" + name: integration-test-ruby-<< matrix.ruby-version >>-rails-<< matrix.rails-version >> diff --git a/.gitignore b/.gitignore index 341b0ec4..b132b6fc 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,13 @@ -pkg +.byebug_history +.DS_Store *.swp +pkg +tmp/**/* +*.bundle +*.orig +coverage +.idea +public +Gemfile.lock +integration/Gemfile.lock +integration/gemfiles/*.lock diff --git a/.ruby-version b/.ruby-version 
new file mode 100644 index 00000000..fd2a0186 --- /dev/null +++ b/.ruby-version @@ -0,0 +1 @@ +3.1.0 diff --git a/CHANGES.md b/CHANGES.md new file mode 100644 index 00000000..08acdb3b --- /dev/null +++ b/CHANGES.md @@ -0,0 +1,290 @@ +### 6.1.2 + +* Resolve NoMethodError using URI#open for Ruby less than 2.5.0 [#353](https://github.com/kjvarga/sitemap_generator/pull/353) + +### 6.1.1 + +* Resolve deprecation warning on using Kernel#open in Ruby 2.7 (use URI.open instead) [#342](https://github.com/kjvarga/sitemap_generator/pull/342) +* Support S3 Endpoints for S3 Compliant Providers like DigitalOcean Spaces [#325](https://github.com/kjvarga/sitemap_generator/pull/325) + +### 6.1.0 + +* Support uploading files to Google Cloud Storage [#326](https://github.com/kjvarga/sitemap_generator/pull/326) and [#340](https://github.com/kjvarga/sitemap_generator/pull/340) + +### 6.0.2 + +* Resolve `BigDecimal.new is deprecated` warnings in Ruby 2.5 [#305](https://github.com/kjvarga/sitemap_generator/pull/305). +* Resolve `instance variable not initialized`, `File.exists? is deprecated` and `'*' interpreted as argument prefix` warnings [#304](https://github.com/kjvarga/sitemap_generator/pull/304). + +### 6.0.1 + +* Use `yaml_tag` instead of `yaml_as`, which was deprecated in Ruby 2.4, and removed in 2.5 [#298](https://github.com/kjvarga/sitemap_generator/pull/298). + +### 6.0.0 + +*Backwards incompatible changes* + +* Adapters (AWS SDK, S3, Fog & Wave) no longer load their dependencies. It is up to the user + to `require` the appropriate libraries for the adapter to work. +* AwsSdkAdapter: Fixed [#279](https://github.com/kjvarga/sitemap_generator/issues/279) where sitemaps were incorrectly nested under a `sitemaps/` directory in S3 +* Stop supporting Ruby < 2.0, test with Ruby 2.4. + +*Other changes* + +* If Rails is defined but the application is not loaded, don't include the URL helpers. 
+ +### 5.3.1 + +* Ensure files have 644 permissions when building to try to address issue [#264](https://github.com/kjvarga/sitemap_generator/issues/264) +* Use HTTPS in the Gemfile (PR [#263](https://github.com/kjvarga/sitemap_generator/pull/263)) + +### 5.3.0 + +* Add `max_sitemap_links` option support for limiting how many links each sitemap can hold. Issue [#188](https://github.com/kjvarga/sitemap_generator/issues/188) PR [#262](https://github.com/kjvarga/sitemap_generator/pull/262) +* Upgrade development dependencies +* Modernize Gemfile & gemspec +* Bring specs up to RSpec 3.5 +* Remove Geo sitemap support. Google no longer supports them. Issue [#246](https://github.com/kjvarga/sitemap_generator/issues/246) +* Use `sitemap` namespace for Capistrano tasks (rather than `deploy`). PR [#241](https://github.com/kjvarga/sitemap_generator/pull/241) +* Use presence of `Rails::VERSION` to detect when running under Rails, rather than just `Rails` constant. PR [#221](https://github.com/kjvarga/sitemap_generator/pull/221) +* Remove gem post-install message warning about incompatible changes in version 4 + +### 5.2.0 + +* New `SitemapGenerator::AwsSdkAdapter` adapter using the bare aws-sdk gem. +* Fix Bing ping url. +* Support string option keys passed to `add`. +* In Railtie, load the rake tasks instead of requiring them. + +### 5.1.0 + +* Require only `fog-aws` instead of `fog` for the `S3Adapter` and support using IAM profile instead of setting access key & secret directly. +* Implement `respond_to?` on the `SitemapGenerator::Sitemap` pseudo class. +* Make `:lang` optional on alternate links so they can be used for [AppIndexing](https://developers.google.com/app-indexing/reference/deeplinks). +* Documented Mobile Sitemaps `:mobile` option. + +### 5.0.5 + +* Use MIT licence. +* Fix deploys with Capistrano 3 ([#163](https://github.com/kjvarga/sitemap_generator/issues/163)). 
+* Allow any Fog storage options for S3 adapter ([#167](https://github.com/kjvarga/sitemap_generator/pull/167)). + +### 5.0.4 + +* Don't include the `media` attribute on alternate links unless it's given + +### 5.0.3 + +* Add support for Video sitemaps options `:live` and `:requires_subscription` + +### 5.0.2 + +* Set maximum filesize to 10,000,000 bytes rather than 10,485,760 bytes. + +### 5.0.1 + +* Include new `SitemapGenerator::FogAdapter` ([#138](https://github.com/kjvarga/sitemap_generator/pull/138)). +* Fix usage of attr_* methods in `LinkSet` +* Don't override custom getters/setters ([#144](https://github.com/kjvarga/sitemap_generator/pull/144)). +* Fix breaking spec in Ruby 2 ([#142](https://github.com/kjvarga/sitemap_generator/pull/142)). +* Include Capistrano 3.x tasks ([#141](https://github.com/kjvarga/sitemap_generator/pull/141)). + +### 5.0.0 + +* Support new `:compress` option for customizing which files get compressed. +* Remove old deprecated methods: + * Removed options to `LinkSet::add()`: `:sitemaps_namer` and `:sitemap_index_namer` (use `:namer` option) + * Removed `LinkSet::sitemaps_namer=`, `LinkSet::sitemaps_namer` (use `LinkSet::namer=` and `LinkSet::namer`) + * Removed `LinkSet::sitemaps_index_namer=`, `LinkSet::sitemaps_index_namer` (use `LinkSet::namer=` and `LinkSet::namer`) + * Removed the `SitemapGenerator::SitemapNamer` class (use `SitemapGenerator::SimpleNamer`) + * Removed `LinkSet::add_links()` (use `LinkSet::create()`) +* Support `fog_path_style` option in the `SitemapGenerator::S3Adapter` so buckets with dots in the name work over HTTPS without SSL certificate problems. + +### 4.3.1 + +* Support integer timestamps. +* Update README for new features added in last release. + +### 4.3.0 + +* Support `media` attribute on alternate links ([#125](https://github.com/kjvarga/sitemap_generator/issues/125)). 
+* Changed `SitemapGenerator::S3Adapter` to write files in a single operation, avoiding potential permissions errors when listing a directory prior to writing ([#130](https://github.com/kjvarga/sitemap_generator/issues/130)). +* Remove Sitemap Writer from ping task ([#129](https://github.com/kjvarga/sitemap_generator/issues/129)). +* Support `url:expires` element ([#126](https://github.com/kjvarga/sitemap_generator/issues/126)). + +### 4.2.0 + +* Update Google ping URL. +* Quote the ping URL in the output. +* Support Video `video:price` element ([#117](https://github.com/kjvarga/sitemap_generator/issues/117)). +* Support symbols as well as strings for most arguments to `add()` ([#113](https://github.com/kjvarga/sitemap_generator/issues/113)). +* Ensure that `public_path` and `sitemaps_path` end with a slash (`/`) ([#118](https://github.com/kjvarga/sitemap_generator/issues/118)). + +### 4.1.1 + +* Support setting the S3 region. +* Fixed bug where incorrect URL was being used in the ping to search engines - only affected sites with a single sitemap file and no index file. +* Output the URL being pinged in the verbose output. +* Test in Rails 4. + +### 4.1.0 + +* [PageMap sitemap][using_pagemaps] support. +* Tested with Rails 4 pre-release. + +### 4.0.1 + +* Add a post install message regarding the naming convention change. + +### 4.0 + +* **NEW, NON-BACKWARDS COMPATIBLE CHANGES.** +* `create_index` defaults to `:auto`. +* Define `SitemapGenerator::SimpleNamer` class for simpler custom namers compatible with the new naming conventions. +* Deprecate `sitemaps_namer`, `sitemap_index_namer` and their respective namer classes. +* Support `nofollow` option on alternate links. +* Fix formatting of `publication_date` in News sitemaps. 
+ +### 3.4 + +* Support [alternate links][alternate_links] for urls +* Support configurable options in the `SitemapGenerator::S3Adapter` + +### 3.3 + +* Support creating sitemaps with no index file + +### 3.2.1 + +* Fix syntax error in `SitemapGenerator::S3Adapter` + +### 3.2 + +* Support mobile tags +* Add `SitemapGenerator::S3Adapter`, a simple S3 adapter which uses Fog and doesn't require CarrierWave +* Remove Ask from the sitemap ping because the service has been shutdown +* [Turn off `include_index`][include_index_change] by default +* Fix the news XML namespace +* Only include `autoplay` attribute if present + +### 3.1.1 + +* Bugfix +* Groups inherit current adapter + +### 3.1.0 + +* Add `add_to_index` method to add links to the sitemap index. +* Add `sitemap` method for accessing the `LinkSet` instance from within `create()`. +* Don't modify options hashes passed to methods. Fix and improve `yield_sitemap` option handling. + +### 3.0.0 + +* **Framework agnostic!** +* Fix alignment in output +* Show directory sitemaps are being generated into +* Only show sitemap compressed file size +* Toggle output using VERBOSE environment variable +* Remove tasks/ directory because it's deprecated in Rails 2 +* Simplify dependencies. + +### 2.2.1 + +* Support adding new search engines to ping and modifying the default search engines. +* Allow the URL of the sitemap index to be passed as an argument to `ping_search_engines`. See Pinging Search Engines in README. + +### 2.1.8 + +* Extend and improve Video Sitemap support. +* Include sitemap docs in the README, support all element attributes, properly format values. + +### 2.1.7 + +* Improve format of float priorities +* Remove Yahoo from ping - the Yahoo service has been shut down. 
+ +### 2.1.6 + +* Fix the `lastmod` value on sitemap file links + +### 2.1.5 + +* Fix verbose setting in the rake task, it should default to true + +### 2.1.4 + +* Allow special characters in URLs (don't use `URI.join` to construct URLs) + +### 2.1.3 + +* Fix calling `create` with both `filename` and `sitemaps_namer` options + +### 2.1.2 + +* Support multiple videos per url using the new `videos` option to `add()`. + +### 2.1.1 + +* Support calling `create()` multiple times in a sitemap config +* Support host names with path segments so you can use a `default_host` like `'http://mysite.com/subdirectory/'` +* Turn off `include_index` when the `sitemaps_host` differs from `default_host` +* Add docs about how to upload to remote hosts. + +### 2.1.0 + +* [News sitemap][sitemap_news] support + +### 2.0.1.pre2 + +* Fix uploading to the (bucket) root on a remote server + +### 2.0.1.pre1 + +* Support read-only filesystems like Heroku by supporting uploading to remote host + +### 2.0.1 + +* Minor improvements to verbose handling +* Prevent missing `Timeout` issue + +### v2.0.0 + +* Introducing a new simpler API, Sitemap Groups, Sitemap Namers and more! 
+ +### 1.5.0 + +* New options `include_root`, `include_index` +* Major testing & refactoring + +### 1.4.0 + +* [Geo sitemap][geo_tags] support +* Multiple sitemap support via CONFIG_FILE rake option + +### 1.3.0 + +* Support setting the sitemaps path + +### 1.2.0 + +* Verified working with Rails 3 stable release + +### 1.1.0 + +* [Video sitemap][sitemap_video] support + +### 0.2.6 + +* [Image Sitemap][sitemap_images] support + +### 0.2.5 + +* Rails 3 prerelease support (beta) + +[geo_tags]:http://www.google.com/support/webmasters/bin/answer.py?hl=en&answer=94555 +[sitemap_images]:http://www.google.com/support/webmasters/bin/answer.py?answer=178636 +[sitemap_video]:https://support.google.com/webmasters/answer/80471?hl=en&ref_topic=4581190 +[sitemap_news]:https://support.google.com/news/publisher/topic/2527688?hl=en&ref_topic=4359874 +[include_index_change]:https://github.com/kjvarga/sitemap_generator/issues/70 +[alternate_links]:http://support.google.com/webmasters/bin/answer.py?hl=en&answer=2620865 +[using_pagemaps]:https://developers.google.com/custom-search/docs/structured_data#pagemaps diff --git a/Gemfile b/Gemfile new file mode 100644 index 00000000..f75b96a5 --- /dev/null +++ b/Gemfile @@ -0,0 +1,11 @@ +source 'https://rubygems.org' + +gemspec + +if RUBY_VERSION =~ /2.5.*/ + gem 'nokogiri', '1.12.5' +end + +group :test do + gem 'byebug' +end diff --git a/MIT-LICENSE b/MIT-LICENSE index 9376605b..f8dfaf56 100644 --- a/MIT-LICENSE +++ b/MIT-LICENSE @@ -1,4 +1,4 @@ -Copyright (c) 2009 [name of plugin creator] +Copyright (c) 2009 Karl Varga Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the diff --git a/README.md b/README.md index bad4ff16..5ec204bf 100644 --- a/README.md +++ b/README.md @@ -1,168 +1,1200 @@ -SitemapGenerator -================ +# SitemapGenerator -This plugin enables ['enterprise-class'][enterprise_class] Google Sitemaps to be easily generated for a Rails site as a 
rake task, using a simple 'Rails Routes'-like DSL. +[![CircleCI](https://circleci.com/gh/kjvarga/sitemap_generator/tree/master.svg?style=shield)](https://circleci.com/gh/kjvarga/sitemap_generator/tree/master) -Raison d'être -------- +SitemapGenerator is the easiest way to generate Sitemaps in Ruby. Rails integration provides access to the Rails route helpers within your sitemap config file and automatically makes the rake tasks available to you. Or if you prefer to use another framework, you can! You can use the rake tasks provided or run your sitemap configs as plain ruby scripts. -Most of the Sitemap plugins out there seem to try to recreate the Sitemap links by iterating the Rails routes. In some cases this is possible, but for a great deal of cases it isn't. +Sitemaps adhere to the [Sitemap 0.9 protocol][sitemap_protocol] specification. -a) There are probably quite a few routes in your routes file that don't need inclusion in the Sitemap. (AJAX routes I'm looking at you.) +## Features -and +* Framework agnostic +* Supports [News sitemaps][sitemap_news], [Video sitemaps][sitemap_video], [Image sitemaps][sitemap_images], [Mobile sitemaps][sitemap_mobile], [PageMap sitemaps][sitemap_pagemap] and [Alternate Links][alternate_links] +* Supports read-only filesystems like Heroku via uploading to a remote host like Amazon S3 +* Compatible with all versions of Rails and Ruby +* Adheres to the [Sitemap 0.9 protocol][sitemap_protocol] +* Handles millions of links +* Customizable sitemap compression +* Notifies search engines (Google, Bing) of new sitemaps +* Ensures your old sitemaps stay in place if the new sitemap fails to generate +* Gives you complete control over your sitemap contents and naming scheme +* Intelligent sitemap indexing -b) How would you infer the correct series of links for the following route? +### Show Me - map.zipcode 'location/:state/:city/:zipcode', :controller => 'zipcode', :action => 'index' - -Don't tell me it's trivial, because it isn't. 
It just looks trivial. +This is a simple standalone example. For Rails installation see the [Rails instructions](#rails) in the [Install](#installation) section. -So my idea is to have another file similar to 'routes.rb' called 'sitemap.rb', where you can define what goes into the Sitemap. +Install: -Here's my solution: +``` +gem install sitemap_generator +``` - Zipcode.find(:all, :include => :city).each do |z| - sitemap.add zipcode_path(:state => z.city.state, :city => z.city, :zipcode => z) - end +Create `sitemap.rb`: -Easy hey? +```ruby +require 'rubygems' +require 'sitemap_generator' -Other Sitemap settings for the link, like `lastmod`, `priority`, `changefreq` and `host` are entered automatically, although you can override them if you need to. +SitemapGenerator::Sitemap.default_host = 'http://example.com' +SitemapGenerator::Sitemap.create do + add '/home', :changefreq => 'daily', :priority => 0.9 + add '/contact_us', :changefreq => 'weekly' +end +SitemapGenerator::Sitemap.ping_search_engines # Not needed if you use the rake tasks +``` -Other "difficult" Sitemap issues, solved by this plugin: +Run it: -- Support for more than 50,000 urls (using a Sitemap Index file) -- Gzip of Sitemap files -- Variable priority of links -- Paging/sorting links (e.g. my_list?page=3) -- SSL host links (e.g. https:) -- Rails apps which are installed on a sub-path (e.g. example.com/blog_app/) +``` +ruby sitemap.rb +``` -Installation -======= +Output: -**As a gem** +``` +In /Users/karl/projects/sitemap_generator-test/public/ ++ sitemap.xml.gz 3 links / 364 Bytes +Sitemap stats: 3 links / 1 sitemaps / 0m00s -1. Add the gem as a dependency in your config/environment.rb +Successful ping of Google +Successful ping of Bing +``` - config.gem 'sitemap_generator', :lib => false, :source => 'http://gemcutter.org' +## Contents -2. 
`$ rake gems:install` +- [SitemapGenerator](#sitemapgenerator) + - [Features](#features) + - [Show Me](#show-me) + - [Contents](#contents) + - [Contribute](#contribute) + - [Foreword](#foreword) + - [Installation](#installation) + - [Ruby](#ruby) + - [Rails](#rails) + - [Getting Started](#getting-started) + - [Preventing Output](#preventing-output) + - [Rake Tasks](#rake-tasks) + - [Pinging Search Engines](#pinging-search-engines) + - [Crontab](#crontab) + - [Robots.txt](#robotstxt) + - [Ruby Modules](#ruby-modules) + - [Deployments & Capistrano](#deployments--capistrano) + - [Sitemaps with no Index File](#sitemaps-with-no-index-file) + - [Upload Sitemaps to a Remote Host using Adapters](#upload-sitemaps-to-a-remote-host-using-adapters) + - [Supported Adapters](#supported-adapters) + - [`SitemapGenerator::FileAdapter`](#sitemapgeneratorfileadapter) + - [`SitemapGenerator::FogAdapter`](#sitemapgeneratorfogadapter) + - [`SitemapGenerator::S3Adapter`](#sitemapgenerators3adapter) + - [`SitemapGenerator::AwsSdkAdapter`](#sitemapgeneratorawssdkadapter) + - [`SitemapGenerator::AwsSdkAdapter (DigitalOcean Spaces)`](#sitemapgeneratorawssdkadapter-digitalocean-spaces) + - [`SitemapGenerator::WaveAdapter`](#sitemapgeneratorwaveadapter) + - [`SitemapGenerator::GoogleStorageAdapter`](#sitemapgeneratorgooglestorageadapter) + - [An Example of Using an Adapter](#an-example-of-using-an-adapter) + - [Generating Multiple Sitemaps](#generating-multiple-sitemaps) + - [Sitemap Configuration](#sitemap-configuration) + - [A Simple Example](#a-simple-example) + - [Adding Links](#adding-links) + - [Supported Options to `add`](#supported-options-to-add) + - [Adding Links to the Sitemap Index](#adding-links-to-the-sitemap-index) + - [Accessing the LinkSet instance](#accessing-the-linkset-instance) + - [Speeding Things Up](#speeding-things-up) + - [Customizing your Sitemaps](#customizing-your-sitemaps) + - [Sitemap Options](#sitemap-options) + - [Sitemap Groups](#sitemap-groups) + - [A Groups 
Example](#a-groups-example) + - [Using `group` without a block](#using-group-without-a-block) + - [Sitemap Extensions](#sitemap-extensions) + - [News Sitemaps](#news-sitemaps) + - [Example](#example) + - [Supported options](#supported-options) + - [Image Sitemaps](#image-sitemaps) + - [Example](#example-1) + - [Supported options](#supported-options-1) + - [Video Sitemaps](#video-sitemaps) + - [Example](#example-2) + - [Supported options](#supported-options-2) + - [PageMap Sitemaps](#pagemap-sitemaps) + - [Supported options](#supported-options-3) + - [Example:](#example-3) + - [Alternate Links](#alternate-links) + - [Example](#example-4) + - [Supported options](#supported-options-4) + - [Alternates Example](#alternates-example) + - [ Mobile Sitemaps](#-mobile-sitemaps) + - [Example](#example-5) + - [Supported options](#supported-options-5) + - [Compatibility](#compatibility) + - [Licence](#licence) -3. Add the following line to your RAILS_ROOT/Rakefile +## Contribute - require 'sitemap_generator/tasks' rescue LoadError +Does your website use SitemapGenerator to generate Sitemaps? Where would you be without Sitemaps? Probably still knocking rocks together. Consider donating to the project to keep it up-to-date and open source. -4. `$ rake sitemap:install` +Click here to lend your support to: SitemapGenerator and make a donation at www.pledgie.com ! -**As a plugin** -1. Install plugin as normal +## Foreword - $ ./script/plugin install git://github.com/adamsalter/sitemap_generator.git +Adam Salter first created SitemapGenerator while we were working together in Sydney, Australia. Unfortunately, he passed away in 2009. Since then I have taken over development of SitemapGenerator. ----- +Those who knew him know what an amazing guy he was, and what an excellent Rails programmer he was. His passing is a great loss to the Rails community. -Installation should create a 'config/sitemap.rb' file which will contain your logic for generation of the Sitemap files. 
(If you want to recreate this file manually run `rake sitemap:install`) +The canonical repository is: [http://github.com/kjvarga/sitemap_generator][canonical_repo] -You can run `rake sitemap:refresh` as needed to create Sitemap files. This will also ping all the ['major'][sitemap_engines] search engines. (if you want to disable all non-essential output run the rake task thusly `rake -s sitemap:refresh`) -Sitemaps with many urls (100,000+) take quite a long time to generate, so if you need to refresh your Sitemaps regularly you can set the rake task up as a cron job. Most cron agents will only send you an email if there is output from the cron task. +## Installation -Optionally, you can add the following to your robots.txt file, so that robots can find the sitemap file. +### Ruby - Sitemap: /sitemap_index.xml.gz - -The robots.txt Sitemap URL should be the complete URL to the Sitemap Index, such as: `http://www.example.org/sitemap_index.xml.gz` +``` +gem install 'sitemap_generator' +``` +To use the rake tasks add the following to your `Rakefile`: -Example 'config/sitemap.rb' -========== +```ruby +require 'sitemap_generator/tasks' +``` - # Set the host name for URL creation - SitemapGenerator::Sitemap.default_host = "http://www.example.com" +The Rake tasks expect your sitemap to be at `config/sitemap.rb` but if you need to change that call like so: `rake sitemap:refresh CONFIG_FILE="path/to/sitemap.rb"` - SitemapGenerator::Sitemap.add_links do |sitemap| - # Put links creation logic here. - # - # The Root Path ('/') and Sitemap Index file are added automatically. - # Links are added to the Sitemap output in the order they are specified. 
- # - # Usage: sitemap.add path, options - # (default options are used if you don't specify them) - # - # Defaults: :priority => 0.5, :changefreq => 'weekly', - # :lastmod => Time.now, :host => default_host +### Rails - - # Examples: - - # add '/articles' - sitemap.add articles_path, :priority => 0.7, :changefreq => 'daily' +SitemapGenerator works with all versions of Rails and has been tested in Rails 2, 3 and 4. - # add all individual articles - Article.find(:all).each do |a| - sitemap.add article_path(a), :lastmod => a.updated_at - end +Add the gem to your `Gemfile`: - # add merchant path - sitemap.add '/purchase', :priority => 0.7, :host => "https://www.example.com" - - end +```ruby +gem 'sitemap_generator' +``` -Notes -======= +Alternatively, if you are not using a `Gemfile` add the gem to your `config/application.rb` file config block: -1) Tested/working on Rails 1.x.x <=> 2.x.x, no guarantees made for Rails 3.0. +```ruby +config.gem 'sitemap_generator' +``` -2) For large sitemaps it may be useful to split your generation into batches to avoid running out of memory. E.g.: +Note: SitemapGenerator automatically loads its Rake tasks when used with Rails. You **do not need** to require the `sitemap_generator/tasks` file. - # add movies - Movie.find_in_batches(:batch_size => 1000) do |movies| - movies.each do |movie| - sitemap.add "/movies/show/#{movie.to_param}", :lastmod => movie.updated_at, :changefreq => 'weekly' - end - end +## Getting Started + +### Preventing Output + +To disable all non-essential output you can pass the `-s` option to Rake, for example `rake -s sitemap:refresh`, or set the environment variable `VERBOSE=false` when calling as a Ruby script. + +To disable output in-code use the following: + +```ruby +SitemapGenerator.verbose = false +``` + +### Rake Tasks + +* `rake sitemap:install` will create a `config/sitemap.rb` file which is your sitemap configuration + and contains everything needed to build your sitemap. 
See + [**Sitemap Configuration**](#sitemap-configuration) below for more information about how to + define your sitemap. + +* `rake sitemap:refresh` will create or rebuild your sitemap files as needed. Sitemaps are + generated into the `public/` folder and by default are named `sitemap.xml.gz`, `sitemap1.xml.gz`, + `sitemap2.xml.gz`, etc. As you can see, they are automatically GZip compressed for you. In this case, + `sitemap.xml.gz` is your sitemap "index" file. + + `rake sitemap:refresh` will output information about each sitemap that is written including its + location, how many links it contains, and the size of the file. + +### Pinging Search Engines + +Using `rake sitemap:refresh` will notify Google and Bing to let them know that a new sitemap +is available. To generate new sitemaps without notifying search engines, use `rake sitemap:refresh:no_ping`. + +If you want to customize the hash of search engines you can access it at: + +```ruby +SitemapGenerator::Sitemap.search_engines +``` + +Usually you would be adding a new search engine to ping. In this case you can modify +the `search_engines` hash directly. This ensures that when +`SitemapGenerator::Sitemap.ping_search_engines` is called, your new search engine will be included. + +If you are calling `ping_search_engines` manually, then you can pass your new search engine +directly in the call, as in the following example: + +```ruby +SitemapGenerator::Sitemap.ping_search_engines(newengine: 'http://newengine.com/ping?url=%s') +``` + +The key gives the name of the search engine, as a string or symbol, and the value is the full URL to ping, with a string interpolation that will be replaced by the CGI escaped sitemap index URL. If you have any literal percent characters in your URL you need to escape them with `%%`. 
+ +If you are calling `SitemapGenerator::Sitemap.ping_search_engines` from outside of your sitemap config file, then you will need to set `SitemapGenerator::Sitemap.default_host` and any other options that you set in your sitemap config which affect the location of the sitemap index file. For example: + +```ruby +SitemapGenerator::Sitemap.default_host = 'http://example.com' +SitemapGenerator::Sitemap.ping_search_engines +``` + +Alternatively, you can pass in the full URL to your sitemap index, in which case we would have just the following: + +```ruby +SitemapGenerator::Sitemap.ping_search_engines('http://example.com/sitemap.xml.gz') +``` + +### Crontab + +To keep your sitemaps up-to-date, set up a cron job. Make sure to pass the `-s` option to silence rake. That way you will only get email if the sitemap build fails. + +If you're using [Whenever](https://github.com/javan/whenever), your schedule would look something like this: + +```ruby +# config/schedule.rb +every 1.day, :at => '5:00 am' do + rake "-s sitemap:refresh" +end +``` + +### Robots.txt + +You should add the URL of the sitemap index file to `public/robots.txt` to help search engines find your sitemaps. The URL should be the complete URL to the sitemap index. For example: + +``` +Sitemap: http://www.example.com/sitemap.xml.gz +``` + +### Ruby Modules + +If you need to include a module (e.g. a rails helper), you must include it in the sitemap interpreter +class. 
The part of your sitemap configuration that defines your sitemaps is run within an instance +of the `SitemapGenerator::Interpreter`: + +```ruby +SitemapGenerator::Interpreter.send :include, RoutingHelper +``` + +### Deployments & Capistrano + +To include the capistrano tasks just add the following to your Capfile: + +```ruby +require 'capistrano/sitemap_generator' +``` + +Configurable options: + +```ruby +set :sitemap_roles, :web # default +``` + +Available capistrano tasks: + +```ruby +sitemap:create #Create sitemaps without pinging search engines +sitemap:refresh #Create sitemaps and ping search engines +sitemap:clean #Clean up sitemaps in the sitemap path +``` + + **Generate sitemaps into a directory which is shared by all deployments.** + + You can set your sitemaps path to your shared directory using the `sitemaps_path` option. For example if we have a directory `public/shared/` that is shared by all deployments we can have our sitemaps generated into that directory by setting: + +```ruby +SitemapGenerator::Sitemap.sitemaps_path = 'shared/' +``` + +### Sitemaps with no Index File + +The sitemap index file is created for you on-demand, meaning that if you have a large site with more than one sitemap file, you will have a sitemap index file to reference those sitemap files. If however you have a small site with only one sitemap file, you don't require an index and so no index will be created. In both cases the index and sitemap file's name, respectively, is `sitemap.xml.gz`. + +You may want to always create an index, even if you only have a small site. Or you may never want to create an index. For these cases, you can use the `create_index` option to control index creation. You can read about this option in the Sitemap Options section below. 
+ +To always create an index: + +```ruby +SitemapGenerator::Sitemap.create_index = true +``` + +To never create an index: + +```ruby +SitemapGenerator::Sitemap.create_index = false +``` +Your sitemaps will still be called `sitemap.xml.gz`, `sitemap1.xml.gz`, `sitemap2.xml.gz`, etc. + +And the default "intelligent" behaviour: + +```ruby +SitemapGenerator::Sitemap.create_index = :auto +``` + +### Upload Sitemaps to a Remote Host using Adapters + +_This section needs better documentation. Please consider contributing._ + +Sometimes it is desirable to host your sitemap files on a remote server, and point robots +and search engines to the remote files. For example, if you are using a host like Heroku, +which doesn't allow writing to the local filesystem. You still require *some* write access, +because the sitemap files need to be written out before uploading. So generally a host will +give you write access to a temporary directory. On Heroku this is `tmp/` within your application +directory. + +#### Supported Adapters + +##### `SitemapGenerator::FileAdapter` + + Standard adapter, writes out to a file. + +##### `SitemapGenerator::FogAdapter` + + Uses `Fog::Storage` to upload to any service supported by Fog. + + You must `require 'fog'` in your sitemap config before using this adapter, + or `require` another library that defines `Fog::Storage`. + +##### `SitemapGenerator::S3Adapter` + + Uses `Fog::Storage` to upload to Amazon S3 storage. + + You must `require 'fog-aws'` in your sitemap config before using this adapter. + +##### `SitemapGenerator::AwsSdkAdapter` + + Uses `Aws::S3::Resource` to upload to Amazon S3 storage. Includes automatic detection of your AWS + credentials using `Aws::Credentials`. + + You must `require 'aws-sdk-s3'` in your sitemap config before using this adapter, + or `require` another library that defines `Aws::S3::Resource` and `Aws::Credentials`. 
+ + An example of using this adapter in your sitemap configuration: + + ```ruby + SitemapGenerator::Sitemap.adapter = SitemapGenerator::AwsSdkAdapter.new('s3_bucket', + aws_access_key_id: 'AKIAI3SW5CRAZBL4WSTA', + aws_secret_access_key: 'asdfadsfdsafsadf', + aws_region: 'us-east-1' + ) + ``` + +##### `SitemapGenerator::AwsSdkAdapter (DigitalOcean Spaces)` + + Uses `Aws::S3::Resource` to upload to DigitalOcean Spaces through its S3-compatible API. Includes automatic detection of your AWS + credentials using `Aws::Credentials`. + + You must `require 'aws-sdk-s3'` in your sitemap config before using this adapter, + or `require` another library that defines `Aws::S3::Resource` and `Aws::Credentials`. + + An example of using this adapter in your sitemap configuration: + + ```ruby + SitemapGenerator::Sitemap.adapter = SitemapGenerator::AwsSdkAdapter.new('s3_bucket', + aws_access_key_id: 'AKIAI3SW5CRAZBL4WSTA', + aws_secret_access_key: 'asdfadsfdsafsadf', + aws_region: 'sfo2', + aws_endpoint: 'https://sfo2.digitaloceanspaces.com' + ) + ``` + +##### `SitemapGenerator::WaveAdapter` + + Uses `CarrierWave::Uploader::Base` to upload to any service supported by CarrierWave, for example, + Amazon S3, Rackspace Cloud Files, and MongoDB's GridFS. + + You must `require 'carrierwave'` in your sitemap config before using this adapter, + or `require` another library that defines `CarrierWave::Uploader::Base`. + + Some documentation exists [on the wiki page][remote_hosts]. + +##### `SitemapGenerator::GoogleStorageAdapter` + + Uses [`Google::Cloud::Storage`][google_cloud_storage_gem] to upload to Google Cloud storage. + + You must `require 'google/cloud/storage'` in your sitemap config before using this adapter. 
+ + An example of using this adapter in your sitemap configuration with options: + + ```ruby + SitemapGenerator::Sitemap.adapter = SitemapGenerator::GoogleStorageAdapter.new( + credentials: 'path/to/keyfile.json', + project_id: 'google_account_project_id', + bucket: 'name_of_bucket' + ) + ``` + In line with Google's authentication options, the adapter can also pick up credentials from environment variables. All [supported environment variables][google_cloud_storage_authentication] can be used, for example: `GOOGLE_CLOUD_PROJECT` and `GOOGLE_CLOUD_CREDENTIALS`. An example of using this adapter with the environment variables is: + + ```ruby + SitemapGenerator::Sitemap.adapter = SitemapGenerator::GoogleStorageAdapter.new( + bucket: 'name_of_bucket' + ) + ``` + + All options other than the `:bucket` option are passed to the `Google::Cloud::Storage.new` initializer giving you maximum configurability. See the [Google Cloud Storage initializer][google_cloud_storage_initializer] for supported options. + +#### An Example of Using an Adapter + +1. Please see [this wiki page][remote_hosts] for more information about setting up SitemapGenerator to upload to a + remote host. + +2. This example uses the CarrierWave adapter. It shows some common settings that are used when the hostname hosting + the sitemaps differs from the hostname of the sitemap links. + + ```ruby + # Your website's host name + SitemapGenerator::Sitemap.default_host = "http://www.example.com" + + # The remote host where your sitemaps will be hosted + SitemapGenerator::Sitemap.sitemaps_host = "http://s3.amazonaws.com/sitemap-generator/" + + # The directory to write sitemaps to locally + SitemapGenerator::Sitemap.public_path = 'tmp/' + + # Set this to a directory/path if you don't want to upload to the root of your `sitemaps_host` + SitemapGenerator::Sitemap.sitemaps_path = 'sitemaps/' + + # The adapter to perform the upload of sitemap files. 
+ SitemapGenerator::Sitemap.adapter = SitemapGenerator::WaveAdapter.new + ``` + +3. Update your `robots.txt` file to point robots to the remote sitemap index file, e.g.: + + ``` + Sitemap: http://s3.amazonaws.com/sitemap-generator/sitemaps/sitemap.xml.gz + ``` + + You generate your sitemaps as usual using `rake sitemap:refresh`. + + Note that SitemapGenerator will automatically turn off `include_index` in this case because + the `sitemaps_host` does not match the `default_host`. The link to the sitemap index file + that would otherwise be included would point to a different host than the rest of the links + in the sitemap, something that the sitemap rules forbid. (Since version 3.2 this is no + longer an issue because [`include_index` is off by default][include_index_change].) + +4. Verify to Google that you own the S3 URL + + In order for Google to use your sitemap, you need to prove you own the S3 bucket through [Google Webmaster Tools](https://www.google.com/webmasters/tools/home?hl=en). In the example above, you would add the site `http://s3.amazonaws.com/sitemap-generator/sitemaps`. Once you have verified you own the directory, then add your + sitemap index to the list of sitemaps for the site. + +### Generating Multiple Sitemaps + +Each call to `create` creates a new sitemap index and associated sitemaps. You can call `create` as many times as you want within your sitemap configuration. + +You must remember to use a different filename or location for each set of sitemaps, otherwise they will +overwrite each other. You can use the `filename`, `namer` and `sitemaps_path` options for this. 
+ +In the following example we generate three sitemaps each in its own subdirectory: + +```ruby +%w(google bing apple).each do |subdomain| + SitemapGenerator::Sitemap.default_host = "https://#{subdomain}.mysite.com" + SitemapGenerator::Sitemap.sitemaps_path = "sitemaps/#{subdomain}" + SitemapGenerator::Sitemap.create do + add '/home' + end +end +``` + +Outputs: + +``` ++ sitemaps/google/sitemap1.xml.gz 2 links / 822 Bytes / 328 Bytes gzipped ++ sitemaps/google/sitemap.xml.gz 1 sitemaps / 389 Bytes / 217 Bytes gzipped +Sitemap stats: 2 links / 1 sitemaps / 0m00s ++ sitemaps/bing/sitemap1.xml.gz 2 links / 820 Bytes / 330 Bytes gzipped ++ sitemaps/bing/sitemap.xml.gz 1 sitemaps / 388 Bytes / 217 Bytes gzipped +Sitemap stats: 2 links / 1 sitemaps / 0m00s ++ sitemaps/apple/sitemap1.xml.gz 2 links / 820 Bytes / 330 Bytes gzipped ++ sitemaps/apple/sitemap.xml.gz 1 sitemaps / 388 Bytes / 214 Bytes gzipped +Sitemap stats: 2 links / 1 sitemaps / 0m00s +``` + +If you don't want to have to generate all the sitemaps at once, or you want to refresh some more often than others, you can split them up into their own configuration files. 
Using the above example we would have: + +```ruby +# config/google_sitemap.rb +SitemapGenerator::Sitemap.default_host = "https://google.mysite.com" +SitemapGenerator::Sitemap.sitemaps_path = "sitemaps/google" +SitemapGenerator::Sitemap.create do + add '/home' +end + +# config/apple_sitemap.rb +SitemapGenerator::Sitemap.default_host = "https://apple.mysite.com" +SitemapGenerator::Sitemap.sitemaps_path = "sitemaps/apple" +SitemapGenerator::Sitemap.create do + add '/home' +end + +# config/bing_sitemap.rb +SitemapGenerator::Sitemap.default_host = "https://bing.mysite.com" +SitemapGenerator::Sitemap.sitemaps_path = "sitemaps/bing" +SitemapGenerator::Sitemap.create do + add '/home' +end +``` + + +To generate each one specify the configuration file to run by passing the `CONFIG_FILE` option to `rake sitemap:refresh`, e.g.: + +``` +rake sitemap:refresh CONFIG_FILE="config/google_sitemap.rb" +rake sitemap:refresh CONFIG_FILE="config/apple_sitemap.rb" +rake sitemap:refresh CONFIG_FILE="config/bing_sitemap.rb" +``` + +## Sitemap Configuration + +A sitemap configuration file contains all the information needed to generate your sitemaps. By default SitemapGenerator looks for a configuration file in `config/sitemap.rb` - relative to your application root or the current working directory. (Run `rake sitemap:install` to have this file generated for you if you have not done so already.) + +If you want to use a non-standard configuration file, or have multiple configuration files, you can specify which one to run by passing the `CONFIG_FILE` option like so: + +``` +rake sitemap:refresh CONFIG_FILE="config/geo_sitemap.rb" +``` + +### A Simple Example + +So what does a sitemap configuration look like? 
Let's take a look at a simple example: + +```ruby +SitemapGenerator::Sitemap.default_host = "http://www.example.com" +SitemapGenerator::Sitemap.create do + add '/welcome' +end +``` + +A few things to note: + +* `SitemapGenerator::Sitemap` is a lazy-initialized sitemap object provided for your convenience. +* Every sitemap must set `default_host`. This is the hostname that is used when building links to add to the sitemap (and all links in a sitemap must belong to the same host). +* The `create` method takes a block with calls to `add` to add links to the sitemap. +* The sitemaps are written to the `public/` directory in the directory from which the script is run. You can specify a custom location using the `public_path` or `sitemaps_path` option. + +Now let's see what is output when we run this configuration with `rake sitemap:refresh:no_ping`: + +``` +In /Users/karl/projects/sitemap_generator-test/public/ ++ sitemap.xml.gz 2 links / 347 Bytes +Sitemap stats: 2 links / 1 sitemaps / 0m00s +``` + +Weird! The sitemap has two links, even though we only added one! This is because SitemapGenerator adds the root URL `/` for you by default. (Note that prior to version 3.2 the URL of the sitemap index file was also added to the sitemap by default but [this behaviour has been changed][include_index_change] because of Google complaining about nested indexing. This also doesn't make sense anymore because indexes are not always needed.) You can change the default behaviour by setting the `include_root` or `include_index` option. + +Now let's take a look at the file that was created. After uncompressing and XML-tidying the contents we have: + + +* `public/sitemap.xml.gz` + +```xml +<?xml version="1.0" encoding="UTF-8"?> +<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"> + <url> + <loc>http://www.example.com/</loc> + <lastmod>2011-05-21T00:03:38+00:00</lastmod> + <changefreq>always</changefreq> + <priority>1.0</priority> + </url> + <url> + <loc>http://www.example.com/welcome</loc> + <lastmod>2011-05-21T00:03:38+00:00</lastmod> + <changefreq>weekly</changefreq> + <priority>0.5</priority> + </url> +</urlset> +``` + +The sitemaps conform to the [Sitemap 0.9 protocol][sitemap_protocol]. 
Notice the value for `priority` and `changefreq` on the root link, the one that was added for us? The values tell us that this link is the highest priority and should be checked regularly because it is constantly changing. You can specify your own values for these options in your call to `add`. + +In this example no sitemap index was created because we have so few links, so none was needed. If we run the same example above and set `create_index = true` we can take a look at what an index file looks like: + +```ruby +SitemapGenerator::Sitemap.default_host = "http://www.example.com" +SitemapGenerator::Sitemap.create_index = true +SitemapGenerator::Sitemap.create do + add '/welcome' +end +``` + +And the output: + +``` +In /Users/karl/projects/sitemap_generator-test/public/ ++ sitemap1.xml.gz 2 links / 347 Bytes ++ sitemap.xml.gz 1 sitemaps / 228 Bytes +Sitemap stats: 2 links / 1 sitemaps / 0m00s +``` + +Now if we look at the uncompressed and formatted contents of `sitemap.xml.gz` we can see that it is a sitemap index and `sitemap1.xml.gz` is a sitemap: + +* `public/sitemap.xml.gz` + +```xml +<?xml version="1.0" encoding="UTF-8"?> +<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"> + <sitemap> + <loc>http://www.example.com/sitemap1.xml.gz</loc> + <lastmod>2013-05-01T18:10:26-07:00</lastmod> + </sitemap> +</sitemapindex> +``` + +### Adding Links -3) New Capistrano deploys will remove your Sitemap files, unless you run `rake sitemap:refresh`. The way around this is to create a cap task: +You call `add` in the block passed to `create` to add a **path** to your sitemap. `add` takes a string path and optional hash of options, generates the URL and adds it to the sitemap. You only need to pass a **path** because the URL will be built for us using the `default_host` we specified. However, if we want to use a different host for a particular link, we can pass the `:host` option to `add`. 
- after "deploy:update_code", "deploy:copy_old_sitemap" +Let's see another example: - namespace :deploy do - task :copy_old_sitemap do - run "if [ -e #{previous_release}/public/sitemap_index.xml.gz ]; then cp #{previous_release}/public/sitemap* #{current_release}/public/; fi" - end +```ruby +SitemapGenerator::Sitemap.default_host = "http://www.example.com" +SitemapGenerator::Sitemap.create do + add '/contact_us' + Content.find_each do |content| + add content_path(content), :lastmod => content.updated_at + end +end +``` + +In this example first we add the `/contact_us` page to the sitemap and then we iterate through the Content model's records adding each one to the sitemap using the `content_path` helper method to generate the path for each record. + +The **Rails URL/path helper methods are automatically made available** to us in the `create` block. This keeps the logic for building our paths out of the sitemap config and in the Rails application where it should be. You use those methods just like you would in your application's view files. + +In the example above we pass a `lastmod` (last modified) option with the value of the record's `updated_at` attribute so that search engines know to only re-index the page when the record changes. + +Looking at the output from running this sitemap, we see that we have a few more links than before: + +``` ++ sitemap.xml.gz 12 links / 2.3 KB / 365 Bytes gzipped +Sitemap stats: 12 links / 1 sitemaps / 0m00s +``` + +From this example we can see that: + +* The `create` block can contain Ruby code +* The Rails URL/path helper methods are made available to us, and +* The basic syntax for adding paths to the sitemap using `add` + +You can read more about `add` in the [XML Specification](http://www.sitemaps.org/protocol.html#xmlTagDefinitions). + +### Supported Options to `add` + +For other options be sure to check out the **Sitemap Extensions** section below. + +* `changefreq` - Default: `'weekly'` (String). 
+ + Indicates how often the content of the page changes. One of `'always'`, `'hourly'`, `'daily'`, `'weekly'`, `'monthly'`, `'yearly'` or `'never'`. Example: + +```ruby +add '/contact_us', :changefreq => 'monthly' +``` + +* `lastmod` - Default: `Time.now` (Integer, Time, Date, DateTime, String). + + The date and time of last modification. Example: + +```ruby +add content_path(content), :lastmod => content.updated_at +``` + +* `host` - Default: `default_host` (String). + + Host to use when building the URL. It's not technically valid to specify a different host for a link in a sitemap according to the spec, but this facility exists in case you have a need. Example: + +```ruby +add '/login', :host => 'https://securehost.com' +``` + +* `priority` - Default: `0.5` (Float). + + The priority of the URL relative to other URLs on a scale from 0 to 1. Example: + +```ruby +add '/about', :priority => 0.75 +``` + +* `expires` - Optional (Integer, Time, Date, DateTime, String) + + [Request removal of this URL from search engines' indexes][expires]. Example (uses ActiveSupport): + +```ruby +add '/about', :expires => Time.now + 2.weeks +``` + +### Adding Links to the Sitemap Index + +Sometimes you may need to manually add some links to the sitemap index file. For example if you are generating your sitemaps incrementally you may want to create a sitemap index which includes the files which have already been generated. To achieve this you can use the `add_to_index` method which works exactly the same as the `add` method described above. + +It supports the same options as `add`, namely: + +* `changefreq` +* `lastmod` +* `host` + + The value for `host` defaults to whatever you have set as your `sitemaps_host`. Remember that the `sitemaps_host` is the host where your sitemaps reside. If your sitemaps are on the same host as your `default_host`, then the value for `default_host` is used. 
Example: + +```ruby +add_to_index '/mysitemap1.xml.gz', :host => 'http://sitemaphostingserver.com' +``` + +* `priority` + +An example: + +```ruby +SitemapGenerator::Sitemap.default_host = "http://www.example.com" +SitemapGenerator::Sitemap.create do + add_to_index '/mysitemap1.xml.gz' + add_to_index '/mysitemap2.xml.gz' + # ... +end +``` + +When you add links in this way, an index is always created, unless you've explicitly set `create_index` to `false`. + +### Accessing the LinkSet instance + +Sometimes you need to mess with the internals to do custom stuff. If you need access to the LinkSet instance from within `create()` you can use the `sitemap` method to do so. + +In this example, say we have already pre-generated three sitemap files: `sitemap1.xml.gz`, `sitemap2.xml.gz`, `sitemap3.xml.gz`. Now we want to start the sitemap generation at `sitemap4.xml.gz` and create a bunch of new sitemaps. There are a few ways we can do this, but this is an easy way: + +```ruby +SitemapGenerator::Sitemap.default_host = "http://www.example.com" +SitemapGenerator::Sitemap.namer = SitemapGenerator::SimpleNamer.new(:sitemap, :start => 4) +SitemapGenerator::Sitemap.create do + (1..3).each do |i| + add_to_index "sitemap#{i}.xml.gz" + end + add '/home' + add '/another' +end +``` + +The output looks something like this: + +``` +In /Users/karl/projects/sitemap_generator-test/public/ ++ sitemap4.xml.gz 3 links / 355 Bytes ++ sitemap.xml.gz 4 sitemaps / 242 Bytes +Sitemap stats: 3 links / 4 sitemaps / 0m00s +``` + +### Speeding Things Up + +For large ActiveRecord collections with thousands of records it is advisable to iterate through them in batches to avoid loading all records into memory at once. For this reason in the example above we use `Content.find_each` which is a batched iterator available since Rails 2.3.2, rather than `Content.all`. 
+ + +## Customizing your Sitemaps + +SitemapGenerator supports a number of options which allow you to control every aspect of your sitemap generation. How they are named, where they are stored, the contents of the links and the location that the sitemaps will be hosted from can all be set. + +The options can be set in the following ways. + +On `SitemapGenerator::Sitemap`: + +```ruby +SitemapGenerator::Sitemap.default_host = 'http://example.com' +SitemapGenerator::Sitemap.sitemaps_path = 'sitemaps/' +``` + +These options will apply to all sitemaps. This is how you set most options. + +Passed as options in the call to `create`: + +```ruby +SitemapGenerator::Sitemap.create( + :default_host => 'http://example.com', + :sitemaps_path => 'sitemaps/') do + add '/home' +end +``` + +This is useful if you are setting a lot of options. + +Finally, passed as options in a call to `group`: + +```ruby +SitemapGenerator::Sitemap.create(:default_host => 'http://example.com') do + group(:filename => :somegroup, :sitemaps_path => 'sitemaps/') do + add '/home' + end +end +``` + +The options passed to `group` only apply to the links and sitemaps generated in the group. Sitemap Groups are useful to group links into specific sitemaps, or to set options that you only want to apply to the links in that group. + +### Sitemap Options + +The following options are supported. + +* `:create_index` - Supported values: `true`, `false`, `:auto`. Default: `:auto`. Whether to create a sitemap index file. If `true` an index file is always created regardless of how many sitemap files are generated. If `false` an index file is never created. If `:auto` an index file is created only when you have more than one sitemap file (i.e. you have added more than 50,000 - `SitemapGenerator::MAX_SITEMAP_LINKS` - links). + +* `:default_host` - String. Required. **Host including protocol** to use when building a link to add to your sitemap. For example `http://example.com`. 
Calling `add '/home'` would then generate the URL `http://example.com/home` and add that to the sitemap. You can pass a `:host` option in your call to `add` to override this value on a per-link basis. For example calling `add '/home', :host => 'https://example.com'` would generate the URL `https://example.com/home`, for that link only. + +* `:filename` - Symbol. The **base name for the files** that will be generated. The default value is `:sitemap`. This yields files with names like `sitemap.xml.gz`, `sitemap1.xml.gz`, `sitemap2.xml.gz`, `sitemap3.xml.gz` etc. If we now set the value to `:geo` the files would be named `geo.xml.gz`, `geo1.xml.gz`, `geo2.xml.gz`, `geo3.xml.gz` etc. + +* `:include_index` - Boolean. Whether to **add a link pointing to the sitemap index** to the current sitemap. This points search engines to your Sitemap Index to include it in the indexing of your site. 2012-07: This is now turned off by default because Google may complain about there being 'Nested Sitemap indexes'. Default is `false`. Turned off when `sitemaps_host` is set or within a `group()` block. + +* `:include_root` - Boolean. Whether to **add the root** url i.e. '/' to the current sitemap. Default is `true`. Turned off within a `group()` block. + +* `:public_path` - String. A **full or relative path** to the `public` directory or the directory you want to write sitemaps into. Defaults to `public/` under your application root or relative to the current working directory. + +* `:sitemaps_host` - String. **Host including protocol** to use when generating a link to a sitemap file i.e. the hostname of the server where the sitemaps are hosted. The value will differ from the hostname in your sitemap links. For example: `'http://amazon.aws.com/'`. Note that `include_index` is +automatically turned off when the `sitemaps_host` does not match `default_host`. 
+This is because the link to the sitemap index file that would otherwise be added would point to a different host than the rest of the links in the sitemap, something that the sitemap rules forbid. + +* `:namer` - A `SitemapGenerator::SimpleNamer` instance **for generating sitemap names**. You can read about Sitemap Namers by reading the API docs. Allows you to set the name, extension and number sequence for sitemap files, as well as modify the name of the first file in the sequence, which is often the index file. A simple example if we want to generate files like 'newname.xml.gz', 'newname1.xml.gz', etc. is `SitemapGenerator::SimpleNamer.new(:newname)`. + +* `:sitemaps_path` - String. A **relative path** giving a directory under your `public_path` at which to write sitemaps. The difference between the two options is that the `sitemaps_path` is used when generating a link to a sitemap file. For example, if we set `SitemapGenerator::Sitemap.sitemaps_path = 'en/'` and use the default `public_path` sitemaps will be written to `public/en/`. The URL to the sitemap index would then be `http://example.com/en/sitemap.xml.gz`. + +* `:verbose` - Boolean. Whether to **output a sitemap summary** describing the sitemap files and giving statistics about your sitemap. Default is `false`. When using the Rake tasks `verbose` will be `true` unless you pass the `-s` option. + +* `:adapter` - Instance. The default adapter is a `SitemapGenerator::FileAdapter` which simply writes files to the filesystem. You can use a `SitemapGenerator::WaveAdapter` for uploading sitemaps to remote servers - useful for read-only hosts such as Heroku. Or you can provide an instance of your own class to provide custom behavior. Your class must define a write method which takes a `SitemapGenerator::Location` and raw XML data. + +* `:compress` - Specifies which files to compress with gzip. Default is `true`. Accepted values: + * `true` - Boolean; compress all files. + * `false` - Boolean; Do not compress any files. 
+ * `:all_but_first` - Symbol; leave the first file uncompressed but compress all remaining files. + + The compression setting applies to groups too. So `:all_but_first` will have the same effect (the first file in the group will not be compressed, the rest will). So if you require different behaviour for your groups, pass in a `:compress` option e.g. `group(:compress => false) { add('/link') }` + +* `:max_sitemap_links` - Integer. The maximum number of links to put in each sitemap. Default is `SitemapGenerator::MAX_SITEMAP_LINKS`, or 50,000. + +## Sitemap Groups + +Sitemap Groups is a powerful feature that is also very simple to use. + +* All options are supported except for `public_path`. You cannot change the public path. +* Groups inherit the options set on the default sitemap. +* `include_index` and `include_root` are `false` by default in a group. +* The sitemap index file is shared by all groups. +* Groups can handle any number of links. +* Group sitemaps are finalized (written out) as they get full and at the end of each group. +* It's a good idea to name your groups + +### A Groups Example + +When you create a new group you pass options which will apply only to that group. You pass a block to `group`. Inside your block you call `add` to add links to the group. 
+ +Let's see an example that demonstrates a few interesting things about groups: + +```ruby +SitemapGenerator::Sitemap.default_host = "http://www.example.com" +SitemapGenerator::Sitemap.create do + add '/rss' + + group(:sitemaps_path => 'en/', :filename => :english) do + add '/home' + end + + group(:sitemaps_path => 'fr/', :filename => :french) do + add '/maison' + end +end +``` + +And the output from running the above: + +``` +In /Users/karl/projects/sitemap_generator-test/public/ ++ en/english.xml.gz 1 links / 328 Bytes ++ fr/french.xml.gz 1 links / 329 Bytes ++ sitemap1.xml.gz 2 links / 346 Bytes ++ sitemap.xml.gz 3 sitemaps / 252 Bytes +Sitemap stats: 4 links / 3 sitemaps / 0m00s +``` + +So we have two sitemaps with one link each and one sitemap with two links. The sitemaps from the groups are easy to spot by their filenames. They are `english.xml.gz` and `french.xml.gz`. They contain only one link each because **`include_index` and `include_root` are set to `false` by default** in a group. + +On the other hand, the default sitemap which we added `/rss` to has two links. The root url was added to it when we added `/rss`. If we hadn't added that link `sitemap1.xml.gz` would not have been created. So **when we are using groups, the default sitemap will only be created if we add links to it**. + +**The sitemap index file is shared by all groups**. You can change its filename by setting `SitemapGenerator::Sitemap.filename` or by passing the `:filename` option to `create`. + +The options you use when creating your groups will determine which and how many sitemaps are created. Groups will inherit the default sitemap when possible, and will continue the normal series. However a group will often specify an option which requires the links in that group to be in their own files. In this case, if the default sitemap were being used it would be finalized before starting the next sitemap in the series. 
+ +If you have changed your sitemaps physical location in a group, then the default sitemap will not be used and it will be unaffected by the group. **Group sitemaps are finalized as they get full and at the end of each group.** + +### Using `group` without a block + +In some circumstances you may need to conditionally add records to a group or perform some other more complicated logic. In these cases you can instantiate a group instance, add links to it and finalize it manually. + +When called with a block, any partial sitemaps are automatically written out for you when the block terminates. Because this does not happen when instantiating manually, you must call `finalize!` on your group to ensure that it is written out and gets included in the sitemap index file. Note that group sitemaps will still automatically be finalized (written out) as they become full; calling `finalize!` is to handle the case when a sitemap is not full. + +An example: + +```ruby +SitemapGenerator::Sitemap.verbose = true +SitemapGenerator::Sitemap.default_host = "http://www.example.com" +SitemapGenerator::Sitemap.create do + odds = group(:filename => :odds) + evens = group(:filename => :evens) + + (1..20).each do |i| + if (i % 2) == 0 + evens.add i.to_s + else + odds.add i.to_s end + end + + odds.finalize! + evens.finalize! +end +``` + +And the output from running the above: + +``` +In '/Users/kvarga/Projects/sitemap_generator-test/public/': ++ odds.xml.gz 10 links / 371 Bytes ++ evens.xml.gz 10 links / 371 Bytes ++ sitemap.xml.gz 2 sitemaps / 240 Bytes +Sitemap stats: 20 links / 2 sitemaps / 0m00s +``` + +## Sitemap Extensions + +### News Sitemaps + +A news item can be added to a sitemap URL by passing a `:news` hash to `add`. The hash must contain tags defined by the [News Sitemap][news_tags] specification. 
+ +#### Example + +```ruby +SitemapGenerator::Sitemap.default_host = "http://www.example.com" +SitemapGenerator::Sitemap.create do + add('/index.html', :news => { + :publication_name => "Example", + :publication_language => "en", + :title => "My Article", + :keywords => "my article, articles about myself", + :stock_tickers => "SAO:PETR3", + :publication_date => "2011-08-22", + :access => "Subscription", + :genres => "PressRelease" + }) +end +``` + +#### Supported options + +* `:news` - Hash + * `:publication_name` + * `:publication_language` + * `:publication_date` + * `:genres` + * `:access` + * `:title` + * `:keywords` + * `:stock_tickers` + +### Image Sitemaps + +Images can be added to a sitemap URL by passing an `:images` array to `add`. Each item in the array must be a Hash containing tags defined by the [Image Sitemap][image_tags] specification. + +#### Example + +```ruby +SitemapGenerator::Sitemap.default_host = "http://www.example.com" +SitemapGenerator::Sitemap.create do + add('/index.html', :images => [{ + :loc => 'http://www.example.com/image.png', + :title => 'Image' }]) +end +``` + +#### Supported options + +* `:images` - Array of hashes + * `:loc` Required, location of the image + * `:caption` + * `:geo_location` + * `:title` + * `:license` + +### Video Sitemaps + +A video can be added to a sitemap URL by passing a `:video` Hash to `add()`. The Hash can contain tags defined by the [Video Sitemap specification][video_tags]. + +To add more than one video to a url, pass an array of video hashes using the `:videos` option. 
+ +#### Example + +```ruby +SitemapGenerator::Sitemap.default_host = "http://www.example.com" +SitemapGenerator::Sitemap.create do + add('/index.html', :video => { + :thumbnail_loc => 'http://www.example.com/video1_thumbnail.png', + :title => 'Title', + :description => 'Description', + :content_loc => 'http://www.example.com/cool_video.mpg', + :tags => %w[one two three], + :category => 'Category' + }) +end +``` + +#### Supported options + +* `:video` or `:videos` - Hash or array of hashes, respectively + * `:thumbnail_loc` - Required. String, URL of the thumbnail image. + * `:title` - Required. String, title of the video. + * `:description` - Required. String, description of the video. + * `:content_loc` - Depends. String, URL. One of content_loc or player_loc must be present. + * `:player_loc` - Depends. String, URL. One of content_loc or player_loc must be present. + * `:allow_embed` - Boolean, attribute of player_loc. + * `:autoplay` - Boolean, default true. Attribute of player_loc. + * `:duration` - Recommended. Integer or string. Duration in seconds. + * `:expiration_date` - Recommended when applicable. The date after which the video will no longer be available. + * `:rating` - Optional + * `:view_count` - Optional. Integer or string. + * `:publication_date` - Optional + * `:tags` - Optional. Array of string tags. + * `:tag` - Optional. String, single tag. + * `:category` - Optional + * `:family_friendly`- Optional. Boolean + * `:gallery_loc` - Optional. String, URL. + * `:gallery_title` - Optional. Title attribute of the gallery location element + * `:uploader` - Optional. + * `:uploader_info` - Optional. Info attribute of uploader element + * `:price` - Optional. Only one price supported at this time + * `:price_currency` - Required. In [ISO_4217][iso_4217] format. + * `:price_type` - Optional. `rent` or `own` + * `:price_resolution` - Optional. `HD` or `SD` + * `:live` - Optional. Boolean. + * `:requires_subscription` - Optional. Boolean. 
+ +### PageMap Sitemaps + +Pagemaps can be added by passing a `:pagemap` hash to `add`. The hash must contain a `:dataobjects` key with an array of dataobject hashes. Each dataobject hash contains a `:type` and `:id`, and an optional array of `:attributes`. Each attribute hash can contain two keys: `:name` and `:value`, with string values. For more information consult the [official documentation on PageMaps][using_pagemaps]. + +#### Supported options + +* `:pagemap` - Hash + * `:dataobjects` - Required, array of hashes + * `:type` - Required, string, type of the object + * `:id` - String, ID of the object + * `:attributes` - Array of hashes + * `:name` - Required, string, name of the attribute. + * `:value` - String, value of the attribute. + +#### Example: + +```ruby +SitemapGenerator::Sitemap.default_host = "http://www.example.com" +SitemapGenerator::Sitemap.create do + add('/blog/post', :pagemap => { + :dataobjects => [{ + :type => 'document', + :id => 'hibachi', + :attributes => [ + { :name => 'name', :value => 'Dragon' }, + { :name => 'review', :value => '3.5' }, + ] + }] + }) +end +``` + +### Alternate Links + +A useful feature for internationalization is to specify alternate links for a url. + +Alternate links can be added by passing an `:alternate` Hash to `add`. You can pass more than one alternate link by passing an array of hashes using the `:alternates` option. + +Check out the Google specification [here][alternate_links]. + +#### Example + +```ruby +SitemapGenerator::Sitemap.default_host = "http://www.example.com" +SitemapGenerator::Sitemap.create do + add('/index.html', :alternate => { + :href => 'http://www.example.de/index.html', + :lang => 'de', + :nofollow => true + }) +end +``` + +#### Supported options + +* `:alternate`/`:alternates` - Hash or array of hashes, respectively + * `:href` - Required, string. + * `:lang` - Optional, string. + * `:nofollow` - Optional, boolean. Used to mark link as "nofollow". + * `:media` - Optional, string. 
Specify [media targets for responsive design pages][media]. + +#### Alternates Example + +```ruby +SitemapGenerator::Sitemap.default_host = "http://www.example.com" +SitemapGenerator::Sitemap.create do + add('/index.html', :alternates => [ + { + :href => 'http://www.example.de/index.html', + :lang => 'de', + :nofollow => true + }, + { + :href => 'http://www.example.es/index.html', + :lang => 'es', + :nofollow => true + } + ]) +end +``` + +### Mobile Sitemaps + +Mobile sitemaps include a specific `<mobile:mobile/>` tag. + +Check out the Google specification [here][sitemap_mobile]. + +#### Example + +```ruby +SitemapGenerator::Sitemap.default_host = "http://www.example.com" +SitemapGenerator::Sitemap.create do + add('/index.html', :mobile => true) +end +``` -Known Bugs -======== +#### Supported options -- Sitemaps.org [states][sitemaps_org] that no Sitemap XML file should be more than 10Mb uncompressed. The plugin will warn you about this, but does nothing to avoid it (like move some URLs into a later file). -- There's no check on the size of a URL which [isn't supposed to exceed 2,048 bytes][sitemaps_xml]. -- Currently only supports one Sitemap Index file, which can contain 50,000 Sitemap files which can each contain 50,000 urls, so it _only_ supports up to 2,500,000,000 (2.5 billion) urls. I personally have no need of support for more urls, but plugin could be improved to support this. +* `:mobile` - Presence of this option will turn on the mobile flag regardless of value. -Thanks (in no particular order) -======== +## Compatibility -- [Karl Varga (aka Bear Grylls)](http://github.com/kjvarga) -- [Dan Pickett](http://github.com/dpickett) -- [Rob Biedenharn](http://github.com/rab) -- [Richie Vos](http://github.com/jerryvos) +Compatible with all versions of Rails and Ruby. +Ruby 1.9.3 support was dropped in Version 6.0.0 of this gem. +## Licence -Follow me on: ---------- +Released under the MIT License. See the [MIT-LICENSE](MIT-LICENSE) file.
-> Twitter: [twitter.com/adamsalter](http://twitter.com/adamsalter) -> Github: [github.com/adamsalter](http://github.com/adamsalter) +MIT. See the LICENSE.md file. -Copyright (c) 2009 Adam @ [Codebright.net][cb], released under the MIT license +Copyright (c) Karl Varga released under the MIT license +[canonical_repo]:http://github.com/kjvarga/sitemap_generator [enterprise_class]:https://twitter.com/dhh/status/1631034662 "I use enterprise in the same sense the Phusion guys do - i.e. Enterprise Ruby. Please don't look down on my use of the word 'enterprise' to represent being a cut above. It doesn't mean you ever have to work for a company the size of IBM. Or constantly fight inertia, writing crappy software, adhering to change management practices and spending hours in meetings... Not that there's anything wrong with that - Wait, what?" -[sitemap_engines]:http://en.wikipedia.org/wiki/Sitemap_index "http://en.wikipedia.org/wiki/Sitemap_index" -[sitemaps_org]:http://www.sitemaps.org/protocol.php "http://www.sitemaps.org/protocol.php" -[sitemaps_xml]:http://www.sitemaps.org/protocol.php#xmlTagDefinitions "XML Tag Definitions" +[sitemaps_org]:http://www.sitemaps.org/protocol.html "http://www.sitemaps.org/protocol.html" +[sitemaps_xml]:http://www.sitemaps.org/protocol.html#xmlTagDefinitions "XML Tag Definitions" [sitemap_generator_usage]:http://wiki.github.com/adamsalter/sitemap_generator/sitemapgenerator-usage "http://wiki.github.com/adamsalter/sitemap_generator/sitemapgenerator-usage" -[boost_juice]:http://www.boostjuice.com.au/ "Mmmm, sweet, sweet Boost Juice." 
-[cb]:http://codebright.net "http://codebright.net" +[sitemap_images]:http://www.google.com/support/webmasters/bin/answer.py?answer=178636 +[sitemap_video]:https://support.google.com/webmasters/answer/80471?hl=en&ref_topic=4581190 +[sitemap_news]:https://support.google.com/news/publisher/topic/2527688?hl=en&ref_topic=4359874 +[sitemap_mobile]:http://support.google.com/webmasters/bin/answer.py?hl=en&answer=34648 +[sitemap_pagemap]:https://developers.google.com/custom-search/docs/structured_data#addtositemap +[sitemap_protocol]:http://www.sitemaps.org/protocol.html +[video_tags]:http://www.google.com/support/webmasters/bin/answer.py?hl=en&answer=80472#4 +[image_tags]:http://www.google.com/support/webmasters/bin/answer.py?hl=en&answer=178636 +[news_tags]:http://www.google.com/support/news_pub/bin/answer.py?answer=74288 +[remote_hosts]:https://github.com/kjvarga/sitemap_generator/wiki/Generate-Sitemaps-on-read-only-filesystems-like-Heroku +[include_index_change]:https://github.com/kjvarga/sitemap_generator/issues/70 +[ehoch]:https://github.com/ehoch +[alternate_links]:http://support.google.com/webmasters/bin/answer.py?hl=en&answer=2620865 +[using_pagemaps]:https://developers.google.com/custom-search/docs/structured_data#pagemaps +[iso_4217]:http://en.wikipedia.org/wiki/ISO_4217 +[media]:https://developers.google.com/webmasters/smartphone-sites/details +[expires]:https://support.google.com/customsearch/answer/2631051?hl=en +[google_cloud_storage_gem]:https://rubygems.org/gems/google-cloud-storage +[google_cloud_storage_authentication]:https://googleapis.dev/ruby/google-cloud-storage/latest/file.AUTHENTICATION.html +[google_cloud_storage_initializer]:https://github.com/googleapis/google-cloud-ruby/blob/master/google-cloud-storage/lib/google/cloud/storage.rb diff --git a/Rakefile b/Rakefile index bbce2126..cf6433c7 100644 --- a/Rakefile +++ b/Rakefile @@ -1,30 +1,48 @@ -require 'rake/testtask' -require 'find' - -begin - require 'jeweler' - Jeweler::Tasks.new do |s| - 
s.name = "sitemap_generator" - s.summary = %Q{Generate 'enterprise-class' Sitemaps for your Rails site using a simple 'Rails Routes'-like DSL and a single Rake task} - s.description = %Q{Install as a plugin or Gem to easily generate ['enterprise-class'][enterprise_class] Google Sitemaps for your Rails site, using a simple 'Rails Routes'-like DSL and a single rake task.} - s.email = "adam.salter@codebright.net " - s.homepage = "http://github.com/adamsalter/sitemap_generator" - s.authors = ["Adam Salter"] - s.files = FileList["[A-Z]*", "{bin,lib,rails,templates,tasks}/**/*"] - # s is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings - end - Jeweler::GemcutterTasks.new -rescue LoadError - puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler" +require 'bundler/setup' +Bundler.require + +desc 'Default: run spec tests.' +task :default => :spec + +require "rspec/core/rake_task" +RSpec::Core::RakeTask.new(:spec) do |spec| + spec.pattern = Dir.glob(['spec/sitemap_generator/**/*']) + spec.rspec_opts = ['--backtrace'] end -desc 'Default: run unit tests.' -task :default => :test +# +# Helpers +# + +def name; @name ||= Dir['*.gemspec'].first.split('.').first end +def version; File.read('VERSION').chomp end +def gemspec_file; "#{name}.gemspec" end +def gem_file; "#{name}-#{version}.gem" end + +# +# Release Tasks. To be run from the directory of this file. +# @see https://github.com/mojombo/rakegem +# -desc 'Test.' 
-Rake::TestTask.new(:test) do |t| - t.libs << 'lib' - t.pattern = 'test/**/*_test.rb' - t.verbose = true +desc "Build #{gem_file} into the pkg/ directory" +task :build => [:prepare] do + sh "mkdir -p pkg" + sh "gem build #{gemspec_file}" + sh "mv #{gem_file} pkg" + sh "bundle --local" end +desc "Chmod all files to be world readable" +task :prepare do + sh "chmod -R a+r *.* *" +end + +desc "Create tag v#{version}, build the gem and push to Git" +task :release => [:build] do + unless `git branch` =~ /^\* master$/ + puts "You must be on the master branch to release!" + exit! + end + sh "git tag v#{version}" + sh "git push origin master --tags" +end diff --git a/VERSION b/VERSION index ee1372d3..5e325424 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.2.2 +6.1.2 diff --git a/config/sitemap.rb b/config/sitemap.rb new file mode 100644 index 00000000..5871b655 --- /dev/null +++ b/config/sitemap.rb @@ -0,0 +1,46 @@ +SitemapGenerator::Sitemap.default_host = 'http://www.example.com' + +SitemapGenerator::Sitemap.create( + :include_root => true, :include_index => true, + :filename => :new_sitemaps, :sitemaps_path => 'fr/') do + + add('/one', :priority => 0.7, :changefreq => 'daily') + + # Test a new location and filename and sitemaps host + group(:sitemaps_path => 'en/', :filename => :xxx, + :sitemaps_host => 'http://newhost.com') do + + add '/two' + add '/three' + end + + # Test a simple namer. + group(:namer => SitemapGenerator::SimpleNamer.new(:abc, :start => 4, :zero => 3)) do + add '/four' + add '/five' + add '/six' + end + + # Test a simple namer + group(:namer => SitemapGenerator::SimpleNamer.new(:def)) do + add '/four' + add '/five' + add '/six' + end + + add '/seven' + + # This should be in a file of its own. + # Not technically valid to have a link with a different host, but people like + # to do strange things sometimes. 
+ group(:sitemaps_host => 'http://exceptional.com') do + add '/eight' + add '/nine' + end + + add '/ten' + + # Not technically valid to have a link with a different host, but people like + # to do strange things sometimes + add '/merchant_path', :host => 'https://www.merchanthost.com' +end diff --git a/integration/Gemfile b/integration/Gemfile new file mode 100644 index 00000000..fb99bcc6 --- /dev/null +++ b/integration/Gemfile @@ -0,0 +1,16 @@ +# Default Gemfile for running tests in development. +source 'https://rubygems.org' + +ruby ">= 2.5.0" + +if RUBY_VERSION =~ /2.5.*/ + gem 'nokogiri', '1.12.5' +else + gem "nokogiri" +end +gem "rspec_junit_formatter" +gem "sitemap_generator", path: "../../" +gem "sqlite3" +gem "combustion" +gem "rails", "~> 6.1" +gem "rspec-rails" diff --git a/integration/Rakefile b/integration/Rakefile new file mode 100644 index 00000000..b0a78d69 --- /dev/null +++ b/integration/Rakefile @@ -0,0 +1,8 @@ +require 'bundler/setup' +Bundler.require +Combustion.initialize! +Combustion::Application.load_tasks +require 'sitemap_generator/tasks' + +desc 'Default: run spec tests.' +task :default => :spec diff --git a/integration/config.ru b/integration/config.ru new file mode 100644 index 00000000..2c8b242a --- /dev/null +++ b/integration/config.ru @@ -0,0 +1,7 @@ +require 'rubygems' +require 'bundler' + +Bundler.require :default, :development + +Combustion.initialize! 
:all +run Combustion::Application diff --git a/integration/gemfiles/rails_5_2.gemfile b/integration/gemfiles/rails_5_2.gemfile new file mode 100644 index 00000000..1040b175 --- /dev/null +++ b/integration/gemfiles/rails_5_2.gemfile @@ -0,0 +1,15 @@ +source "https://rubygems.org" + +ruby "< 2.7.0" + +if RUBY_VERSION =~ /2.5.*/ + gem 'nokogiri', '1.12.5' +else + gem "nokogiri" +end +gem "rspec_junit_formatter" +gem "sitemap_generator", path: "../../../" +gem "sqlite3" +gem "combustion" +gem "rails", "~> 5.2" +gem "rspec-rails" diff --git a/integration/gemfiles/rails_6_0.gemfile b/integration/gemfiles/rails_6_0.gemfile new file mode 100644 index 00000000..be366c65 --- /dev/null +++ b/integration/gemfiles/rails_6_0.gemfile @@ -0,0 +1,15 @@ +source "https://rubygems.org" + +ruby "< 3.0.0" + +if RUBY_VERSION =~ /2.5.*/ + gem 'nokogiri', '1.12.5' +else + gem "nokogiri" +end +gem "rspec_junit_formatter" +gem "sitemap_generator", path: "../../../" +gem "sqlite3" +gem "combustion" +gem "rails", "~> 6.0" +gem "rspec-rails" diff --git a/integration/gemfiles/rails_6_1.gemfile b/integration/gemfiles/rails_6_1.gemfile new file mode 100644 index 00000000..c30e90c7 --- /dev/null +++ b/integration/gemfiles/rails_6_1.gemfile @@ -0,0 +1,15 @@ +source "https://rubygems.org" + +ruby ">= 2.5.0" + +if RUBY_VERSION =~ /2.5.*/ + gem 'nokogiri', '1.12.5' +else + gem "nokogiri" +end +gem "rspec_junit_formatter" +gem "sitemap_generator", path: "../../../" +gem "sqlite3" +gem "combustion" +gem "rails", "~> 6.1" +gem "rspec-rails" diff --git a/integration/gemfiles/rails_7_0.gemfile b/integration/gemfiles/rails_7_0.gemfile new file mode 100644 index 00000000..3822d5e1 --- /dev/null +++ b/integration/gemfiles/rails_7_0.gemfile @@ -0,0 +1,11 @@ +source "https://rubygems.org" + +ruby ">= 2.7.0" + +gem "nokogiri" +gem "rspec_junit_formatter" +gem "sitemap_generator", path: "../../../" +gem "sqlite3" +gem "combustion" +gem "rails", "~> 7.0" +gem "rspec-rails" diff --git 
a/integration/spec/files/sitemap.create.rb b/integration/spec/files/sitemap.create.rb new file mode 100644 index 00000000..ec7464b4 --- /dev/null +++ b/integration/spec/files/sitemap.create.rb @@ -0,0 +1,15 @@ +require File.expand_path('./spec/internal/db/schema.rb') +require File.expand_path('./spec/internal/db/seed.rb') + +SitemapGenerator::Sitemap.default_host = "http://www.example.com" + +SitemapGenerator::Sitemap.create do + add contents_path, :priority => 0.7, :changefreq => 'daily' + + # add all individual articles + Content.all.each do |c| + add content_path(c), :lastmod => c.updated_at + end + + add "/merchant_path", :host => "https://www.example.com" +end diff --git a/integration/spec/files/sitemap.groups.rb b/integration/spec/files/sitemap.groups.rb new file mode 100644 index 00000000..a9865826 --- /dev/null +++ b/integration/spec/files/sitemap.groups.rb @@ -0,0 +1,46 @@ +SitemapGenerator::Sitemap.default_host = "http://www.example.com" + +SitemapGenerator::Sitemap.create( + :include_root => true, :include_index => true, + :filename => :new_sitemaps, :sitemaps_path => 'fr/') do + + add('/one', :priority => 0.7, :changefreq => 'daily') + + # Test a new location and filename and sitemaps host + group(:sitemaps_path => 'en/', :filename => :xxx, + :sitemaps_host => "http://newhost.com") do + + add '/two' + add '/three' + end + + # Test a simple namer. + group(:namer => SitemapGenerator::SimpleNamer.new(:abc, :start => 4, :zero => 3)) do + add '/four' + add '/five' + add '/six' + end + + # Test a simple namer + group(:namer => SitemapGenerator::SimpleNamer.new(:def)) do + add '/four' + add '/five' + add '/six' + end + + add '/seven' + + # This should be in a file of its own. + # Not technically valid to have a link with a different host, but people like + # to do strange things sometimes. 
+ group(:sitemaps_host => "http://exceptional.com") do + add '/eight' + add '/nine' + end + + add '/ten' + + # Not technically valid to have a link with a different host, but people like + # to do strange things sometimes + add "/merchant_path", :host => "https://www.merchanthost.com" +end diff --git a/test/mock_app/app/models/content.rb b/integration/spec/internal/app/models/content.rb similarity index 55% rename from test/mock_app/app/models/content.rb rename to integration/spec/internal/app/models/content.rb index b59924d8..1872a963 100644 --- a/test/mock_app/app/models/content.rb +++ b/integration/spec/internal/app/models/content.rb @@ -1,2 +1,3 @@ class Content < ActiveRecord::Base + validates_presence_of :title end diff --git a/integration/spec/internal/config/database.yml b/integration/spec/internal/config/database.yml new file mode 100644 index 00000000..c33f9e64 --- /dev/null +++ b/integration/spec/internal/config/database.yml @@ -0,0 +1,10 @@ +test: + adapter: sqlite3 + database: ":memory:" + pool: 5 + timeout: 5000 +development: + adapter: sqlite3 + database: ":memory:" + pool: 5 + timeout: 5000 diff --git a/integration/spec/internal/config/routes.rb b/integration/spec/internal/config/routes.rb new file mode 100644 index 00000000..5f775d3b --- /dev/null +++ b/integration/spec/internal/config/routes.rb @@ -0,0 +1,3 @@ +Rails.application.routes.draw do + resources :contents +end diff --git a/integration/spec/internal/config/sitemap.rb b/integration/spec/internal/config/sitemap.rb new file mode 100644 index 00000000..ec7464b4 --- /dev/null +++ b/integration/spec/internal/config/sitemap.rb @@ -0,0 +1,15 @@ +require File.expand_path('./spec/internal/db/schema.rb') +require File.expand_path('./spec/internal/db/seed.rb') + +SitemapGenerator::Sitemap.default_host = "http://www.example.com" + +SitemapGenerator::Sitemap.create do + add contents_path, :priority => 0.7, :changefreq => 'daily' + + # add all individual articles + Content.all.each do |c| + add 
content_path(c), :lastmod => c.updated_at + end + + add "/merchant_path", :host => "https://www.example.com" +end diff --git a/integration/spec/internal/db/schema.rb b/integration/spec/internal/db/schema.rb new file mode 100644 index 00000000..c4b21cec --- /dev/null +++ b/integration/spec/internal/db/schema.rb @@ -0,0 +1,7 @@ + ActiveRecord::Schema.define(:version => 1) do + create_table "contents", force: true do |t| + t.string "title" + t.datetime "created_at" + t.datetime "updated_at" + end + end diff --git a/integration/spec/internal/db/seed.rb b/integration/spec/internal/db/seed.rb new file mode 100644 index 00000000..a7adeaf9 --- /dev/null +++ b/integration/spec/internal/db/seed.rb @@ -0,0 +1,3 @@ +(1..10).each do |i| + Content.create!(:title => "content #{i}") +end if Content.count == 0 diff --git a/integration/spec/internal/log/.gitignore b/integration/spec/internal/log/.gitignore new file mode 100644 index 00000000..bf0824e5 --- /dev/null +++ b/integration/spec/internal/log/.gitignore @@ -0,0 +1 @@ +*.log \ No newline at end of file diff --git a/integration/spec/sitemap_generator/alternate_sitemap_spec.rb b/integration/spec/sitemap_generator/alternate_sitemap_spec.rb new file mode 100644 index 00000000..f024f3a2 --- /dev/null +++ b/integration/spec/sitemap_generator/alternate_sitemap_spec.rb @@ -0,0 +1,101 @@ +require 'spec_helper' + +describe "SitemapGenerator" do + it "should not include media element unless provided" do + xml_fragment = SitemapGenerator::Builder::SitemapUrl.new('link_with_alternates.html', + :host => 'http://www.example.com', + :alternates => [ + { + :lang => 'de', + :href => 'http://www.example.de/link_with_alternate.html' + } + ] + ).to_xml + + doc = Nokogiri::XML.parse("#{xml_fragment}") + url = doc.css('url') + expect(url).not_to be_nil + expect(url.css('loc').text).to eq('http://www.example.com/link_with_alternates.html') + + alternate = url.at_xpath('xhtml:link') + expect(alternate).not_to be_nil + 
expect(alternate.attribute('rel').value).to eq('alternate') + expect(alternate.attribute('hreflang').value).to eq('de') + expect(alternate.attribute('media')).to be_nil + end + + it "should add alternate links to sitemap" do + xml_fragment = SitemapGenerator::Builder::SitemapUrl.new('link_with_alternates.html', + :host => 'http://www.example.com', + :alternates => [ + { + :lang => 'de', + :href => 'http://www.example.de/link_with_alternate.html', + :media => 'only screen and (max-width: 640px)' + } + ] + ).to_xml + + doc = Nokogiri::XML.parse("#{xml_fragment}") + url = doc.css('url') + expect(url).not_to be_nil + expect(url.css('loc').text).to eq('http://www.example.com/link_with_alternates.html') + + alternate = url.at_xpath('xhtml:link') + expect(alternate).not_to be_nil + expect(alternate.attribute('rel').value).to eq('alternate') + expect(alternate.attribute('hreflang').value).to eq('de') + expect(alternate.attribute('href').value).to eq('http://www.example.de/link_with_alternate.html') + expect(alternate.attribute('media').value).to eq('only screen and (max-width: 640px)') + end + + it "should add alternate links to sitemap with rel nofollow" do + xml_fragment = SitemapGenerator::Builder::SitemapUrl.new('link_with_alternates.html', + :host => 'http://www.example.com', + :alternates => [ + { + :lang => 'de', + :href => 'http://www.example.de/link_with_alternate.html', + :nofollow => true, + :media => 'only screen and (max-width: 640px)' + } + ] + ).to_xml + + doc = Nokogiri::XML.parse("#{xml_fragment}") + url = doc.css('url') + expect(url).not_to be_nil + expect(url.css('loc').text).to eq('http://www.example.com/link_with_alternates.html') + + alternate = url.at_xpath('xhtml:link') + expect(alternate).not_to be_nil + expect(alternate.attribute('rel').value).to eq('alternate nofollow') + expect(alternate.attribute('hreflang').value).to eq('de') + expect(alternate.attribute('href').value).to eq('http://www.example.de/link_with_alternate.html') + 
expect(alternate.attribute('media').value).to eq('only screen and (max-width: 640px)') + end + + it "should support adding a single alternate link" do + xml_fragment = SitemapGenerator::Builder::SitemapUrl.new('link_with_alternates.html', + :host => 'http://www.example.com', + :alternate => + { + :lang => 'de', + :href => 'http://www.example.de/link_with_alternate.html', + :nofollow => true + } + ).to_xml + + doc = Nokogiri::XML.parse("#{xml_fragment}") + url = doc.css('url') + expect(url).not_to be_nil + expect(url.css('loc').text).to eq('http://www.example.com/link_with_alternates.html') + + alternate = url.at_xpath('xhtml:link') + expect(alternate).not_to be_nil + expect(alternate.attribute('rel').value).to eq('alternate nofollow') + expect(alternate.attribute('hreflang').value).to eq('de') + expect(alternate.attribute('href').value).to eq('http://www.example.de/link_with_alternate.html') + end +end + diff --git a/integration/spec/sitemap_generator/tasks_spec.rb b/integration/spec/sitemap_generator/tasks_spec.rb new file mode 100644 index 00000000..d833d347 --- /dev/null +++ b/integration/spec/sitemap_generator/tasks_spec.rb @@ -0,0 +1,236 @@ +require 'spec_helper' + +class Holder + class << self + attr_accessor :executed + end +end + +describe "SitemapGenerator" do + describe "reset!" do + before :each do + SitemapGenerator::Sitemap.default_host # Force initialization of the LinkSet + end + + it "should set a new LinkSet instance" do + first = SitemapGenerator::Sitemap.instance_variable_get(:@link_set) + expect(first).to be_a(SitemapGenerator::LinkSet) + SitemapGenerator::Sitemap.reset! 
+ second = SitemapGenerator::Sitemap.instance_variable_get(:@link_set) + expect(second).to be_a(SitemapGenerator::LinkSet) + expect(first).not_to be(second) + end + end + + describe "app root" do + it "should be set to the Rails root" do + expect(SitemapGenerator.app.root.to_s).to eq(Rails.root.to_s) + end + end + + describe "clean task" do + before :each do + FileUtils.mkdir_p(rails_path('public/')) + FileUtils.touch(rails_path('public/sitemap.xml.gz')) + end + + it "should delete the sitemaps" do + file_should_exist(rails_path('public/sitemap.xml.gz')) + Helpers.invoke_task('sitemap:clean') + file_should_not_exist(rails_path('public/sitemap.xml.gz')) + end + end + + describe "fresh install" do + before :each do + delete_sitemap_file_from_rails_app + Helpers.invoke_task('sitemap:install') + end + + it "should create config/sitemap.rb" do + file_should_exist(rails_path('config/sitemap.rb')) + end + + it "should create config/sitemap.rb matching template" do + sitemap_template = SitemapGenerator.templates.template_path(:sitemap_sample) + files_should_be_identical(rails_path('config/sitemap.rb'), sitemap_template) + end + end + + describe "install multiple times" do + before :each do + copy_sitemap_file_to_rails_app(:create) + Helpers.invoke_task('sitemap:install') + end + + it "should not overwrite config/sitemap.rb" do + sitemap_file = File.join(this_root, 'spec/files/sitemap.create.rb') + files_should_be_identical(sitemap_file, rails_path('config/sitemap.rb')) + end + end + + describe "generate sitemap with normal config" do + before :all do + SitemapGenerator::Sitemap.reset! 
+ clean_sitemap_files_from_rails_app + copy_sitemap_file_to_rails_app(:create) + with_max_links(10) { execute_sitemap_config } + end + + it "should create sitemaps" do + file_should_exist(rails_path('public/sitemap.xml.gz')) + file_should_exist(rails_path('public/sitemap1.xml.gz')) + file_should_exist(rails_path('public/sitemap2.xml.gz')) + file_should_not_exist(rails_path('public/sitemap3.xml.gz')) + end + + it "should have 13 links" do + expect(SitemapGenerator::Sitemap.link_count).to eq(13) + end + + it "index XML should validate" do + gzipped_xml_file_should_validate_against_schema rails_path('public/sitemap.xml.gz'), 'siteindex' + end + + it "sitemap XML should validate" do + gzipped_xml_file_should_validate_against_schema rails_path('public/sitemap1.xml.gz'), 'sitemap' + gzipped_xml_file_should_validate_against_schema rails_path('public/sitemap2.xml.gz'), 'sitemap' + end + + it "index XML should not have excess whitespace" do + gzipped_xml_file_should_have_minimal_whitespace rails_path('public/sitemap.xml.gz') + end + + it "sitemap XML should not have excess whitespace" do + gzipped_xml_file_should_have_minimal_whitespace rails_path('public/sitemap1.xml.gz') + end + end + + describe "sitemap with groups" do + before :all do + SitemapGenerator::Sitemap.reset! 
+ clean_sitemap_files_from_rails_app + copy_sitemap_file_to_rails_app(:groups) + with_max_links(2) { execute_sitemap_config } + @expected = %w[ + public/en/xxx.xml.gz + public/fr/abc3.xml.gz + public/fr/abc4.xml.gz + public/fr/def.xml.gz + public/fr/def1.xml.gz + public/fr/new_sitemaps.xml.gz + public/fr/new_sitemaps1.xml.gz + public/fr/new_sitemaps2.xml.gz + public/fr/new_sitemaps3.xml.gz + public/fr/new_sitemaps4.xml.gz] + @sitemaps = (@expected - %w[public/fr/new_sitemaps.xml.gz]) + end + + it "should create sitemaps" do + @expected.each { |file| file_should_exist(rails_path(file)) } + file_should_not_exist(rails_path('public/fr/new_sitemaps5.xml.gz')) + file_should_not_exist(rails_path('public/en/xxx1.xml.gz')) + file_should_not_exist(rails_path('public/fr/abc2.xml.gz')) + file_should_not_exist(rails_path('public/fr/abc5.xml.gz')) + file_should_not_exist(rails_path('public/fr/def2.xml.gz')) + end + + it "should have 16 links" do + expect(SitemapGenerator::Sitemap.link_count).to eq(16) + end + + it "index XML should validate" do + gzipped_xml_file_should_validate_against_schema rails_path('public/fr/new_sitemaps.xml.gz'), 'siteindex' + end + + it "index XML should not have excess whitespace" do + gzipped_xml_file_should_have_minimal_whitespace rails_path('public/fr/new_sitemaps.xml.gz') + end + + it "sitemaps XML should validate" do + @sitemaps.each { |file| gzipped_xml_file_should_validate_against_schema(rails_path(file), 'sitemap') } + end + + it "sitemap XML should not have excess whitespace" do + @sitemaps.each { |file| gzipped_xml_file_should_have_minimal_whitespace(rails_path(file)) } + end + end + + describe "sitemap path" do + before :each do + clean_sitemap_files_from_rails_app + ::SitemapGenerator::Sitemap.reset! 
+ ::SitemapGenerator::Sitemap.default_host = 'http://test.local' + ::SitemapGenerator::Sitemap.filename = 'sitemap' + end + + it "should allow changing of the filename" do + ::SitemapGenerator::Sitemap.create(:filename => :geo_sitemap) do + add '/goerss' + add '/kml' + end + file_should_exist(rails_path('public/geo_sitemap.xml.gz')) + file_should_not_exist(rails_path('public/geo_sitemap1.xml.gz')) + end + + it "should support setting a sitemap path" do + directory_should_not_exist(rails_path('public/sitemaps/')) + + sm = ::SitemapGenerator::Sitemap + sm.sitemaps_path = 'sitemaps/' + sm.create do + add '/' + add '/another' + end + + file_should_exist(rails_path('public/sitemaps/sitemap.xml.gz')) + file_should_not_exist(rails_path('public/sitemaps/sitemap1.xml.gz')) + end + + it "should support setting a deeply nested sitemap path" do + directory_should_not_exist(rails_path('public/sitemaps/deep/directory')) + + sm = ::SitemapGenerator::Sitemap + sm.sitemaps_path = 'sitemaps/deep/directory/' + sm.create do + add '/' + add '/another' + add '/yet-another' + end + + file_should_exist(rails_path('public/sitemaps/deep/directory/sitemap.xml.gz')) + file_should_not_exist(rails_path('public/sitemaps/deep/directory/sitemap1.xml.gz')) + end + end + + describe "external dependencies" do + describe "rails" do + before :each do + @rails = Rails + Object.send(:remove_const, :Rails) + end + + after :each do + Object::Rails = @rails + end + + it "should work outside of Rails" do + expect(defined?(Rails)).to be_nil + expect { ::SitemapGenerator::LinkSet.new }.not_to raise_exception + end + end + end + + protected + + # Better would be to just invoke the environment task and use + # the interpreter. 
+ def execute_sitemap_config + if Holder.executed + SitemapGenerator::Interpreter.run + else + Holder.executed = true + Helpers.invoke_task('sitemap:refresh:no_ping') + end + end +end diff --git a/integration/spec/spec_helper.rb b/integration/spec/spec_helper.rb new file mode 100644 index 00000000..0e3b4196 --- /dev/null +++ b/integration/spec/spec_helper.rb @@ -0,0 +1,37 @@ +require 'bundler/setup' +Bundler.require +# Setting load_schema: false results in "uninitialized constant ActiveRecord::MigrationContext" error +Combustion.initialize! :active_record, :action_view, database_reset: false +Combustion::Application.load_tasks +require 'sitemap_generator/tasks' # Combustion fails to load these tasks +SitemapGenerator.verbose = false + +require 'rspec/rails' +require 'support/sitemap_macros' +require '../spec/support/file_macros' +require '../spec/support/xml_macros' + +RSpec.configure do |config| + config.include(FileMacros) + config.include(XmlMacros) + config.include(SitemapMacros) + + config.after(:all) do + clean_sitemap_files_from_rails_app + copy_sitemap_file_to_rails_app(:create) + end +end + +module Helpers + extend self + + # Invoke and then re-enable the task so it can be called multiple times. + # KJV: Tasks are only being run once despite being re-enabled.
+ # + # task task symbol/string + def invoke_task(task) + Rake.send(:verbose, false) + Rake::Task[task.to_s].invoke + Rake::Task[task.to_s].reenable + end +end diff --git a/integration/spec/support/sitemap_macros.rb b/integration/spec/support/sitemap_macros.rb new file mode 100644 index 00000000..647df2de --- /dev/null +++ b/integration/spec/support/sitemap_macros.rb @@ -0,0 +1,32 @@ +module SitemapMacros + def with_max_links(num) + original = SitemapGenerator::Sitemap.max_sitemap_links + SitemapGenerator::Sitemap.max_sitemap_links = num + yield + ensure + SitemapGenerator::Sitemap.max_sitemap_links = original + end + + def this_root + @this_root ||= File.expand_path(File.join(File.dirname(__FILE__), '../../')) + end + + def rails_path(file) + SitemapGenerator.app.root + file + end + + def copy_sitemap_file_to_rails_app(extension) + FileUtils.cp(File.join(this_root, "spec/files/sitemap.#{extension}.rb"), SitemapGenerator.app.root + 'config/sitemap.rb') + end + + def delete_sitemap_file_from_rails_app + FileUtils.remove(SitemapGenerator.app.root + 'config/sitemap.rb') + rescue + nil + end + + def clean_sitemap_files_from_rails_app + FileUtils.rm_rf(rails_path('public/')) + FileUtils.mkdir_p(rails_path('public/')) + end +end diff --git a/lib/capistrano/sitemap_generator.rb b/lib/capistrano/sitemap_generator.rb new file mode 100644 index 00000000..bf885bdb --- /dev/null +++ b/lib/capistrano/sitemap_generator.rb @@ -0,0 +1 @@ +load File.expand_path(File.join('..', 'tasks', 'sitemap_generator.cap'), __FILE__) diff --git a/lib/capistrano/tasks/sitemap_generator.cap b/lib/capistrano/tasks/sitemap_generator.cap new file mode 100644 index 00000000..2604ab58 --- /dev/null +++ b/lib/capistrano/tasks/sitemap_generator.cap @@ -0,0 +1,34 @@ +namespace :sitemap do + desc 'Create sitemap and ping search engines' + task :refresh do + on roles fetch(:sitemap_roles, :web) do + within release_path do + with rails_env: (fetch(:rails_env) || fetch(:stage)) do + execute :rake, 
"sitemap:refresh" + end + end + end + end + + desc 'Create sitemap without pinging search engines' + task :create do + on roles fetch(:sitemap_roles, :web) do + within release_path do + with rails_env: (fetch(:rails_env) || fetch(:stage)) do + execute :rake, "sitemap:create" + end + end + end + end + + desc 'Clean up sitemaps in sitemap_generator path' + task :clean do + on roles fetch(:sitemap_roles, :web) do + within release_path do + with rails_env: (fetch(:rails_env) || fetch(:stage)) do + execute :rake, "sitemap:clean" + end + end + end + end +end diff --git a/lib/sitemap_generator.rb b/lib/sitemap_generator.rb index 4bde6c9f..5b52f6d9 100644 --- a/lib/sitemap_generator.rb +++ b/lib/sitemap_generator.rb @@ -1,19 +1,87 @@ -require 'sitemap_generator/mapper' -require 'sitemap_generator/link' +require 'sitemap_generator/simple_namer' +require 'sitemap_generator/builder' require 'sitemap_generator/link_set' -require 'sitemap_generator/helper' +require 'sitemap_generator/templates' +require 'sitemap_generator/utilities' +require 'sitemap_generator/application' +require 'sitemap_generator/sitemap_location' module SitemapGenerator - class < 'http://www.google.com/schemas/sitemap-image/1.1', + 'mobile' => 'http://www.google.com/schemas/sitemap-mobile/1.0', + 'news' => 'http://www.google.com/schemas/sitemap-news/0.9', + 'pagemap' => 'http://www.google.com/schemas/sitemap-pagemap/1.0', + 'video' => 'http://www.google.com/schemas/sitemap-video/1.1' + } + + # Lazy-initialize the LinkSet instance + Sitemap = (Class.new do + def method_missing(*args, &block) + (@link_set ||= reset!).send(*args, &block) + end + + def respond_to?(name, include_private = false) + (@link_set ||= reset!).respond_to?(name, include_private) + end + + # Use a new LinkSet instance + def reset! 
+ @link_set = LinkSet.new + end + end).new + end + + class << self + attr_accessor :root, :app, :templates + attr_writer :yield_sitemap, :verbose end - self.root = File.expand_path(File.join(File.dirname(__FILE__), '../')) - self.templates = { - :sitemap_index => File.join(self.root, 'templates/sitemap_index.builder'), - :sitemap_xml => File.join(self.root, 'templates/xml_sitemap.builder'), - :sitemap_sample => File.join(self.root, 'templates/sitemap.rb'), - } - - Sitemap = LinkSet.new + @yield_sitemap = nil + + # Global default for the verbose setting. + def self.verbose + if @verbose.nil? + @verbose = if SitemapGenerator::Utilities.truthy?(ENV['VERBOSE']) + true + elsif SitemapGenerator::Utilities.falsy?(ENV['VERBOSE']) + false + else + nil + end + else + @verbose + end + end + + # Returns true if we should yield the sitemap instance to the block, false otherwise. + def self.yield_sitemap? + !!@yield_sitemap + end + + self.root = File.expand_path(File.join(File.dirname(__FILE__), '../')) # Root of the install dir, not the Rails app + self.templates = SitemapGenerator::Templates.new(self.root) + self.app = SitemapGenerator::Application.new end - + +require 'sitemap_generator/railtie' if SitemapGenerator.app.is_at_least_rails3? diff --git a/lib/sitemap_generator/adapters/aws_sdk_adapter.rb b/lib/sitemap_generator/adapters/aws_sdk_adapter.rb new file mode 100644 index 00000000..1be269c4 --- /dev/null +++ b/lib/sitemap_generator/adapters/aws_sdk_adapter.rb @@ -0,0 +1,60 @@ +if !defined?(Aws::S3::Resource) or !defined?(Aws::Credentials) + raise "Error: `Aws::S3::Resource` and/or `Aws::Credentials` are not defined.\n\n"\ + "Please `require 'aws-sdk'` - or another library that defines these classes." +end + +module SitemapGenerator + # Class for uploading sitemaps to an S3 bucket using the AWS SDK gem. + class AwsSdkAdapter + # Specify your AWS bucket name, credentials, and/or region. 
By default + # the AWS SDK will auto-detect your credentials and region, but you can use + # the following options to configure - or override - them manually: + # + # Options: + # :aws_access_key_id [String] Your AWS access key id + # :aws_secret_access_key [String] Your AWS secret access key + # :aws_region [String] Your AWS region + # + # Requires Aws::S3::Resource and Aws::Credentials to be defined. + # + # @param [String] bucket Name of the S3 bucket + # @param [Hash] options AWS credential overrides, see above + def initialize(bucket, options = {}) + @bucket = bucket + @aws_access_key_id = options[:aws_access_key_id] + @aws_secret_access_key = options[:aws_secret_access_key] + @aws_region = options[:aws_region] + @aws_endpoint = options[:aws_endpoint] + end + + # Call with a SitemapLocation and string data + def write(location, raw_data) + SitemapGenerator::FileAdapter.new.write(location, raw_data) + s3_object = s3_resource.bucket(@bucket).object(location.path_in_public) + s3_object.upload_file(location.path, + acl: 'public-read', + cache_control: 'private, max-age=0, no-cache', + content_type: location[:compress] ? 'application/x-gzip' : 'application/xml' + ) + end + + private + + def s3_resource + @s3_resource ||= Aws::S3::Resource.new(s3_resource_options) + end + + def s3_resource_options + options = {} + options[:region] = @aws_region if !@aws_region.nil? + options[:endpoint] = @aws_endpoint if !@aws_endpoint.nil? + if !@aws_access_key_id.nil? && !@aws_secret_access_key.nil? + options[:credentials] = Aws::Credentials.new( + @aws_access_key_id, + @aws_secret_access_key + ) + end + options + end + end +end diff --git a/lib/sitemap_generator/adapters/file_adapter.rb b/lib/sitemap_generator/adapters/file_adapter.rb new file mode 100644 index 00000000..a096e241 --- /dev/null +++ b/lib/sitemap_generator/adapters/file_adapter.rb @@ -0,0 +1,42 @@ +module SitemapGenerator + # Class for writing out data to a file. + class FileAdapter + # Write data to a file. 
+ # @param location - File object giving the full path and file name of the file. + # If the location specifies a directory(ies) which does not exist, the directory(ies) + # will be created for you. If the location path ends with `.gz` the data will be + # compressed prior to being written out. Otherwise the data will be written out + # unchanged. + # @param raw_data - data to write to the file. + def write(location, raw_data) + # Ensure that the directory exists + dir = location.directory + if !File.exist?(dir) + FileUtils.mkdir_p(dir) + elsif !File.directory?(dir) + raise SitemapError.new("#{dir} should be a directory!") + end + + stream = open(location.path, 'wb') + if location.path.to_s =~ /.gz$/ + gzip(stream, raw_data) + else + plain(stream, raw_data) + end + end + + # Write `data` to a stream, passing the data through a GzipWriter + # to compress it. + def gzip(stream, data) + gz = Zlib::GzipWriter.new(stream) + gz.write data + gz.close + end + + # Write `data` to a stream as is. + def plain(stream, data) + stream.write data + stream.close + end + end +end diff --git a/lib/sitemap_generator/adapters/fog_adapter.rb b/lib/sitemap_generator/adapters/fog_adapter.rb new file mode 100644 index 00000000..dedcf37e --- /dev/null +++ b/lib/sitemap_generator/adapters/fog_adapter.rb @@ -0,0 +1,32 @@ +if !defined?(Fog::Storage) + raise "Error: `Fog::Storage` is not defined.\n\n"\ + "Please `require 'fog'` - or another library that defines this class." +end + +module SitemapGenerator + # Class for uploading sitemaps to a Fog supported endpoint. + class FogAdapter + # Requires Fog::Storage to be defined. 
+ # + # @param [Hash] opts Fog configuration options + # @option :fog_credentials [Hash] Credentials for connecting to the remote server + # @option :fog_directory [String] Your AWS S3 bucket or similar directory name + def initialize(opts = {}) + @fog_credentials = opts[:fog_credentials] + @fog_directory = opts[:fog_directory] + end + + # Call with a SitemapLocation and string data + def write(location, raw_data) + SitemapGenerator::FileAdapter.new.write(location, raw_data) + + storage = Fog::Storage.new(@fog_credentials) + directory = storage.directories.new(:key => @fog_directory) + directory.files.create( + :key => location.path_in_public, + :body => File.open(location.path), + :public => true + ) + end + end +end diff --git a/lib/sitemap_generator/adapters/google_storage_adapter.rb b/lib/sitemap_generator/adapters/google_storage_adapter.rb new file mode 100644 index 00000000..a9ebaafe --- /dev/null +++ b/lib/sitemap_generator/adapters/google_storage_adapter.rb @@ -0,0 +1,37 @@ +if !defined?(Google::Cloud::Storage) + raise "Error: `Google::Cloud::Storage` is not defined.\n\n"\ + "Please `require 'google/cloud/storage'` - or another library that defines this class." +end + +module SitemapGenerator + # Class for uploading sitemaps to a Google Storage using `google-cloud-storage` gem. + class GoogleStorageAdapter + # Requires Google::Cloud::Storage to be defined. + # + # @param [Hash] opts Google::Cloud::Storage configuration options. + # @option :bucket [String] Required. Name of Google Storage Bucket where the file is to be uploaded. + # + # All options other than the `:bucket` option are passed to the `Google::Cloud::Storage.new` + # initializer. See https://googleapis.dev/ruby/google-cloud-storage/latest/file.AUTHENTICATION.html + # for all the supported environment variables and https://github.com/googleapis/google-cloud-ruby/blob/master/google-cloud-storage/lib/google/cloud/storage.rb + # for supported options. 
+ # + # Suggested Options: + # @option :credentials [String] Path to Google service account JSON file, or JSON contents. + # @option :project_id [String] Google Accounts project id where the storage bucket resides. + def initialize(opts = {}) + opts = opts.clone + @bucket = opts.delete(:bucket) + @storage_options = opts + end + + # Call with a SitemapLocation and string data + def write(location, raw_data) + SitemapGenerator::FileAdapter.new.write(location, raw_data) + + storage = Google::Cloud::Storage.new(@storage_options) + bucket = storage.bucket(@bucket) + bucket.create_file(location.path, location.path_in_public, acl: 'public') + end + end +end diff --git a/lib/sitemap_generator/adapters/s3_adapter.rb b/lib/sitemap_generator/adapters/s3_adapter.rb new file mode 100644 index 00000000..f85b7b86 --- /dev/null +++ b/lib/sitemap_generator/adapters/s3_adapter.rb @@ -0,0 +1,54 @@ +if !defined?(Fog::Storage) + raise "Error: `Fog::Storage` is not defined.\n\n"\ + "Please `require 'fog-aws'` - or another library that defines this class." +end + +module SitemapGenerator + # Class for uploading sitemaps to an S3 bucket using the Fog gem. + class S3Adapter + # Requires Fog::Storage to be defined. 
+ # + # @param [Hash] opts Fog configuration options + # @option :aws_access_key_id [String] Your AWS access key id + # @option :aws_secret_access_key [String] Your AWS secret access key + # @option :fog_provider [String] + # @option :fog_directory [String] + # @option :fog_region [String] + # @option :fog_path_style [String] + # @option :fog_storage_options [Hash] Other options to pass to `Fog::Storage` + def initialize(opts = {}) + @aws_access_key_id = opts[:aws_access_key_id] || ENV['AWS_ACCESS_KEY_ID'] + @aws_secret_access_key = opts[:aws_secret_access_key] || ENV['AWS_SECRET_ACCESS_KEY'] + @fog_provider = opts[:fog_provider] || ENV['FOG_PROVIDER'] + @fog_directory = opts[:fog_directory] || ENV['FOG_DIRECTORY'] + @fog_region = opts[:fog_region] || ENV['FOG_REGION'] + @fog_path_style = opts[:fog_path_style] || ENV['FOG_PATH_STYLE'] + @fog_storage_options = opts[:fog_storage_options] || {} + end + + # Call with a SitemapLocation and string data + def write(location, raw_data) + SitemapGenerator::FileAdapter.new.write(location, raw_data) + + credentials = { :provider => @fog_provider } + + if @aws_access_key_id && @aws_secret_access_key + credentials[:aws_access_key_id] = @aws_access_key_id + credentials[:aws_secret_access_key] = @aws_secret_access_key + else + credentials[:use_iam_profile] = true + end + + credentials[:region] = @fog_region if @fog_region + credentials[:path_style] = @fog_path_style if @fog_path_style + + storage = Fog::Storage.new(@fog_storage_options.merge(credentials)) + directory = storage.directories.new(:key => @fog_directory) + directory.files.create( + :key => location.path_in_public, + :body => File.open(location.path), + :public => true + ) + end + end +end diff --git a/lib/sitemap_generator/adapters/wave_adapter.rb b/lib/sitemap_generator/adapters/wave_adapter.rb new file mode 100644 index 00000000..4823e57d --- /dev/null +++ b/lib/sitemap_generator/adapters/wave_adapter.rb @@ -0,0 +1,21 @@ +if !defined?(::CarrierWave::Uploader::Base) 
+ raise "Error: `CarrierWave::Uploader::Base` is not defined.\n\n"\ + "Please `require 'carrierwave'` - or another library that defines this class." +end + +module SitemapGenerator + # Class for uploading sitemaps to a remote server using the CarrierWave gem. + class WaveAdapter < ::CarrierWave::Uploader::Base + attr_accessor :store_dir + + # Call with a SitemapLocation and string data + def write(location, raw_data) + SitemapGenerator::FileAdapter.new.write(location, raw_data) + directory = File.dirname(location.path_in_public) + if directory != '.' + self.store_dir = directory + end + store!(open(location.path, 'rb')) + end + end +end diff --git a/lib/sitemap_generator/application.rb b/lib/sitemap_generator/application.rb new file mode 100644 index 00000000..4dd5fd4f --- /dev/null +++ b/lib/sitemap_generator/application.rb @@ -0,0 +1,46 @@ +require 'pathname' + +module SitemapGenerator + class Application + def is_rails? + !!defined?(Rails::VERSION) + end + + # Returns a boolean indicating whether this environment is Rails 3 + # + # @return [Boolean] + def is_at_least_rails3? + is_rails? && Rails.version.to_f >= 3 + rescue + false # Rails.version defined in 2.1.0 + end + + def root + Pathname.new(rails_root || Dir.getwd) + end + + protected + + # Returns the root of the Rails application, + # if this is running in a Rails context. + # Returns `nil` if no such root is defined. + # + # @return [String, nil] + def rails_root + return ::Rails.root.to_s if defined?(::Rails.root) && ::Rails.root + return RAILS_ROOT.to_s if defined?(RAILS_ROOT) + nil + end + + # Returns the environment of the Rails application, + # if this is running in a Rails context. + # Returns `nil` if no such environment is defined. 
+ # + # @return [String, nil] + def rails_env + return ::Rails.env.to_s if defined?(::Rails.env) + return RAILS_ENV.to_s if defined?(RAILS_ENV) + nil + end + end +end diff --git a/lib/sitemap_generator/builder.rb b/lib/sitemap_generator/builder.rb new file mode 100644 index 00000000..a1d4fa27 --- /dev/null +++ b/lib/sitemap_generator/builder.rb @@ -0,0 +1,8 @@ +require 'sitemap_generator/builder/sitemap_file' +require 'sitemap_generator/builder/sitemap_index_file' +require 'sitemap_generator/builder/sitemap_url' +require 'sitemap_generator/builder/sitemap_index_url' + +module SitemapGenerator::Builder + LinkHolder = Struct.new(:link, :options) +end \ No newline at end of file diff --git a/lib/sitemap_generator/builder/sitemap_file.rb b/lib/sitemap_generator/builder/sitemap_file.rb new file mode 100644 index 00000000..9979100d --- /dev/null +++ b/lib/sitemap_generator/builder/sitemap_file.rb @@ -0,0 +1,173 @@ +require 'zlib' +require 'fileutils' +require 'sitemap_generator/helpers/number_helper' + +module SitemapGenerator + module Builder + # + # General Usage: + # + # sitemap = SitemapFile.new(:location => SitemapLocation.new(...)) + # sitemap.add('/', { ... }) <- add a link to the sitemap + # sitemap.finalize! <- write the sitemap file and freeze the object to protect it from further modification + # + class SitemapFile + include SitemapGenerator::Helpers::NumberHelper + attr_reader :link_count, :filesize, :location, :news_count + + # === Options + # + # * location - a SitemapGenerator::SitemapLocation instance or a Hash of options + # from which a SitemapLocation will be created for you. See `SitemapGenerator::SitemapLocation` for + # the supported list of options. + def initialize(opts={}) + @location = opts.is_a?(Hash) ? SitemapGenerator::SitemapLocation.new(opts) : opts + @link_count = 0 + @news_count = 0 + @xml_content = '' # XML urlset content + @xml_wrapper_start = <<-HTML + + + HTML + @xml_wrapper_start.gsub!(/\s+/, ' ').gsub!(/ *> */, '>').strip! 
+ @xml_wrapper_end = %q[] + @filesize = SitemapGenerator::Utilities.bytesize(@xml_wrapper_start) + SitemapGenerator::Utilities.bytesize(@xml_wrapper_end) + @written = false + @reserved_name = nil # holds the name reserved from the namer + @frozen = false # rather than actually freeze, use this boolean + end + + # If a name has been reserved, use the last modified time from the file. + # Otherwise return nil. We don't want to prematurely assign a name + # for this sitemap if one has not yet been reserved, because we may + # mess up the name-assignment sequence. + def lastmod + File.mtime(location.path) if location.reserved_name? + rescue + nil + end + + def empty? + @link_count == 0 + end + + # Return a boolean indicating whether the sitemap file can fit another link + # of bytes bytes in size. You can also pass a string and the + # bytesize will be calculated for you. + def file_can_fit?(bytes) + bytes = bytes.is_a?(String) ? SitemapGenerator::Utilities.bytesize(bytes) : bytes + (@filesize + bytes) < SitemapGenerator::MAX_SITEMAP_FILESIZE && @link_count < max_sitemap_links && @news_count < SitemapGenerator::MAX_SITEMAP_NEWS + end + + # Add a link to the sitemap file. + # + # If a link cannot be added, for example if the file is too large or the link + # limit has been reached, a SitemapGenerator::SitemapFullError exception is raised + # and the sitemap is finalized. + # + # If the Sitemap has already been finalized a SitemapGenerator::SitemapFinalizedError + # exception is raised. + # + # Return the new link count. + # + # Call with: + # sitemap_url - a SitemapUrl instance + # sitemap, options - a Sitemap instance and options hash + # path, options - a path for the URL and options hash. For supported options + # see the SitemapGenerator::Builder::SitemapUrl class. + # + # The link added to the sitemap will use the host from its location object + # if no host has been specified. 
+ def add(link, options={}) + raise SitemapGenerator::SitemapFinalizedError if finalized? + + sitemap_url = if link.is_a?(SitemapUrl) + link + else + options[:host] ||= @location.host + SitemapUrl.new(link, options) + end + + xml = sitemap_url.to_xml + raise SitemapGenerator::SitemapFullError if !file_can_fit?(xml) + + if sitemap_url.news? + @news_count += 1 + end + + # Add the XML to the sitemap + @xml_content << xml + @filesize += SitemapGenerator::Utilities.bytesize(xml) + @link_count += 1 + end + + # "Freeze" this object. Actually just flags it as frozen. + # + # A SitemapGenerator::SitemapFinalizedError exception is raised if the Sitemap + # has already been finalized. + def finalize! + raise SitemapGenerator::SitemapFinalizedError if finalized? + @frozen = true + end + + def finalized? + @frozen + end + + # Write out the sitemap and free up memory. + # + # All the xml content in the instance is cleared, but attributes like + # filesize are still available. + # + # A SitemapGenerator::SitemapError exception is raised if the file has + # already been written. + def write + raise SitemapGenerator::SitemapError.new("Sitemap already written!") if written? + finalize! unless finalized? + reserve_name + @location.write(@xml_wrapper_start + @xml_content + @xml_wrapper_end, link_count) + @xml_content = @xml_wrapper_start = @xml_wrapper_end = '' + @written = true + end + + # Return true if this file has been written out to disk + def written? + @written + end + + # Reserve a name from the namer unless one has already been reserved. + # Safe to call more than once. + def reserve_name + @reserved_name ||= @location.reserve_name + end + + # Return a boolean indicating whether a name has been reserved + def reserved_name? + !!@reserved_name + end + + # Return a new instance of the sitemap file with the same options, + # and the next name in the sequence. 
+ def new + location = @location.dup + location.delete(:filename) if location.namer + self.class.new(location) + end + + def max_sitemap_links + @location[:max_sitemap_links] || SitemapGenerator::MAX_SITEMAP_LINKS + end + end + end +end diff --git a/lib/sitemap_generator/builder/sitemap_index_file.rb b/lib/sitemap_generator/builder/sitemap_index_file.rb new file mode 100644 index 00000000..9a262f25 --- /dev/null +++ b/lib/sitemap_generator/builder/sitemap_index_file.rb @@ -0,0 +1,150 @@ +module SitemapGenerator + module Builder + class SitemapIndexFile < SitemapFile + + # === Options + # + # * location - a SitemapGenerator::SitemapIndexLocation instance or a Hash of options + # from which a SitemapLocation will be created for you. + def initialize(opts={}) + @location = opts.is_a?(Hash) ? SitemapGenerator::SitemapIndexLocation.new(opts) : opts + @link_count = 0 + @sitemaps_link_count = 0 + @xml_content = '' # XML urlset content + @xml_wrapper_start = <<-HTML + + + HTML + @xml_wrapper_start.gsub!(/\s+/, ' ').gsub!(/ *> */, '>').strip! + @xml_wrapper_end = %q[] + @filesize = SitemapGenerator::Utilities.bytesize(@xml_wrapper_start) + SitemapGenerator::Utilities.bytesize(@xml_wrapper_end) + @written = false + @reserved_name = nil # holds the name reserved from the namer + @frozen = false # rather than actually freeze, use this boolean + @first_sitemap = nil # reference to the first thing added to this index + # Store the URL of the first sitemap added because if create_index is + # false this is the "index" URL + @first_sitemap_url = nil + @create_index = nil + end + + # Finalize sitemaps as they are added to the index. + # If it's the first sitemap, finalize it but don't + # write it out, because we don't yet know if we need an index. If it's + # the second sitemap, we know we need an index, so reserve a name for the + # index, and go and write out the first sitemap. 
If it's the third or + # greater sitemap, just finalize and write it out as usual, nothing more + # needs to be done. + # + # If a link is being added to the index manually as a string, then we + # can assume that the index is required (unless create_index is false of course). + # This seems like the logical thing to do. + alias_method :super_add, :add + def add(link, options={}) + if file = link.is_a?(SitemapFile) && link + @sitemaps_link_count += file.link_count + file.finalize! unless file.finalized? + + # First link. If it's a SitemapFile store a reference to it and the options + # so that we can create a URL from it later. We can't create the URL yet + # because doing so fixes the sitemap file's name, and we have to wait to see + # if we have more than one link in the index before we can know who gets the + # first name (the index, or the sitemap). If the item is not a SitemapFile, + # then it has been manually added and we can be sure that the user intends + # for there to be an index. + if @link_count == 0 + @first_sitemap = SitemapGenerator::Builder::LinkHolder.new(file, options) + @link_count += 1 # pretend it's added, but don't add it yet + else + # need an index so make sure name is reserved and first sitemap is written out + reserve_name unless @location.create_index == false + write_first_sitemap + file.write + super(SitemapGenerator::Builder::SitemapIndexUrl.new(file, options)) + end + else + # A link is being added manually. Obviously the user wants an index. + # This overrides the create_index setting. + unless @location.create_index == false + @create_index = true + reserve_name + end + + # Use the host from the location if none provided + options[:host] ||= @location.host + super(SitemapGenerator::Builder::SitemapIndexUrl.new(link, options)) + end + end + + # Return a boolean indicating whether the sitemap file can fit another link + # of bytes bytes in size. You can also pass a string and the + # bytesize will be calculated for you. 
+ def file_can_fit?(bytes) + bytes = bytes.is_a?(String) ? SitemapGenerator::Utilities.bytesize(bytes) : bytes + (@filesize + bytes) < SitemapGenerator::MAX_SITEMAP_FILESIZE && @link_count < SitemapGenerator::MAX_SITEMAP_FILES + end + + # Return the total number of links in all sitemaps reference by this index file + def total_link_count + @sitemaps_link_count + end + + def stats_summary(opts={}) + str = "Sitemap stats: #{number_with_delimiter(@sitemaps_link_count)} links / #{@link_count} sitemaps" + str += " / %dm%02ds" % opts[:time_taken].divmod(60) if opts[:time_taken] + end + + def finalize! + raise SitemapGenerator::SitemapFinalizedError if finalized? + reserve_name if create_index? + write_first_sitemap + @frozen = true + end + + # Write out the index if an index is needed + def write + super if create_index? + end + + # Whether or not we need to create an index file. True if create_index is true + # or if create_index is :auto and we have more than one link in the index. + # If a link is added manually and create_index is not false, we force index + # creation because they obviously intend for there to be an index. False otherwise. + def create_index? + @create_index || @location.create_index == true || @location.create_index == :auto && @link_count > 1 + end + + # Return the index file URL. If create_index is true, this is the URL + # of the actual index file. If create_index is false, this is the URL + # of the first sitemap that was written out. Only call this method + # *after* the files have been finalized. + def index_url + if create_index? || !@first_sitemap_url + @location.url + else + @first_sitemap_url + end + end + + protected + + # Make sure the first sitemap has been written out and added to the index + def write_first_sitemap + if @first_sitemap + @first_sitemap.link.write unless @first_sitemap.link.written? 
+ super_add(SitemapGenerator::Builder::SitemapIndexUrl.new(@first_sitemap.link, @first_sitemap.options)) + @link_count -= 1 # we already counted it, don't count it twice + # Store the URL because if create_index is false, this is the + # "index" URL + @first_sitemap_url = @first_sitemap.link.location.url + @first_sitemap = nil + end + end + end + end +end diff --git a/lib/sitemap_generator/builder/sitemap_index_url.rb b/lib/sitemap_generator/builder/sitemap_index_url.rb new file mode 100644 index 00000000..897b139d --- /dev/null +++ b/lib/sitemap_generator/builder/sitemap_index_url.rb @@ -0,0 +1,28 @@ +require 'builder' + +module SitemapGenerator + module Builder + class SitemapIndexUrl < SitemapUrl + + def initialize(path, options={}) + if index = path.is_a?(SitemapGenerator::Builder::SitemapIndexFile) && path + options = SitemapGenerator::Utilities.reverse_merge(options, :host => index.location.host, :lastmod => Time.now, :changefreq => 'always', :priority => 1.0) + path = index.location.path_in_public + super(path, options) + else + super + end + end + + # Return the URL as XML + def to_xml(builder=nil) + builder = ::Builder::XmlMarkup.new if builder.nil? + builder.sitemap do + builder.loc self[:loc] + builder.lastmod w3c_date(self[:lastmod]) if self[:lastmod] + end + builder << '' # force to string + end + end + end +end \ No newline at end of file diff --git a/lib/sitemap_generator/builder/sitemap_url.rb b/lib/sitemap_generator/builder/sitemap_url.rb new file mode 100644 index 00000000..310ec332 --- /dev/null +++ b/lib/sitemap_generator/builder/sitemap_url.rb @@ -0,0 +1,242 @@ +require 'builder' +require 'uri' +require 'time' +require 'date' + +module SitemapGenerator + module Builder + # A Hash-like class for holding information about a sitemap URL and + # generating an XML element suitable for sitemaps. + class SitemapUrl < Hash + + # Return a new instance with options configured on it. 
+ # + # == Arguments + # * sitemap - a Sitemap instance, or + # * path, options - a path string and options hash + # + # == Options + # Requires a host to be set. If passing a sitemap, the sitemap must have a +default_host+ + # configured. If calling with a path and options, you must include the :host option. + # + # * +host+ + # * +priority+ + # * +changefreq+ + # * +lastmod+ + # * +images+ + # * +video+/+videos+ + # * +news+ + # * +mobile+ + # * +alternate+/+alternates+ + # * +pagemap+ + def initialize(path, options={}) + options = SitemapGenerator::Utilities.symbolize_keys(options) + if sitemap = path.is_a?(SitemapGenerator::Builder::SitemapFile) && path + SitemapGenerator::Utilities.reverse_merge!(options, :host => sitemap.location.host, :lastmod => sitemap.lastmod) + path = sitemap.location.path_in_public + end + + SitemapGenerator::Utilities.assert_valid_keys(options, :priority, :changefreq, :lastmod, :expires, :host, :images, :video, :news, :videos, :mobile, :alternate, :alternates, :pagemap) + SitemapGenerator::Utilities.reverse_merge!(options, :priority => 0.5, :changefreq => 'weekly', :lastmod => Time.now, :images => [], :news => {}, :videos => [], :mobile => false, :alternates => []) + raise "Cannot generate a url without a host" unless SitemapGenerator::Utilities.present?(options[:host]) + + if video = options.delete(:video) + options[:videos] = video.is_a?(Array) ? options[:videos].concat(video) : options[:videos] << video + end + if alternate = options.delete(:alternate) + options[:alternates] = alternate.is_a?(Array) ? options[:alternates].concat(alternate) : options[:alternates] << alternate + end + + path = path.to_s.sub(/^\//, '') + loc = path.empty? ? 
# Return the URL as XML.
#
# builder - XML builder to append to. A new ::Builder::XmlMarkup is created
#           when nil.
#
# Writes a single url element: loc always, then lastmod, expires, changefreq
# and priority when set, followed by the news, image, video, alternate-link,
# mobile and PageMap extension elements this entry has data for.
#
# Returns the result of `builder << ''`.
def to_xml(builder=nil)
  builder = ::Builder::XmlMarkup.new if builder.nil?
  builder.url do
    builder.loc self[:loc]
    # Dates go through w3c_date; priority through format_float.
    builder.lastmod w3c_date(self[:lastmod]) if self[:lastmod]
    builder.expires w3c_date(self[:expires]) if self[:expires]
    builder.changefreq self[:changefreq].to_s if self[:changefreq]
    builder.priority format_float(self[:priority]) if self[:priority]

    # news: extension. Skipped entirely when no news data is set; inside it
    # every field is optional and written only when present.
    unless SitemapGenerator::Utilities.blank?(self[:news])
      news_data = self[:news]
      builder.news:news do
        builder.news:publication do
          builder.news :name, news_data[:publication_name].to_s if news_data[:publication_name]
          builder.news :language, news_data[:publication_language].to_s if news_data[:publication_language]
        end

        builder.news :access, news_data[:access].to_s if news_data[:access]
        builder.news :genres, news_data[:genres].to_s if news_data[:genres]
        builder.news :publication_date, w3c_date(news_data[:publication_date]) if news_data[:publication_date]
        builder.news :title, news_data[:title].to_s if news_data[:title]
        builder.news :keywords, news_data[:keywords].to_s if news_data[:keywords]
        builder.news :stock_tickers, news_data[:stock_tickers].to_s if news_data[:stock_tickers]
      end
    end

    # image: extension. One element per image; loc is always written.
    self[:images].each do |image|
      builder.image:image do
        builder.image :loc, image[:loc]
        builder.image :caption, image[:caption].to_s if image[:caption]
        builder.image :geo_location, image[:geo_location].to_s if image[:geo_location]
        builder.image :title, image[:title].to_s if image[:title]
        builder.image :license, image[:license].to_s if image[:license]
      end
    end

    # video: extension. thumbnail_loc, title and description are always
    # written; every other field only when the video hash supplies it.
    self[:videos].each do |video|
      builder.video :video do
        builder.video :thumbnail_loc, video[:thumbnail_loc].to_s
        builder.video :title, video[:title].to_s
        builder.video :description, video[:description].to_s
        builder.video :content_loc, video[:content_loc].to_s if video[:content_loc]
        if video[:player_loc]
          # allow_embed is always set (default yes); autoplay only when present.
          loc_attributes = { :allow_embed => yes_or_no_with_default(video[:allow_embed], true) }
          loc_attributes[:autoplay] = video[:autoplay].to_s if SitemapGenerator::Utilities.present?(video[:autoplay])
          builder.video :player_loc, video[:player_loc].to_s, loc_attributes
        end
        builder.video :duration, video[:duration].to_s if video[:duration]
        builder.video :expiration_date, w3c_date(video[:expiration_date]) if video[:expiration_date]
        builder.video :rating, format_float(video[:rating]) if video[:rating]
        builder.video :view_count, video[:view_count].to_s if video[:view_count]
        builder.video :publication_date, w3c_date(video[:publication_date]) if video[:publication_date]
        # Both a :tags list and a single :tag are accepted; each yields tag elements.
        video[:tags].each {|tag| builder.video :tag, tag.to_s } if video[:tags]
        builder.video :tag, video[:tag].to_s if video[:tag]
        builder.video :category, video[:category].to_s if video[:category]
        # Keyed on has_key? rather than truthiness so an explicit false is
        # still emitted (as 'no'); a nil value falls back to the default.
        builder.video :family_friendly, yes_or_no_with_default(video[:family_friendly], true) if video.has_key?(:family_friendly)
        builder.video :gallery_loc, video[:gallery_loc].to_s, :title => video[:gallery_title].to_s if video[:gallery_loc]
        builder.video :price, video[:price].to_s, prepare_video_price_attribs(video) if SitemapGenerator::Utilities.present?(video[:price])
        if video[:uploader]
          builder.video :uploader, video[:uploader].to_s, video[:uploader_info] ? { :info => video[:uploader_info].to_s } : {}
        end
        builder.video :live, yes_or_no_with_default(video[:live], true) if video.has_key?(:live)
        builder.video :requires_subscription, yes_or_no_with_default(video[:requires_subscription], true) if video.has_key?(:requires_subscription)
      end
    end

    # Alternate links: rel and href always; hreflang and media when present.
    self[:alternates].each do |alternate|
      rel = alternate[:nofollow] ? 'alternate nofollow' : 'alternate'
      attributes = { :rel => rel, :href => alternate[:href].to_s }
      attributes[:hreflang] = alternate[:lang].to_s if SitemapGenerator::Utilities.present?(alternate[:lang])
      attributes[:media] = alternate[:media].to_s if SitemapGenerator::Utilities.present?(alternate[:media])
      builder.xhtml :link, attributes
    end

    # Any non-blank :mobile value produces the same marker element.
    unless SitemapGenerator::Utilities.blank?(self[:mobile])
      builder.mobile :mobile
    end

    # PageMap: DataObject elements, each wrapping its Attribute elements.
    # as_array lets :dataobjects / :attributes be passed without wrapping
    # them in an Array -- presumably also tolerating nil; confirm in Utilities.
    unless SitemapGenerator::Utilities.blank?(self[:pagemap])
      builder.pagemap :PageMap do
        SitemapGenerator::Utilities.as_array(self[:pagemap][:dataobjects]).each do |dataobject|
          builder.pagemap :DataObject, :type => dataobject[:type].to_s, :id => dataobject[:id].to_s do
            SitemapGenerator::Utilities.as_array(dataobject[:attributes]).each do |attribute|
              builder.pagemap :Attribute, attribute[:value].to_s, :name => attribute[:name].to_s
            end
          end
        end
      end
    end
  end
  builder << '' # Force to string
end
# Normalize a yes/no flag to the literal string 'yes' or 'no'.
#
# value - a String, or any other object.
#
# A String must be exactly 'yes' or 'no' (case-insensitive) and is returned
# downcased. Any other object is mapped by truthiness: nil and false give
# 'no', everything else gives 'yes'. This method takes no default; use
# yes_or_no_with_default when nil should fall back to a default.
#
# Raises ArgumentError for any other String.
def yes_or_no(value)
  if value.is_a?(String)
    # \A and \z anchor the whole string. ^ and $ only anchor a line, which
    # let input such as "yes\nanything" (or "yes\n") pass validation and
    # reach the generated XML.
    raise ArgumentError.new("Unrecognized value for yes/no field: #{value.inspect}") unless value =~ /\A(yes|no)\z/i
    value.downcase
  else
    value ? 'yes' : 'no'
  end
end
# Require every Ruby file found under the core_ext/ directory that sits next
# to this file, in sorted path order.
core_ext_pattern = File.join(File.dirname(__FILE__), 'core_ext', '**', '*.rb')
Dir.glob(core_ext_pattern).sort.each { |core_ext_file| require core_ext_file }
# Wraps a number and converts it to a byte count for the unit named by the
# method called. Each unit is 1024 times the previous one.
#
# Enables byte calculations like
#   SitemapGenerator::Numeric.new(2).kilobytes # => 2048
class SitemapGenerator::Numeric
  KILOBYTE = 1024
  MEGABYTE = KILOBYTE * 1024
  GIGABYTE = MEGABYTE * 1024
  TERABYTE = GIGABYTE * 1024
  PETABYTE = TERABYTE * 1024
  EXABYTE = PETABYTE * 1024

  # number - the value later multiplied by the unit size.
  def initialize(number)
    @number = number
  end

  # The wrapped number itself, read as a count of bytes.
  def bytes
    @number
  end
  alias_method :byte, :bytes

  # One plural converter per unit plus its singular alias,
  # e.g. #kilobytes and #kilobyte.
  [
    [:kilobytes, KILOBYTE],
    [:megabytes, MEGABYTE],
    [:gigabytes, GIGABYTE],
    [:terabytes, TERABYTE],
    [:petabytes, PETABYTE],
    [:exabytes, EXABYTE]
  ].each do |plural, multiplier|
    define_method(plural) { @number * multiplier }
    alias_method plural.to_s.chomp('s').to_sym, plural
  end
end
controller.send(:initialize_current_url) - b = controller.instance_eval{binding} - sitemap_mapper_file = File.join(RAILS_ROOT, 'config/sitemap.rb') - eval(open(sitemap_mapper_file).read, b) - end - - def url_with_hostname(path) - URI.join(Sitemap.default_host, path).to_s - end - - def w3c_date(date) - date.utc.strftime("%Y-%m-%dT%H:%M:%S+00:00") - end - - def ping_search_engines(sitemap_index) - require 'open-uri' - index_location = CGI.escape(url_with_hostname(sitemap_index)) - # engines list from http://en.wikipedia.org/wiki/Sitemap_index - yahoo_app_id = SitemapGenerator::Sitemap.yahoo_app_id - {:google => "http://www.google.com/webmasters/sitemaps/ping?sitemap=#{index_location}", - :yahoo => "http://search.yahooapis.com/SiteExplorerService/V1/ping?sitemap=#{index_location}&appid=#{yahoo_app_id}", - :ask => "http://submissions.ask.com/ping?sitemap=#{index_location}", - :bing => "http://www.bing.com/webmaster/ping.aspx?siteMap=#{index_location}", - :sitemap_writer => "http://www.sitemapwriter.com/notify.php?crawler=all&url=#{index_location}"}.each do |engine, link| - begin - unless SitemapGenerator::Sitemap.yahoo_app_id == false - open(link) - puts "Successful ping of #{engine.to_s.titleize}" if verbose - end - rescue Timeout::Error, StandardError => e - puts "Ping failed for #{engine.to_s.titleize}: #{e.inspect}" if verbose - puts <<-END if engine == :yahoo && verbose -Yahoo requires an 'AppID' for more than one ping per "timeframe", you can either: - - remove yahoo from the ping list (config/sitemap.rb): - SitemapGenerator::Sitemap.yahoo_app_id = false - - or add your Yahoo AppID to the generator (config/sitemap.rb): - SitemapGenerator::Sitemap.yahoo_app_id = "my_app_id" -For more information: http://developer.yahoo.com/search/siteexplorer/V1/updateNotification.html - END - end - end - end - end -end diff --git a/lib/sitemap_generator/helpers/number_helper.rb b/lib/sitemap_generator/helpers/number_helper.rb new file mode 100644 index 00000000..90725b9a --- 
# Raised when the +number+ argument given to the helpers is invalid and
# the option :raise is set to +true+.
#
# The rejected value is available through #number and is included in the
# exception message.
class InvalidNumberError < StandardError
  attr_accessor :number

  # number - the value that could not be converted to a number.
  def initialize(number)
    @number = number
    # Without an explicit message the exception reports only its class name,
    # which hides the value that was rejected.
    super("Invalid number: #{number.inspect}")
  end
end
# Formats a +number+ with grouped thousands, e.g. 12345678 => "12,345,678".
#
# number  - anything Float() accepts. Other values are returned unchanged,
#           or raise InvalidNumberError when options[:raise] is set.
# options - Hash, String or Symbol keys (keys are symbolized in place):
#           :delimiter - thousands delimiter (defaults to ",").
#           :separator - separator between integer and fractional digits
#                        (defaults to ".").
#           :raise     - raise instead of returning invalid input.
#
# Returns the formatted String.
def number_with_delimiter(number, options = {})
  SitemapGenerator::Utilities.symbolize_keys!(options)

  # Float() is used purely as a validity check; the digits that get
  # formatted come from the original argument's to_s below.
  begin
    Float(number)
  rescue ArgumentError, TypeError
    raise InvalidNumberError, number if options[:raise]
    return number
  end

  options = SitemapGenerator::Utilities.reverse_merge(
    options,
    :separator => ".",
    :delimiter => ",",
    :precision => 3,
    :significant => false,
    :strip_insignificant_zeros => false
  )

  # Only the integer part is grouped; anything after the first "." is
  # re-attached untouched using the configured separator.
  integer_part, *fraction_parts = number.to_s.to_str.split('.')
  integer_part.gsub!(/(\d)(?=(\d\d\d)+(?!\d))/, "\\1#{options[:delimiter]}")
  [integer_part, *fraction_parts].join(options[:separator])
end
+ # * :strip_insignificant_zeros - If +true+ removes insignificant zeros after the decimal separator (defaults to +false+) + # + # ==== Examples + # number_with_precision(111.2345) # => 111.235 + # number_with_precision(111.2345, :precision => 2) # => 111.23 + # number_with_precision(13, :precision => 5) # => 13.00000 + # number_with_precision(389.32314, :precision => 0) # => 389 + # number_with_precision(111.2345, :significant => true) # => 111 + # number_with_precision(111.2345, :precision => 1, :significant => true) # => 100 + # number_with_precision(13, :precision => 5, :significant => true) # => 13.000 + # number_with_precision(111.234, :locale => :fr) # => 111,234 + # number_with_precision(13, :precision => 5, :significant => true, strip_insignificant_zeros => true) + # # => 13 + # number_with_precision(389.32314, :precision => 4, :significant => true) # => 389.3 + # number_with_precision(1111.2345, :precision => 2, :separator => ',', :delimiter => '.') + # # => 1.111,23 + def number_with_precision(number, options = {}) + SitemapGenerator::Utilities.symbolize_keys!(options) + + number = begin + Float(number) + rescue ArgumentError, TypeError + if options[:raise] + raise InvalidNumberError, number + else + return number + end + end + + defaults = { + :separator => ".", + :delimiter => ",", + :precision => 3, + :significant => false, + :strip_insignificant_zeros => false + } + precision_defaults = { + :delimiter => "" + } + defaults = defaults.merge(precision_defaults) + + options = SitemapGenerator::Utilities.reverse_merge(options, defaults) # Allow the user to unset default values: Eg.: :significant => false + precision = options.delete :precision + significant = options.delete :significant + strip_insignificant_zeros = options.delete :strip_insignificant_zeros + + if significant and precision > 0 + if number == 0 + digits, rounded_number = 1, 0 + else + digits = (Math.log10(number.abs) + 1).floor + rounded_number = 
(SitemapGenerator::BigDecimal.new(number.to_s) / SitemapGenerator::BigDecimal.new((10 ** (digits - precision)).to_f.to_s)).round.to_f * 10 ** (digits - precision) + digits = (Math.log10(rounded_number.abs) + 1).floor # After rounding, the number of digits may have changed + end + precision = precision - digits + precision = precision > 0 ? precision : 0 #don't let it be negative + else + rounded_number = SitemapGenerator::Utilities.round(SitemapGenerator::BigDecimal.new(number.to_s), precision).to_f + end + formatted_number = number_with_delimiter("%01.#{precision}f" % rounded_number, options) + if strip_insignificant_zeros + escaped_separator = Regexp.escape(options[:separator]) + formatted_number.sub(/(#{escaped_separator})(\d*[1-9])?0+\z/, '\1\2').sub(/#{escaped_separator}\z/, '') + else + formatted_number + end + + end + + STORAGE_UNITS = [:byte, :kb, :mb, :gb, :tb].freeze + DECIMAL_UNITS = {0 => :unit, 1 => :ten, 2 => :hundred, 3 => :thousand, 6 => :million, 9 => :billion, 12 => :trillion, 15 => :quadrillion, + -1 => :deci, -2 => :centi, -3 => :mili, -6 => :micro, -9 => :nano, -12 => :pico, -15 => :femto}.freeze + + # Formats the bytes in +number+ into a more understandable representation + # (e.g., giving it 1500 yields 1.5 KB). This method is useful for + # reporting file sizes to users. You can customize the + # format in the +options+ hash. + # + # See number_to_human if you want to pretty-print a generic number. + # + # ==== Options + # * :locale - Sets the locale to be used for formatting (defaults to current locale). + # * :precision - Sets the precision of the number (defaults to 3). + # * :significant - If +true+, precision will be the # of significant_digits. If +false+, the # of fractional digits (defaults to +true+) + # * :separator - Sets the separator between the fractional and integer digits (defaults to "."). + # * :delimiter - Sets the thousands delimiter (defaults to ""). 
# Formats the bytes in +number+ into a more understandable representation,
# e.g. 123 => "123 Bytes", 1234 => "1.21 KB". Useful for reporting file sizes.
#
# number  - anything Float() accepts. Other values are returned unchanged,
#           or raise InvalidNumberError when options[:raise] is set.
# options - Hash (keys are symbolized in place):
#           :precision   - precision of the number (defaults to 3).
#           :significant - when true, :precision counts significant digits
#                          (defaults to true).
#           :separator   - separator between integer and fractional digits
#                          (defaults to ".").
#           :delimiter   - thousands delimiter (defaults to "").
#           :strip_insignificant_zeros - remove trailing zeros after the
#                          separator (defaults to true).
#           :raise       - raise instead of returning invalid input.
#
# Values below 1024 are printed as whole bytes. Larger values are scaled by
# powers of 1024 up to the largest unit in STORAGE_UNITS.
#
# Returns the formatted String.
def number_to_human_size(number, options = {})
  SitemapGenerator::Utilities.symbolize_keys!(options)

  # Inside the rescue `number` is still the caller's original argument.
  number = begin
    Float(number)
  rescue ArgumentError, TypeError
    raise InvalidNumberError, number if options[:raise]
    return number
  end

  # The effective defaults, written out once instead of building the general
  # number defaults and then overriding most of them. reverse_merge is
  # expected to fill in every key missing from options (as ActiveSupport's
  # Hash#reverse_merge does), so :strip_insignificant_zeros is always present
  # afterwards and needs no separate fallback.
  options = SitemapGenerator::Utilities.reverse_merge(
    options,
    :separator => ".",
    :delimiter => "",
    :precision => 3,
    :significant => true,
    :strip_insignificant_zeros => true
  )

  storage_units_format = "%n %u"
  whole_bytes = number.to_i

  if whole_bytes < 1024
    # Singular only for exactly 1 (and, as before, for negative values).
    unit = whole_bytes > 1 || whole_bytes == 0 ? 'Bytes' : 'Byte'
    storage_units_format.gsub(/%n/, whole_bytes.to_s).gsub(/%u/, unit)
  else
    max_exp = STORAGE_UNITS.size - 1
    exponent = (Math.log(number) / Math.log(1024)).to_i # Convert to base 1024
    exponent = max_exp if exponent > max_exp # we need this to avoid overflow for the highest unit
    number /= 1024 ** exponent

    unit_labels = {
      :byte => "Bytes",
      :kb => "KB",
      :mb => "MB",
      :gb => "GB",
      :tb => "TB"
    }
    unit = unit_labels[STORAGE_UNITS[exponent]]
    formatted_number = number_with_precision(number, options)
    storage_units_format.gsub(/%n/, formatted_number).gsub(/%u/, unit)
  end
end
# Evaluate the block against this interpreter. Does nothing when no block
# is given.
#
# opts  - Hash; only :yield_sitemap is read here.
# block - the sitemap configuration block.
#
# With :yield_sitemap => true the block is called with the LinkSet
# (@linkset) as its argument...for old-style calling. Otherwise the block is
# run with instance_eval, so its unqualified method calls are sent to this
# Interpreter.
#
# Note that this method is named `eval` and therefore replaces Kernel#eval
# for Interpreter instances.
def eval(opts={}, &block)
  if block_given?
    if opts[:yield_sitemap]
      yield @linkset
    else
      instance_eval(&block)
    end
  end
end
# Run the interpreter on a config file.
#
# opts - Hash of options. It is duplicated first, so the caller's hash is
#        not modified.
#        :config_file - path of the config file to evaluate. Defaults to
#                       config/sitemap.rb under SitemapGenerator.app.root.
#        All other options are passed to +new+.
#
# The block parameter is accepted but not referenced by this method.
#
# Returns the Interpreter instance the file was evaluated in.
def self.run(opts={}, &block)
  settings = opts.dup
  config_path = settings.delete(:config_file) || (SitemapGenerator.app.root + 'config/sitemap.rb')

  runner = new(settings)
  # Passing the path as the second argument makes it the file name used in
  # backtraces for code from the config file.
  runner.instance_eval(File.read(config_path), config_path.to_s)
  runner
end
# Create a new sitemap index and sitemap files.
#
# opts  - options applied to this instance through set_options before the
#         block runs.
# block - sitemap definition, evaluated through the interpreter.
#
# The instance is reset first and finalized after the block has run. When
# verbose, the public path is printed up front and a stats summary including
# the elapsed time is output at the end.
#
# Returns self.
def create(opts={}, &block)
  reset!
  set_options(opts)

  started_at = nil
  if verbose
    started_at = Time.now
    puts "In '#{sitemap_index.location.public_path}':"
  end

  interpreter.eval(:yield_sitemap => yield_sitemap?, &block)
  finalize!

  finished_at = verbose ? Time.now : nil
  if verbose
    output(sitemap_index.stats_summary(:time_taken => finished_at - started_at))
  end

  self
end
+ # Defaults to the public/ directory in your application root directory or + # the current working directory. + # + # * :sitemaps_host - String. Host including protocol to use when generating + # a link to a sitemap file i.e. the hostname of the server where the sitemaps are hosted. + # The value will differ from the hostname in your sitemap links. + # For example: `'http://amazon.aws.com/'`. + # + # Note that `include_index` is automatically turned off when the `sitemaps_host` does + # not match `default_host`. Because the link to the sitemap index file that would + # otherwise be added would point to a different host than the rest of the links in + # the sitemap. Something that the sitemap rules forbid. + # + # * :sitemaps_path - path fragment within public to write sitemaps + # to e.g. 'en/'. Sitemaps are written to public_path + sitemaps_path + # + # * :filename - symbol giving the base name for files (default :sitemap). + # The names are generated like "#{filename}.xml.gz", "#{filename}1.xml.gz", "#{filename}2.xml.gz" + # with the first file being the index if you have more than one sitemap file. + # + # * :include_index - Boolean. Whether to add a link pointing to the sitemap index + # to the current sitemap. This points search engines to your Sitemap Index to + # include it in the indexing of your site. Default is `false`. Turned off when + # `sitemaps_host` is set or within a `group()` block. Turned off because Google can complain + # about nested indexing and because if a robot is already reading your sitemap, they + # probably know about the index. + # + # * :include_root - Boolean. Whether to **add the root** url i.e. '/' to the + # current sitemap. Default is `true`. Turned off within a `group()` block. + # + # * :search_engines - Hash. A hash of search engine names mapped to + # ping URLs. See ping_search_engines. + # + # * :verbose - If +true+, output a summary line for each sitemap and sitemap + # index that is created. Default is +false+. 
+ # + # * :create_index - Supported values: `true`, `false`, `:auto`. Default: `:auto`. + # Whether to create a sitemap index file. If `true` an index file is always created, + # regardless of how many links are in your sitemap. If `false` an index file is never + # created. If `:auto` an index file is created only if your sitemap has more than + # one sitemap file. + # + # * :namer - A SitemapGenerator::SimpleNamer instance for generating the sitemap + # and index file names. See :filename if you don't need to do anything fancy, and can + # accept the default naming conventions. + # + # * :compress - Specifies which files to compress with gzip. Default is `true`. Accepted values: + # * `true` - Boolean; compress all files. + # * `false` - Boolean; write out only uncompressed files. + # * `:all_but_first` - Symbol; leave the first file uncompressed but compress any remaining files. + # + # The compression setting applies to groups too. So :all_but_first will have the same effect (the first + # file in the group will not be compressed, the rest will). So if you require different behaviour for your + # groups, pass in a `:compress` option e.g. group(:compress => false) { add('/link') } + # + # * :max_sitemap_links - The maximum number of links to put in each sitemap. + # Default is `SitemapGenerator::MAX_SITEMAPS_LINKS`, or 50,000. + # + # Note: When adding a new option be sure to include it in `options_for_group()` if + # the option should be inherited by groups. 
# Constructor.
#
# options - Hash of LinkSet options. Missing keys are filled from the
#           defaults below, then every key/value pair is stored in the
#           instance variable of the same name. The values are assigned
#           directly; accessor methods are not called.
def initialize(options={})
  # Give the internal state that has no default an explicit nil.
  [
    :@default_host, :@sitemaps_host, :@yield_sitemap, :@sitemaps_path,
    :@adapter, :@verbose, :@protect_index, :@sitemap_index,
    :@added_default_links, :@created_group, :@sitemap
  ].each { |ivar| instance_variable_set(ivar, nil) }

  defaults = {
    :include_root => true,
    :include_index => false,
    :filename => :sitemap,
    :search_engines => {
      :google => "http://www.google.com/webmasters/tools/ping?sitemap=%s",
      :bing => "http://www.bing.com/ping?sitemap=%s"
    },
    :create_index => :auto,
    :compress => true,
    :max_sitemap_links => SitemapGenerator::MAX_SITEMAP_LINKS
  }
  options = SitemapGenerator::Utilities.reverse_merge(options, defaults)

  options.each_pair do |name, value|
    instance_variable_set("@#{name}".to_sym, value)
  end

  # If an index is passed in, protect it from modification.
  # Sitemaps can be added to the index but nothing else can be changed.
  @protect_index = true if options[:sitemap_index]
end
def add_to_index(link, options={})
  sitemap_index.add(link, SitemapGenerator::Utilities.reverse_merge(options, :host => sitemaps_host))
end

# Create a new group of sitemap files.
#
# Returns a new LinkSet instance with the options passed in set on it. All groups
# share the sitemap index, which is not affected by any of the options passed here.
#
# === Options
# Any of the options to LinkSet.new. Except for :public_path which is shared
# by all groups.
#
# The current options are inherited by the new group of sitemaps. The only exceptions
# being :include_index and :include_root which default to +false+.
#
# Pass a block to add links to the new LinkSet. If you pass a block the sitemaps will
# be finalized when the block returns.
#
# If you are not changing any of the location settings like filename,
# sitemaps_path, sitemaps_host or namer,
# links you add within the group will be added to the current sitemap.
# Otherwise the current sitemap file is finalized and a new sitemap file started,
# using the options you specified.
#
# Most commonly, you'll want to give the group's files a distinct name using
# the filename option.
#
# Options like :default_host can be used and it will only affect the links
# within the group. Links added outside of the group will revert to the previous
# +default_host+.
def group(opts={}, &block)
  # Remember that a group exists; finalize_sitemap! consults this flag.
  @created_group = true
  # Work on a copy so the caller's hash is left untouched.
  original_opts = opts.dup

  if (@@requires_finalization_opts & original_opts.keys).empty?
    # If no new filename or path is specified reuse the default sitemap file.
    # A new location object will be set on it for the duration of the group.
    original_opts[:sitemap] = sitemap
  elsif original_opts.key?(:sitemaps_host) && (@@new_location_opts & original_opts.keys).empty?
    # If no location options are provided we are creating the next sitemap in the
    # current series, so finalize and inherit the namer.
    finalize_sitemap!
    original_opts[:namer] = namer
  end

  opts = options_for_group(original_opts)
  @group = SitemapGenerator::LinkSet.new(opts)
  if opts.key?(:sitemap)
    # If the group is sharing the current sitemap, set the
    # new location options on the location object.
    @original_location = @sitemap.location.dup
    @sitemap.location.merge!(@group.sitemap_location)
    if block_given?
      @group.interpreter.eval(:yield_sitemap => @yield_sitemap || SitemapGenerator.yield_sitemap?, &block)
      @group.finalize_sitemap!
      # Put the saved location settings back once the block has run.
      @sitemap.location.merge!(@original_location)
    end
  else
    # Handle the case where a user only has one group, and it's being written
    # to a new sitemap file. They would expect there to be an index. So force
    # index creation. If there is more than one group, we would have an index anyways,
    # so it's safe to force index creation in these other cases. In the case that
    # the groups reuse the current sitemap, don't force index creation because
    # we want the default behaviour i.e. only an index if more than one sitemap file.
    # Don't force index creation if the user specifically requested no index. This
    # unfortunately means that if they set it to :auto they may be getting an index
    # when they didn't expect one, but you shouldn't be using groups if you only have
    # one sitemap and don't want an index. Rather, just add the links directly in the create()
    # block.
    @group.send(:create_index=, true, true) if @group.create_index != false

    if block_given?
      @group.interpreter.eval(:yield_sitemap => @yield_sitemap || SitemapGenerator.yield_sitemap?, &block)
      @group.finalize_sitemap!
    end
  end
  @group
end

# Ping search engines to notify them of updated sitemaps.
#
# Search engines are already notified for you if you run `rake sitemap:refresh`.
# If you want to ping search engines separately to your sitemap generation, run
# `rake sitemap:refresh:no_ping` and then run a rake task or script
# which calls this method as in the example below.
#
# == Arguments
# * sitemap_index_url - The full URL to your sitemap index file.
#   If not provided the location is based on the `host` you have
#   set and any other options like your `sitemaps_path`. The URL
#   will be CGI escaped for you when included as part of the
#   search engine ping URL.
#
# == Options
# A hash of one or more search engines to ping in addition to the
# default search engines. The key is the name of the search engine
# as a string or symbol and the value is the full URL to ping with
# a string interpolation that will be replaced by the CGI escaped sitemap
# index URL. If you have any literal percent characters in your URL you
# need to escape them with `%%`. For example if your sitemap index URL
# is `http://example.com/sitemap.xml.gz` and your
# ping url is `http://example.com/100%%/ping?url=%s`
# then the final URL that is pinged will be `http://example.com/100%/ping?url=http%3A%2F%2Fexample.com%2Fsitemap.xml.gz`
#
# == Examples
#
# Both of these examples will ping the default search engines in addition to `http://superengine.com/ping?url=http%3A%2F%2Fexample.com%2Fsitemap.xml.gz`
#
#   SitemapGenerator::Sitemap.host('http://example.com/')
#   SitemapGenerator::Sitemap.ping_search_engines(:super_engine => 'http://superengine.com/ping?url=%s')
#
# Is equivalent to:
#
#   SitemapGenerator::Sitemap.ping_search_engines('http://example.com/sitemap.xml.gz', :super_engine => 'http://superengine.com/ping?url=%s')
#
# A failed or timed-out ping is reported through `output` and does not stop
# the remaining engines from being pinged.
def ping_search_engines(*args)
  # Only CGI.escape is needed, so load 'cgi' rather than the session library.
  require 'cgi'
  require 'open-uri'
  require 'timeout'

  engines = args.last.is_a?(Hash) ? args.pop : {}
  unescaped_url = args.shift || sitemap_index_url
  index_url = CGI.escape(unescaped_url)

  output("\n")
  output("Pinging with URL '#{unescaped_url}':")
  search_engines.merge(engines).each do |engine, link|
    link = link % index_url
    name = Utilities.titleize(engine.to_s)
    begin
      Timeout::timeout(10) do
        # The block form is used so the response stream is closed once the
        # request completes; the body itself is not needed.
        if URI.respond_to?(:open) # Available since Ruby 2.5
          URI.open(link) { |_response| }
        else
          # using Kernel#open became deprecated since Ruby 2.7. See https://bugs.ruby-lang.org/issues/15893
          open(link) { |_response| }
        end
      end
      output(" Successful ping of #{name}")
    rescue Timeout::Error, StandardError => e
      output("Ping failed for #{name}: #{e.inspect} (URL #{link})")
    end
  end
end

# Return a count of the total number of links in all sitemaps
def link_count
  sitemap_index.total_link_count
end

# Return the host to use in links to the sitemap files. This defaults to your
# +default_host+.
def sitemaps_host
  @sitemaps_host || @default_host
end

# Lazy-initialize a sitemap instance and return it.
def sitemap
  @sitemap ||= SitemapGenerator::Builder::SitemapFile.new(sitemap_location)
end

# Lazy-initialize a sitemap index instance and return it.
def sitemap_index
  @sitemap_index ||= SitemapGenerator::Builder::SitemapIndexFile.new(sitemap_index_location)
end

# Return the full url to the sitemap index file. When `create_index` is `false`
# the first sitemap is technically the index, so this will be its URL. It's important
# to use this method to get the index url because `sitemap_index.location.url` will
# not be correct in such situations.
#
# KJV: This is somewhat confusing.
def sitemap_index_url
  sitemap_index.index_url
end

# All done. Write out remaining files.
def finalize!
  finalize_sitemap!
  finalize_sitemap_index!
end

# Return a boolean indicating whether to add a link to the sitemap index file
# to the current sitemap.
# This points search engines to your Sitemap Index so
# they include it in the indexing of your site, but is not strictly necessary.
# The value comes from the `:include_index` option (`false` in the defaults set
# by `initialize`). It is always `false` when `sitemaps_host` differs from
# `default_host`.
def include_index?
  return false if default_host && sitemaps_host && sitemaps_host != default_host
  @include_index
end

# Return a boolean indicating whether to automatically add the root url i.e. '/' to the
# current sitemap. Default is `true`. Turned off within a `group()` block.
def include_root?
  @include_root ? true : false
end

# Return the verbose setting. When it has not been set on this instance it is
# taken from SitemapGenerator.verbose, and `true` is used if that is nil too.
# The result is remembered on the instance.
def verbose
  if @verbose.nil?
    @verbose = SitemapGenerator.verbose.nil? || SitemapGenerator.verbose
  end
  @verbose
end

# Return a boolean indicating whether or not to yield the sitemap.
# Falls back to SitemapGenerator.yield_sitemap? when unset on this instance.
def yield_sitemap?
  return SitemapGenerator.yield_sitemap? if @yield_sitemap.nil?
  @yield_sitemap ? true : false
end

protected

# Set each option on this instance using accessor methods. This will affect
# both the sitemap and the sitemap index.
#
# If both `filename` and `namer` are passed, filename is set first so it
# doesn't override the latter. The hash passed in is not modified.
def set_options(opts={})
  remaining = opts.dup
  [:filename, :namer].each do |name|
    value = remaining.delete(name)
    send("#{name}=", value) if value
  end
  remaining.each { |name, value| send("#{name}=", value) }
end

# Given +opts+, modify it and return it prepped for creating a new group from this LinkSet.
# If :public_path is present in +opts+ it is removed because groups cannot
# change the public path.
def options_for_group(opts)
  group_defaults = {
    :include_index => false,
    :include_root => false,
    :sitemap_index => sitemap_index
  }
  opts = SitemapGenerator::Utilities.reverse_merge(opts, group_defaults)
  # A caller-supplied public path is discarded; the value held by this
  # instance (if any) is inherited through the settings below.
  opts.delete(:public_path)

  # Reverse merge the current settings.
  #
  # This list could be a problem because it needs to be maintained
  # when new options are added, but can easily be missed. We really could
  # do with a separate SitemapOptions class.
  inherited_keys = [
    :include_root,
    :include_index,
    :sitemaps_path,
    :public_path,
    :sitemaps_host,
    :verbose,
    :default_host,
    :adapter,
    :create_index,
    :compress,
    :max_sitemap_links
  ]
  # Only settings that currently hold a non-nil value are passed on.
  current_settings = inherited_keys.each_with_object({}) do |key, settings|
    value = instance_variable_get(:"@#{key}")
    settings[key] = value unless value.nil?
  end
  SitemapGenerator::Utilities.reverse_merge!(opts, current_settings)
  opts
end

# Add default links if those options are turned on. Record the fact that we have done so
# in an instance variable.
def add_default_links
  # Set the flag before calling add(), which itself calls this method
  # whenever the flag is not set.
  @added_default_links = true
  link_options = { :lastmod => Time.now, :changefreq => 'always', :priority => 1.0 }
  add('/', link_options) if include_root?
  add(sitemap_index, link_options) if include_index?
end

# Finalize a sitemap by including it in the index and outputting a summary line.
# Do nothing if it has already been finalized.
#
# Don't finalize if the sitemap is empty.
#
# Add the default links if they have not been added yet and no groups have been created.
# If the default links haven't been added we know that the sitemap is empty,
# because they are added on the first call to add(). This ensures that if the
# block passed to create() is empty the default links are still included in the
# sitemap.
+ def finalize_sitemap! + return if sitemap.finalized? || sitemap.empty? && @created_group + add_default_links if !@added_default_links && !@created_group + # This will finalize it. We add to the index even if not creating an index because + # the index keeps track of how many links are in our sitemaps and we need this info + # for the summary line. Also the index determines which file gets the first name + # so everything has to go via the index. + add_to_index(sitemap) unless sitemap.empty? end - - def add_links - yield Mapper.new(self) + + # Finalize a sitemap index and output a summary line. Do nothing if it has already + # been finalized. + def finalize_sitemap_index! + return if @protect_index || sitemap_index.finalized? + sitemap_index.finalize! + sitemap_index.write end - - def add_link(link) - @links << link + + # Return the interpreter linked to this instance. + def interpreter + require 'sitemap_generator/interpreter' + @interpreter ||= SitemapGenerator::Interpreter.new(:link_set => self) + end + + # Reset this instance. Keep the same options, but return to the same state + # as before any sitemaps were created. + def reset! + @sitemap_index = nil if @sitemap_index && @sitemap_index.finalized? && !@protect_index + @sitemap = nil if @sitemap && @sitemap.finalized? + self.namer.reset + @added_default_links = false + end + + # Write the given string to STDOUT. Used so that the sitemap config can be + # evaluated and some info output to STDOUT in a lazy fasion. + def output(string) + return unless verbose + puts string + end + + module LocationHelpers + public + + # Set the host name, including protocol, that will be used by default on each + # of your sitemap links. You can pass a different host in your options to `add` + # if you need to change it on a per-link basis. + def default_host=(value) + @default_host = value + update_location_info(:host, value) + end + + # Set the public_path. This path gives the location of your public directory. 
# The default is the public/ directory in your Rails root. Or if Rails is not
# found, it defaults to public/ in the current directory (of the process).
#
# Example: 'tmp/' if you don't want to generate in public for some reason.
#
# Set to nil to use the current directory.
#
# A relative value is resolved against SitemapGenerator.app.root. Returns the
# resulting Pathname.
def public_path=(value)
  path = Pathname.new(SitemapGenerator::Utilities.append_slash(value))
  path = SitemapGenerator.app.root + path if path.relative?
  @public_path = path
  update_location_info(:public_path, @public_path)
  @public_path
end

# Return a Pathname with the full path to the public directory.
# Falls back to 'public/' when no path has been set yet.
def public_path
  @public_path || send(:public_path=, 'public/')
end

# Set the sitemaps_path. This path gives the location to write sitemaps to
# relative to your public_path.
# Example: 'sitemaps/' to generate your sitemaps in 'public/sitemaps/'.
def sitemaps_path=(value)
  @sitemaps_path = value
  update_location_info(:sitemaps_path, @sitemaps_path)
end

# Set the host name, including protocol, that will be used on all links to your sitemap
# files. Useful when the server that hosts the sitemaps is not on the same host as
# the links in the sitemap.
#
# Note that `include_index` will be turned off to avoid adding a link to a sitemap with
# a different host than the other links.
def sitemaps_host=(value)
  @sitemaps_host = value
  update_location_info(:host, @sitemaps_host)
end

# Set the filename base to use when generating sitemaps (and the sitemap index).
# A new SimpleNamer built from the filename replaces the current namer.
#
# === Example
#   filename = :sitemap
#
# === Generates
#   sitemap.xml.gz, sitemap1.xml.gz, sitemap2.xml.gz, ...
def filename=(value)
  @filename = value
  self.namer = SitemapGenerator::SimpleNamer.new(value)
end

# Set the search engines hash to a new hash of search engine names mapped to
# ping URLs (see ping_search_engines). If the value is nil it is converted
# to an empty hash.
# === Example
#   search_engines = { :google => "http://www.google.com/webmasters/sitemaps/ping?sitemap=%s" }
def search_engines=(value)
  @search_engines = value ? value : {}
end

# Return the hash of search engines, or an empty hash when none are set.
def search_engines
  return {} unless @search_engines
  @search_engines
end

# Return a new +SitemapLocation+ instance with the current options included
def sitemap_location
  location_options = {
    :host => sitemaps_host,
    :namer => namer,
    :public_path => public_path,
    :sitemaps_path => @sitemaps_path,
    :adapter => @adapter,
    :verbose => verbose,
    :compress => @compress,
    :max_sitemap_links => max_sitemap_links
  }
  SitemapGenerator::SitemapLocation.new(location_options)
end

# Return a new location object for the sitemap index, built from the current options.
# NOTE(review): this instantiates SitemapLocation, not SitemapIndexLocation --
# confirm that is intended.
def sitemap_index_location
  location_options = {
    :host => sitemaps_host,
    :namer => namer,
    :public_path => public_path,
    :sitemaps_path => @sitemaps_path,
    :adapter => @adapter,
    :verbose => verbose,
    :create_index => @create_index,
    :compress => @compress
  }
  SitemapGenerator::SitemapLocation.new(location_options)
end

# Set the value of +create_index+ on this instance and, when permitted, on the
# location object of the sitemap index.
#
# Whether to create a sitemap index file. Supported values: `true`, `false`, `:auto`.
# If `true` an index file is always created, regardless of how many links
# are in your sitemap. If `false` an index file is never created.
# If `:auto` an index file is created only if your sitemap has more than
# one sitemap file.
#
# force - when true, update the index location even if the index is finalized
#         or protected.
def create_index=(value, force=false)
  @create_index = value
  return unless @sitemap_index

  # Allow overriding the protected status of the index when we are creating a group.
  # Because sometimes we need to force an index in that case. But generally we don't
  # want to allow people to mess with this value if the index is protected.
  modifiable = !@sitemap_index.finalized? && !@protect_index
  @sitemap_index.location[:create_index] = value if modifiable || force
end

# Set the namer to use to generate the sitemap (and index) file names.
+ # This should be an instance of SitemapGenerator::SimpleNamer + def namer=(value) + @namer = value + @sitemap.location[:namer] = value if @sitemap && !@sitemap.finalized? + @sitemap_index.location[:namer] = value if @sitemap_index && !@sitemap_index.finalized? && !@protect_index + end + + # Return the namer object. If it is not set, looks for it on + # the current sitemap and if there is no sitemap, creates a new one using + # the current filename. + def namer + @namer ||= @sitemap && @sitemap.location.namer || SitemapGenerator::SimpleNamer.new(@filename) + end + + # Set the value of the compress setting. + # + # Values: + # * `true` - Boolean; compress all files + # * `false` - Boolean; write out only uncompressed files + # * `:all_but_first` - Symbol; leave the first file uncompressed but compress any remaining files. + # + # The compression setting applies to groups too. So :all_but_first will have the same effect (the first + # file in the group will not be compressed, the rest will). So if you require different behaviour for your + # groups, pass in a `:compress` option e.g. group(:compress => false) { add('/link') } + def compress=(value) + @compress = value + @sitemap_index.location[:compress] = @compress if @sitemap_index + @sitemap.location[:compress] = @compress if @sitemap + end + + # Return the current compression setting. Its value determines which files will be gzip'ed. + # See the setter for documentation of its values. + def compress + @compress + end + + protected + + # Update the given attribute on the current sitemap index and sitemap file location objects. + # But don't create the index or sitemap files yet if they are not already created. + def update_location_info(attribute, value, opts={}) + opts = SitemapGenerator::Utilities.reverse_merge(opts, :include_index => !@protect_index) + @sitemap_index.location[attribute] = value if opts[:include_index] && @sitemap_index && !@sitemap_index.finalized? 
+ @sitemap.location[attribute] = value if @sitemap && !@sitemap.finalized? + end end + include LocationHelpers end -end \ No newline at end of file +end diff --git a/lib/sitemap_generator/mapper.rb b/lib/sitemap_generator/mapper.rb deleted file mode 100644 index 16cb1302..00000000 --- a/lib/sitemap_generator/mapper.rb +++ /dev/null @@ -1,15 +0,0 @@ -module SitemapGenerator - # Generator instances are used to build links. - # The object passed to the add_links block in config/sitemap.rb is a Generator instance. - class Mapper - attr_accessor :set - - def initialize(set) - @set = set - end - - def add(loc, options = {}) - set.add_link Link.generate(loc, options) - end - end -end \ No newline at end of file diff --git a/lib/sitemap_generator/railtie.rb b/lib/sitemap_generator/railtie.rb new file mode 100644 index 00000000..9e2fc5b0 --- /dev/null +++ b/lib/sitemap_generator/railtie.rb @@ -0,0 +1,7 @@ +module SitemapGenerator + class Railtie < Rails::Railtie + rake_tasks do + load "tasks/sitemap_generator_tasks.rake" + end + end +end diff --git a/lib/sitemap_generator/simple_namer.rb b/lib/sitemap_generator/simple_namer.rb new file mode 100644 index 00000000..aa981960 --- /dev/null +++ b/lib/sitemap_generator/simple_namer.rb @@ -0,0 +1,75 @@ +module SitemapGenerator + # A class for generating sitemap filenames. + # + # The SimpleNamer uses the same namer instance for the sitemap index and the sitemaps. + # If no index is needed, the first sitemap gets the first name. However, if + # an index is needed, the index gets the first name. + # + # A typical sequence would looks like this: + # * sitemap.xml.gz + # * sitemap1.xml.gz + # * sitemap2.xml.gz + # * sitemap3.xml.gz + # * ... + # + # Arguments: + # base - string or symbol that forms the base of the generated filename e.g. + # if `:geo`, files are generated like `geo.xml.gz`, `geo1.xml.gz`, `geo2.xml.gz` etc. + # + # Options: + # :extension - Default: '.xml.gz'. File extension to append. + # :start - Default: 1. 
Numerical index at which to start counting. + # :zero - Default: nil. A string or number that is appended to +base+ + # to create the first name in the sequence. So setting this + # to '_index' would produce 'sitemap_index.xml.gz' as + # the first name. Thereafter, the numerical index defined by +start+ + # is used, and subsequent names would be 'sitemap1.xml.gz', 'sitemap2.xml.gz', etc. + # In these examples the `base` string is assumed to be 'sitemap'. + class SimpleNamer + def initialize(base, options={}) + @options = SitemapGenerator::Utilities.reverse_merge(options, + :zero => nil, # identifies the marker for the start of the series + :extension => '.xml.gz', + :start => 1 + ) + @base = base + reset + end + + def to_s + extension = @options[:extension] + "#{@base}#{@count}#{extension}" + end + + # Reset to the first name + def reset + @count = @options[:zero] + end + + # True if on the first name + def start? + @count == @options[:zero] + end + + # Return this instance set to the next name + def next + if start? + @count = @options[:start] + else + @count += 1 + end + self + end + + # Return this instance set to the previous name + def previous + raise NameError, "Already at the start of the series" if start? + if @count <= @options[:start] + @count = @options[:zero] + else + @count -= 1 + end + self + end + end +end diff --git a/lib/sitemap_generator/sitemap_location.rb b/lib/sitemap_generator/sitemap_location.rb new file mode 100644 index 00000000..c91ba6de --- /dev/null +++ b/lib/sitemap_generator/sitemap_location.rb @@ -0,0 +1,204 @@ +require 'sitemap_generator/helpers/number_helper' + +module SitemapGenerator + # A class for determining the exact location at which to write sitemap data. + # Handles reserving filenames from namers, constructing paths and sending + # data to the adapter to be written out. 
+ class SitemapLocation < Hash + include SitemapGenerator::Helpers::NumberHelper + + PATH_OUTPUT_WIDTH = 47 # Character width of the path in the summary lines + + [:host, :adapter].each do |method| + define_method(method) do + raise SitemapGenerator::SitemapError, "No value set for #{method}" unless self[method] + self[method] + end + end + + [:public_path, :sitemaps_path].each do |method| + define_method(method) do + Pathname.new(SitemapGenerator::Utilities.append_slash(self[method])) + end + end + + # If no +filename+ or +namer+ is provided, the default namer is used, which + # generates names like sitemap.xml.gz, sitemap1.xml.gz, sitemap2.xml.gz and so on. + # + # === Options + # * :adapter - SitemapGenerator::Adapter subclass + # * :filename - full name of the file e.g. 'sitemap1.xml.gz' + # * :host - host name for URLs. The full URL to the file is then constructed from + # the host, sitemaps_path and filename + # * :namer - a SitemapGenerator::SimpleNamer instance for generating file names. + # Should be passed if no +filename+ is provided. + # * :public_path - path to the "public" directory, or the directory you want to + # write sitemaps in. Default is a directory public/ + # in the current working directory, or relative to the Rails root + # directory if running under Rails. + # * :sitemaps_path - gives the path relative to the public_path in which to + # write sitemaps e.g. sitemaps/. + # * :verbose - whether to output summary into to STDOUT. Default +false+. + # * :create_index - whether to create a sitemap index. Default `:auto`. See LinkSet::create_index= + # for possible values. Only applies to the SitemapIndexLocation object. + # * compress - The LinkSet compress setting. Default: +true+. If `false` any `.gz` extension is + # stripped from the filename. If `:all_but_first`, only the `.gz` extension of the first + # filename is stripped off. If `true` the extensions are left unchanged. 
# * max_sitemap_links - The maximum number of links to put in each sitemap.
#
# Raises ArgumentError (from assert_valid_keys) if +opts+ contains an unknown key.
# The hash passed in is left unmodified.
def initialize(opts={})
  # Defaults are filled in on a copy so the caller's hash is not changed.
  opts = opts.dup
  SitemapGenerator::Utilities.assert_valid_keys(opts, [
    :adapter,
    :public_path,
    :sitemaps_path,
    :host,
    :filename,
    :namer,
    :verbose,
    :create_index,
    :compress,
    :max_sitemap_links
  ])
  opts[:adapter] ||= SitemapGenerator::FileAdapter.new
  opts[:public_path] ||= SitemapGenerator.app.root + 'public/'
  # This is a bit of a hack to make the SimpleNamer act like the old SitemapNamer.
  # It doesn't really make sense to create a default namer like this because the
  # namer instance should be shared by the location objects of the sitemaps and
  # sitemap index files. However, this greatly eases testing, so I'm leaving it in
  # for now.
  if !opts[:filename] && !opts[:namer]
    opts[:namer] = SitemapGenerator::SimpleNamer.new(:sitemap, :start => 2, :zero => 1)
  end
  # Normalize :verbose to a strict boolean.
  opts[:verbose] = !!opts[:verbose]
  self.merge!(opts)
end

# Return a copy of this location (via Hash#merge) with the given options merged in
def with(opts={})
  self.merge(opts)
end

# Full path to the directory of the file.
def directory
  (public_path + sitemaps_path).expand_path.to_s
end

# Full path of the file including the filename.
def path
  (public_path + sitemaps_path + filename).expand_path.to_s
end

# Relative path of the file (including the filename) relative to public_path
def path_in_public
  (sitemaps_path + filename).to_s
end

# Full URL of the file, built by joining host, sitemaps_path and filename with URI.join.
def url
  URI.join(host, sitemaps_path.to_s, filename.to_s).to_s
end

# Return the size of the file at path (File.size? returns nil if the file
# does not exist or is empty).
def filesize
  File.size?(path)
end

# Return the filename. Raises an exception if no filename or namer is set.
# If using a namer once the filename has been retrieved from the namer its
# value is locked so that it is unaffected by further changes to the namer.
+ def filename + raise SitemapGenerator::SitemapError, "No filename or namer set" unless self[:filename] || self[:namer] + unless self[:filename] + self.send(:[]=, :filename, self[:namer].to_s, :super => true) + + # Post-process the filename for our compression settings. + # Strip the `.gz` from the extension if we aren't compressing this file. + # If you're setting the filename manually, :all_but_first won't work as + # expected. Ultimately I should force using a namer in all circumstances. + # Changing the filename here will affect how the FileAdapter writes out the file. + if self[:compress] == false || + (self[:namer] && self[:namer].start? && self[:compress] == :all_but_first) + self[:filename].gsub!(/\.gz$/, '') + end + end + self[:filename] + end + + # If a namer is set, reserve the filename and increment the namer. + # Returns the reserved name. + def reserve_name + if self[:namer] + filename + self[:namer].next + end + self[:filename] + end + + # Return true if this location has a fixed filename. If no name has been + # reserved from the namer, for instance, returns false. + def reserved_name? + !!self[:filename] + end + + def namer + self[:namer] + end + + def verbose? + self[:verbose] + end + + # If you set the filename, clear the namer and vice versa. + def []=(key, value, opts={}) + if !opts[:super] + case key + when :namer + super(:filename, nil) + when :filename + super(:namer, nil) + end + end + super(key, value) + end + + # Write `data` out to a file. + # Output a summary line if verbose is true. + def write(data, link_count) + adapter.write(self, data) + puts summary(link_count) if verbose? 
+ end + + # Return a summary string + def summary(link_count) + filesize = number_to_human_size(self.filesize) + width = self.class::PATH_OUTPUT_WIDTH + path = SitemapGenerator::Utilities.ellipsis(self.path_in_public, width) + "+ #{('%-'+width.to_s+'s') % path} #{'%10s' % link_count} links / #{'%10s' % filesize}" + end + end + + class SitemapIndexLocation < SitemapLocation + def initialize(opts={}) + if !opts[:filename] && !opts[:namer] + opts[:namer] = SitemapGenerator::SimpleNamer.new(:sitemap) + end + super(opts) + end + + # Whether to create a sitemap index. Default `:auto`. See LinkSet::create_index= + # for possible values. + # + # A placeholder for an option which should really go into some + # kind of options class. + def create_index + self[:create_index] + end + + # Return a summary string + def summary(link_count) + filesize = number_to_human_size(self.filesize) + width = self.class::PATH_OUTPUT_WIDTH - 3 + path = SitemapGenerator::Utilities.ellipsis(self.path_in_public, width) + "+ #{('%-'+width.to_s+'s') % path} #{'%10s' % link_count} sitemaps / #{'%10s' % filesize}" + end + end +end diff --git a/lib/sitemap_generator/tasks.rb b/lib/sitemap_generator/tasks.rb index a1306242..d33c0137 100644 --- a/lib/sitemap_generator/tasks.rb +++ b/lib/sitemap_generator/tasks.rb @@ -1 +1,53 @@ -load File.expand_path(File.join(File.dirname(__FILE__), '../../tasks/sitemap_generator_tasks.rake')) \ No newline at end of file +# require this file to load the tasks +require 'rake' + +# Require sitemap_generator at runtime. If we don't do this the ActionView helpers are included +# before the Rails environment can be loaded by other Rake tasks, which causes problems +# for those tasks when rendering using ActionView. +namespace :sitemap do + # Require sitemap_generator only. When installed as a plugin the require will fail, so in + # that case, load the environment first. 
+ task :require do + begin + require 'sitemap_generator' + rescue LoadError => e + if defined?(Rails::VERSION) + Rake::Task['sitemap:require_environment'].invoke + else + raise e + end + end + end + + # Require sitemap_generator after loading the Rails environment. We still need the require + # in case we are installed as a gem and are setup to not automatically be required. + task :require_environment do + if defined?(Rails::VERSION) + Rake::Task['environment'].invoke + end + require 'sitemap_generator' + end + + desc "Install a default config/sitemap.rb file" + task :install => ['sitemap:require'] do + SitemapGenerator::Utilities.install_sitemap_rb(verbose) + end + + desc "Delete all Sitemap files in public/ directory" + task :clean => ['sitemap:require'] do + SitemapGenerator::Utilities.clean_files + end + + desc "Generate sitemaps and ping search engines." + task :refresh => ['sitemap:create'] do + SitemapGenerator::Sitemap.ping_search_engines + end + + desc "Generate sitemaps but don't ping search engines." + task 'refresh:no_ping' => ['sitemap:create'] + + desc "Generate sitemaps but don't ping search engines. Alias for refresh:no_ping." + task :create => ['sitemap:require_environment'] do + SitemapGenerator::Interpreter.run(:config_file => ENV["CONFIG_FILE"], :verbose => verbose) + end +end diff --git a/lib/sitemap_generator/templates.rb b/lib/sitemap_generator/templates.rb new file mode 100644 index 00000000..5b6f3522 --- /dev/null +++ b/lib/sitemap_generator/templates.rb @@ -0,0 +1,41 @@ +module SitemapGenerator + # Provide convenient access to template files. E.g. + # + # SitemapGenerator.templates.sitemap_index + # + # Lazy-load and cache for efficient access. + # Define an accessor method for each template file. 
+ class Templates + FILES = { + :sitemap_sample => 'sitemap.rb', + } + + # Dynamically define accessors for each key defined in FILES + attr_accessor(*FILES.keys) + FILES.keys.each do |name| + eval <<-END + define_method(:#{name}) do + @#{name} ||= read_template(:#{name}) + end + END + end + + def initialize(root = SitemapGenerator.root) + @root = root + end + + # Return the full path to a template. + # + # file template symbol e.g. :sitemap_sample + def template_path(template) + File.join(@root, 'templates', self.class::FILES[template]) + end + + protected + + # Read the template file and return its contents. + def read_template(template) + File.read(template_path(template)) + end + end +end diff --git a/lib/sitemap_generator/utilities.rb b/lib/sitemap_generator/utilities.rb new file mode 100644 index 00000000..6c080395 --- /dev/null +++ b/lib/sitemap_generator/utilities.rb @@ -0,0 +1,181 @@ +module SitemapGenerator + module Utilities + extend self + + # Copy templates/sitemap.rb to config if not there yet. + def install_sitemap_rb(verbose=false) + if File.exist?(SitemapGenerator.app.root + 'config/sitemap.rb') + puts "already exists: config/sitemap.rb, file not copied" if verbose + else + FileUtils.cp( + SitemapGenerator.templates.template_path(:sitemap_sample), + SitemapGenerator.app.root + 'config/sitemap.rb') + puts "created: config/sitemap.rb" if verbose + end + end + + # Remove config/sitemap.rb if exists. + def uninstall_sitemap_rb + if File.exist?(SitemapGenerator.app.root + 'config/sitemap.rb') + File.rm(SitemapGenerator.app.root + 'config/sitemap.rb') + end + end + + # Clean sitemap files in output directory. + def clean_files + FileUtils.rm(Dir[SitemapGenerator.app.root + 'public/sitemap*.xml.gz']) + end + + # Validate all keys in a hash match *valid keys, raising ArgumentError on a + # mismatch. Note that keys are NOT treated indifferently, meaning if you use + # strings for keys but assert symbols as keys, this will fail. 
def assert_valid_keys(hash, *valid_keys)
  # valid_keys may be given as a list of arguments or as a single array.
  unknown_keys = hash.keys - valid_keys.flatten
  return if unknown_keys.empty?

  raise ArgumentError, "Unknown key(s): #{unknown_keys.join(", ")}"
end

# Return a new hash with all keys converted to symbols, as long as
# they respond to +to_sym+. The hash passed in is not modified.
def symbolize_keys(hash)
  symbolize_keys!(hash.dup)
end

# Destructively convert all keys to symbols, as long as they respond
# to +to_sym+. Keys that do not respond to +to_sym+ are left as they are.
# Returns the hash that was passed in.
def symbolize_keys!(hash)
  # Iterate over a snapshot of the keys because the hash is modified in the loop.
  hash.keys.each do |key|
    # Check the capability explicitly instead of rescuing every error raised
    # by +to_sym+, so unrelated failures are not silently hidden.
    symbol = key.respond_to?(:to_sym) ? key.to_sym : nil
    hash[symbol || key] = hash.delete(key)
  end
  hash
end

# Make a list of `value` if it is not a list already. If `value` is
# nil, an empty list is returned. If `value` is already a list, return it unchanged.
def as_array(value)
  return [] if value.nil?

  value.is_a?(Array) ? value : [value]
end

# Rounds the float with the specified precision. When a precision is given
# the result is a Float; without one the result of +float.round+ is returned.
#
#   x = 1.337
#   round(x)     # => 1
#   round(x, 1)  # => 1.3
#   round(x, 2)  # => 1.34
def round(float, precision = nil)
  return float.round unless precision

  magnitude = 10.0 ** precision
  (float * magnitude).round / magnitude
end

# Returns a new hash that merges +hash+ into +other_hash+, so the keys in
# +hash+ take precedence over those in +other_hash+. Neither argument is
# modified. This is particularly useful for filling an option hash with
# default values:
#
#   def setup(options = {})
#     options = reverse_merge(options, :size => 25, :velocity => 10)
#   end
#
# Using merge, the above example would look as follows:
#
#   def setup(options = {})
#     options = { :size => 25, :velocity => 10 }.merge(options)
#   end
#
# The default :size and :velocity are only set if the +options+ hash passed in doesn't already
# have the respective key.
def reverse_merge(hash, other_hash)
  other_hash.merge(hash)
end

# Performs the opposite of merge, with the keys and values from the first hash taking precedence over the second.
# Modifies the receiver in place.
+ def reverse_merge!(hash, other_hash) + hash.merge!( other_hash ){|k,o,n| o } + end + + # An object is blank if it's false, empty, or a whitespace string. + # For example, "", " ", +nil+, [], and {} are blank. + # + # This simplifies: + # + # if !address.nil? && !address.empty? + # + # ...to: + # + # if !address.blank? + def blank?(object) + case object + when NilClass, FalseClass + true + when TrueClass, Numeric + false + when String + object !~ /\S/ + when Hash, Array + object.empty? + when Object + object.respond_to?(:empty?) ? object.empty? : !object + end + end + + # An object is present if it's not blank. + def present?(object) + !blank?(object) + end + + # Sets $VERBOSE for the duration of the block and back to its original value afterwards. + def with_warnings(flag) + old_verbose, $VERBOSE = $VERBOSE, flag + yield + ensure + $VERBOSE = old_verbose + end + + def titleize(string) + string.gsub!(/_/, ' ') + string.split(/(\W)/).map(&:capitalize).join + end + + def truthy?(value) + ['1', 1, 't', 'true', true].include?(value) + end + + def falsy?(value) + ['0', 0, 'f', 'false', false].include?(value) + end + + # Append a slash to `path` if it does not already end in a slash. + # Returns a string. Expects a string or Pathname object. + def append_slash(path) + strpath = path.to_s + if strpath[-1] != nil && strpath[-1].chr != '/' + strpath + '/' + else + strpath + end + end + + # Replace the last 3 characters of string with ... if the string is as big + # or bigger than max. + def ellipsis(string, max) + if string.size > max + (string[0, max - 3] || '') + '...' + else + string + end + end + + # Return the bytesize length of the string. Ruby 1.8.6 compatible. + def bytesize(string) + string.respond_to?(:bytesize) ? 
string.bytesize : string.length + end + end +end diff --git a/lib/tasks/sitemap_generator_tasks.rake b/lib/tasks/sitemap_generator_tasks.rake new file mode 100644 index 00000000..f2f2c19b --- /dev/null +++ b/lib/tasks/sitemap_generator_tasks.rake @@ -0,0 +1 @@ +load(File.expand_path(File.join(File.dirname(__FILE__), '../sitemap_generator/tasks.rb'))) diff --git a/rails/install.rb b/rails/install.rb index c49f1591..30db915d 100644 --- a/rails/install.rb +++ b/rails/install.rb @@ -1,12 +1,2 @@ # Install hook code here - -# Copy sitemap_template.rb to config/sitemap.rb -require 'fileutils' -sitemap_template = File.join(File.dirname(__FILE__), '../templates/sitemap.rb') -new_sitemap = File.join(RAILS_ROOT, 'config/sitemap.rb') -if File.exist?(new_sitemap) - puts "already exists: config/sitemap.rb, file not copied" -else - puts "created: config/sitemap.rb" - FileUtils.cp(sitemap_template, new_sitemap) -end \ No newline at end of file +SitemapGenerator::Utilities.install_sitemap_rb diff --git a/rails/uninstall.rb b/rails/uninstall.rb index ffe744bf..db822434 100644 --- a/rails/uninstall.rb +++ b/rails/uninstall.rb @@ -1,4 +1,2 @@ # Uninstall hook code here - -new_sitemap = File.join(RAILS_ROOT, 'config/sitemap.rb') -File.rm(new_sitemap) if File.exist?(new_sitemap) \ No newline at end of file +SitemapGenerator::Utilities.uninstall_sitemap_rb diff --git a/sitemap_generator.gemspec b/sitemap_generator.gemspec index 4c8f7c9f..edf1b5ed 100644 --- a/sitemap_generator.gemspec +++ b/sitemap_generator.gemspec @@ -1,73 +1,24 @@ -# Generated by jeweler -# DO NOT EDIT THIS FILE DIRECTLY -# Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command -# -*- encoding: utf-8 -*- +# encoding: utf-8 Gem::Specification.new do |s| - s.name = %q{sitemap_generator} - s.version = "0.2.2" - - s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? 
:required_rubygems_version= - s.authors = ["Adam Salter"] - s.date = %q{2009-11-10} - s.description = %q{Install as a plugin or Gem to easily generate ['enterprise-class'][enterprise_class] Google Sitemaps for your Rails site, using a simple 'Rails Routes'-like DSL and a single rake task.} - s.email = %q{adam.salter@codebright.net } - s.extra_rdoc_files = [ - "README.md" - ] - s.files = [ - "MIT-LICENSE", - "README.md", - "Rakefile", - "VERSION", - "lib/sitemap_generator.rb", - "lib/sitemap_generator/helper.rb", - "lib/sitemap_generator/link.rb", - "lib/sitemap_generator/link_set.rb", - "lib/sitemap_generator/mapper.rb", - "lib/sitemap_generator/tasks.rb", - "rails/install.rb", - "rails/uninstall.rb", - "tasks/sitemap_generator_tasks.rake", - "templates/sitemap.rb", - "templates/sitemap_index.builder", - "templates/xml_sitemap.builder" - ] - s.homepage = %q{http://github.com/adamsalter/sitemap_generator} - s.rdoc_options = ["--charset=UTF-8"] - s.require_paths = ["lib"] - s.rubygems_version = %q{1.3.5} - s.summary = %q{Generate 'enterprise-class' Sitemaps for your Rails site using a simple 'Rails Routes'-like DSL and a single Rake task} - s.test_files = [ - "test/mock_app/app/controllers/application_controller.rb", - "test/mock_app/app/controllers/contents_controller.rb", - "test/mock_app/app/models/content.rb", - "test/mock_app/config/boot.rb", - "test/mock_app/config/environment.rb", - "test/mock_app/config/environments/development.rb", - "test/mock_app/config/environments/production.rb", - "test/mock_app/config/environments/test.rb", - "test/mock_app/config/initializers/backtrace_silencers.rb", - "test/mock_app/config/initializers/inflections.rb", - "test/mock_app/config/initializers/mime_types.rb", - "test/mock_app/config/initializers/new_rails_defaults.rb", - "test/mock_app/config/initializers/session_store.rb", - "test/mock_app/config/routes.rb", - "test/mock_app/config/sitemap.rb", - "test/mock_app/db/migrate/20090826121911_create_contents.rb", - 
"test/mock_app/db/schema.rb", - "test/sitemap_generator_test.rb", - "test/test_helper.rb" - ] - - if s.respond_to? :specification_version then - current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION - s.specification_version = 3 - - if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then - else - end - else - end + s.name = 'sitemap_generator' + s.version = File.read('VERSION').chomp + s.platform = Gem::Platform::RUBY + s.authors = ['Karl Varga'] + s.email = 'kjvarga@gmail.com' + s.homepage = 'https://github.com/kjvarga/sitemap_generator' + s.summary = 'Easily generate XML Sitemaps' + s.description = 'SitemapGenerator is a framework-agnostic XML Sitemap generator written in Ruby with automatic Rails integration. It supports Video, News, Image, Mobile, PageMap and Alternate Links sitemap extensions and includes Rake tasks for managing your sitemaps, as well as many other great features.' + s.license = 'MIT' + s.add_dependency 'builder', '~> 3.0' + s.add_development_dependency 'aws-sdk-core' + s.add_development_dependency 'aws-sdk-s3' + s.add_development_dependency 'fog-aws' + s.add_development_dependency 'google-cloud-storage' + s.add_development_dependency 'nokogiri' + s.add_development_dependency 'rake' + s.add_development_dependency 'rspec_junit_formatter' + s.add_development_dependency 'rspec' + s.add_development_dependency 'webmock' + s.files = Dir.glob('{lib,rails,templates}/**/*') + %w(CHANGES.md MIT-LICENSE README.md VERSION) end - diff --git a/spec/files/sitemap.create.rb b/spec/files/sitemap.create.rb new file mode 100644 index 00000000..44c5128d --- /dev/null +++ b/spec/files/sitemap.create.rb @@ -0,0 +1,12 @@ +SitemapGenerator::Sitemap.default_host = 'http://www.example.com' + +SitemapGenerator::Sitemap.create do + add '/contents', :priority => 0.7, :changefreq => 'daily' + + # add all individual articles + (1..10).each do |i| + add '/content/#{i}' + end + + add '/merchant_path', :host => 'https://www.example.com' +end 
diff --git a/spec/files/sitemap.groups.rb b/spec/files/sitemap.groups.rb new file mode 100644 index 00000000..5871b655 --- /dev/null +++ b/spec/files/sitemap.groups.rb @@ -0,0 +1,46 @@ +SitemapGenerator::Sitemap.default_host = 'http://www.example.com' + +SitemapGenerator::Sitemap.create( + :include_root => true, :include_index => true, + :filename => :new_sitemaps, :sitemaps_path => 'fr/') do + + add('/one', :priority => 0.7, :changefreq => 'daily') + + # Test a new location and filename and sitemaps host + group(:sitemaps_path => 'en/', :filename => :xxx, + :sitemaps_host => 'http://newhost.com') do + + add '/two' + add '/three' + end + + # Test a simple namer. + group(:namer => SitemapGenerator::SimpleNamer.new(:abc, :start => 4, :zero => 3)) do + add '/four' + add '/five' + add '/six' + end + + # Test a simple namer + group(:namer => SitemapGenerator::SimpleNamer.new(:def)) do + add '/four' + add '/five' + add '/six' + end + + add '/seven' + + # This should be in a file of its own. + # Not technically valid to have a link with a different host, but people like + # to do strange things sometimes. 
+ group(:sitemaps_host => 'http://exceptional.com') do + add '/eight' + add '/nine' + end + + add '/ten' + + # Not technically valid to have a link with a different host, but people like + # to do strange things sometimes + add '/merchant_path', :host => 'https://www.merchanthost.com' +end diff --git a/spec/sitemap_generator/adapters/aws_sdk_adapter_spec.rb b/spec/sitemap_generator/adapters/aws_sdk_adapter_spec.rb new file mode 100644 index 00000000..72774922 --- /dev/null +++ b/spec/sitemap_generator/adapters/aws_sdk_adapter_spec.rb @@ -0,0 +1,100 @@ +require 'spec_helper' +require 'aws-sdk-core' +require 'aws-sdk-s3' + +describe 'SitemapGenerator::AwsSdkAdapter' do + let(:location) { SitemapGenerator::SitemapLocation.new(compress: compress) } + let(:adapter) { SitemapGenerator::AwsSdkAdapter.new('bucket', options) } + let(:options) { {} } + let(:compress) { nil } + + shared_examples 'it writes the raw data to a file and then uploads that file to S3' do + it 'writes the raw data to a file and then uploads that file to S3' do + s3_object = double(:s3_object) + s3_resource = double(:s3_resource) + s3_bucket_resource = double(:s3_bucket_resource) + expect(adapter).to receive(:s3_resource).and_return(s3_resource) + expect(s3_resource).to receive(:bucket).with('bucket').and_return(s3_bucket_resource) + expect(s3_bucket_resource).to receive(:object).with('path_in_public').and_return(s3_object) + expect(location).to receive(:path_in_public).and_return('path_in_public') + expect(location).to receive(:path).and_return('path') + expect(s3_object).to receive(:upload_file).with('path', hash_including( + acl: 'public-read', + cache_control: 'private, max-age=0, no-cache', + content_type: content_type + )).and_return(nil) + expect_any_instance_of(SitemapGenerator::FileAdapter).to receive(:write).with(location, 'raw_data') + adapter.write(location, 'raw_data') + end + end + + describe 'write' do + context 'with no compress option' do + let(:content_type) { 'application/xml' } + 
+ it_behaves_like 'it writes the raw data to a file and then uploads that file to S3' + end + + context 'with compress true' do + let(:content_type) { 'application/x-gzip' } + let(:compress) { true } + + it_behaves_like 'it writes the raw data to a file and then uploads that file to S3' + end + end + + describe 's3_resource' do + it 'returns a new S3 resource' do + s3_resource_options = double(:s3_resource_options) + expect(adapter).to receive(:s3_resource_options).and_return(s3_resource_options) + expect(Aws::S3::Resource).to receive(:new).with(s3_resource_options).and_return('resource') + expect(adapter.send(:s3_resource)).to eql('resource') + end + end + + describe 's3_resource_options' do + it 'does not include region' do + expect(adapter.send(:s3_resource_options)[:region]).to be_nil + end + + it 'does not include credentials' do + expect(adapter.send(:s3_resource_options)[:credentials]).to be_nil + end + + context 'with AWS region option' do + let(:options) { { aws_region: 'region' } } + + it 'includes the region' do + expect(adapter.send(:s3_resource_options)[:region]).to eql('region') + end + end + + it 'does not include endpoint' do + expect(adapter.send(:s3_resource_options)[:endpoint]).to be_nil + end + + context 'with AWS endpoint option' do + let(:options) { { aws_endpoint: 'endpoint' } } + + it 'includes the endpoint' do + expect(adapter.send(:s3_resource_options)[:endpoint]).to eql('endpoint') + end + end + + context 'with AWS access key id and secret access key options' do + let(:options) do + { + aws_access_key_id: 'access_key_id', + aws_secret_access_key: 'secret_access_key' + } + end + + it 'includes the credentials' do + credentials = adapter.send(:s3_resource_options)[:credentials] + expect(credentials).to be_a(Aws::Credentials) + expect(credentials.access_key_id).to eql('access_key_id') + expect(credentials.secret_access_key).to eql('secret_access_key') + end + end + end +end diff --git a/spec/sitemap_generator/adapters/file_adapter_spec.rb 
b/spec/sitemap_generator/adapters/file_adapter_spec.rb new file mode 100644 index 00000000..177a8ba8 --- /dev/null +++ b/spec/sitemap_generator/adapters/file_adapter_spec.rb @@ -0,0 +1,20 @@ +require 'spec_helper' + +describe 'SitemapGenerator::FileAdapter' do + let(:location) { SitemapGenerator::SitemapLocation.new } + let(:adapter) { SitemapGenerator::FileAdapter.new } + + describe 'write' do + it 'should gzip contents if filename ends in .gz' do + expect(location).to receive(:filename).and_return('sitemap.xml.gz').twice + expect(adapter).to receive(:gzip) + adapter.write(location, 'data') + end + + it 'should not gzip contents if filename does not end in .gz' do + expect(location).to receive(:filename).and_return('sitemap.xml').twice + expect(adapter).to receive(:plain) + adapter.write(location, 'data') + end + end +end diff --git a/spec/sitemap_generator/adapters/google_storage_adapter_spec.rb b/spec/sitemap_generator/adapters/google_storage_adapter_spec.rb new file mode 100644 index 00000000..d7acfd02 --- /dev/null +++ b/spec/sitemap_generator/adapters/google_storage_adapter_spec.rb @@ -0,0 +1,33 @@ +# encoding: UTF-8 +require 'spec_helper' +require 'google/cloud/storage' + +describe SitemapGenerator::GoogleStorageAdapter do + subject(:adapter) { SitemapGenerator::GoogleStorageAdapter.new(options) } + + let(:options) { { credentials: 'abc', project_id: 'project_id', bucket: 'bucket' } } + + describe 'write' do + let(:location) { SitemapGenerator::SitemapLocation.new } + + it 'writes the raw data to a file and then uploads that file to Google Storage' do + bucket = double(:bucket) + storage = double(:storage) + bucket_resource = double(:bucket_resource) + expect(Google::Cloud::Storage).to receive(:new).with(credentials: 'abc', project_id: 'project_id').and_return(storage) + expect(storage).to receive(:bucket).with('bucket').and_return(bucket_resource) + expect(location).to receive(:path_in_public).and_return('path_in_public') + expect(location).to 
receive(:path).and_return('path') + expect(bucket_resource).to receive(:create_file).with('path', 'path_in_public', acl: 'public').and_return(nil) + expect_any_instance_of(SitemapGenerator::FileAdapter).to receive(:write).with(location, 'raw_data') + adapter.write(location, 'raw_data') + end + end + + describe '.new' do + it "doesn't modify the original options" do + adapter + expect(options.size).to be(3) + end + end +end diff --git a/spec/sitemap_generator/adapters/s3_adapter_spec.rb b/spec/sitemap_generator/adapters/s3_adapter_spec.rb new file mode 100644 index 00000000..582a2351 --- /dev/null +++ b/spec/sitemap_generator/adapters/s3_adapter_spec.rb @@ -0,0 +1,35 @@ +# encoding: UTF-8 +require 'spec_helper' +require 'fog-aws' + +describe SitemapGenerator::S3Adapter do + let(:location) do + SitemapGenerator::SitemapLocation.new( + :namer => SitemapGenerator::SimpleNamer.new(:sitemap), + :public_path => 'tmp/', + :sitemaps_path => 'test/', + :host => 'http://example.com/') + end + let(:directory) do + double('directory', + :files => double('files', :create => nil) + ) + end + let(:directories) do + double('directories', + :directories => + double('directory class', + :new => directory + ) + ) + end + + before do + SitemapGenerator::S3Adapter # eager load + expect(Fog::Storage).to receive(:new).and_return(directories) + end + + it 'should create the file in S3 with a single operation' do + subject.write(location, 'payload') + end +end diff --git a/spec/sitemap_generator/application_spec.rb b/spec/sitemap_generator/application_spec.rb new file mode 100644 index 00000000..c7140611 --- /dev/null +++ b/spec/sitemap_generator/application_spec.rb @@ -0,0 +1,53 @@ +require 'spec_helper' + +describe SitemapGenerator::Application do + before do + stub_const('Rails::VERSION', '1') + @app = SitemapGenerator::Application.new + end + + describe 'is_at_least_rails3?' 
do + tests = { + :nil => false, + '2.3.11' => false, + '3.0.1' => true, + '3.0.11' => true + } + + it 'should identify the rails version correctly' do + tests.each do |version, result| + expect(Rails).to receive(:version).and_return(version) + expect(@app.is_at_least_rails3?).to eq(result) + end + end + end + + describe 'with Rails' do + before do + @root = '/test' + expect(Rails).to receive(:root).and_return(@root).at_least(:once) + end + + it 'should use the Rails.root' do + expect(@app.root).to be_a(Pathname) + expect(@app.root.to_s).to eq(@root) + expect((@app.root + 'public/').to_s).to eq(File.join(@root, 'public/')) + end + end + + describe 'with no Rails' do + before do + hide_const('Rails') + end + + it 'should not be Rails' do + expect(@app.is_rails?).to be(false) + end + + it 'should use the current working directory' do + expect(@app.root).to be_a(Pathname) + expect(@app.root.to_s).to eq(Dir.getwd) + expect((@app.root + 'public/').to_s).to eq(File.join(Dir.getwd, 'public/')) + end + end +end diff --git a/spec/sitemap_generator/builder/sitemap_file_spec.rb b/spec/sitemap_generator/builder/sitemap_file_spec.rb new file mode 100644 index 00000000..2e56adc8 --- /dev/null +++ b/spec/sitemap_generator/builder/sitemap_file_spec.rb @@ -0,0 +1,153 @@ +require 'spec_helper' + +describe 'SitemapGenerator::Builder::SitemapFile' do + let(:location) { SitemapGenerator::SitemapLocation.new(:namer => SitemapGenerator::SimpleNamer.new(:sitemap, :start => 2, :zero => 1), :public_path => 'tmp/', :sitemaps_path => 'test/', :host => 'http://example.com/') } + let(:sitemap) { SitemapGenerator::Builder::SitemapFile.new(location) } + + it 'should have a default namer' do + sitemap = SitemapGenerator::Builder::SitemapFile.new + expect(sitemap.location.filename).to eq('sitemap1.xml.gz') + end + + it 'should return the name of the sitemap file' do + expect(sitemap.location.filename).to eq('sitemap1.xml.gz') + end + + it 'should return the URL' do + expect(sitemap.location.url).to 
eq('http://example.com/test/sitemap1.xml.gz') + end + + it 'should return the path' do + expect(sitemap.location.path).to eq(File.expand_path('tmp/test/sitemap1.xml.gz')) + end + + it 'should be empty' do + expect(sitemap.empty?).to be(true) + expect(sitemap.link_count).to eq(0) + end + + it 'should not be finalized' do + expect(sitemap.finalized?).to be(false) + end + + it 'should raise if no default host is set' do + expect { SitemapGenerator::Builder::SitemapFile.new.location.url }.to raise_error(SitemapGenerator::SitemapError) + end + + describe 'lastmod' do + it 'should be the file last modified time' do + lastmod = (Time.now - 1209600) + sitemap.location.reserve_name + expect(File).to receive(:mtime).with(sitemap.location.path).and_return(lastmod) + expect(sitemap.lastmod).to eq(lastmod) + end + + it 'should be nil if the location has not reserved a name' do + expect(File).to receive(:mtime).never + expect(sitemap.lastmod).to be_nil + end + + it 'should be nil if location has reserved a name and the file DNE' do + sitemap.location.reserve_name + expect(File).to receive(:mtime).and_raise(Errno::ENOENT) + expect(sitemap.lastmod).to be_nil + end + end + + describe 'new' do + let(:original_sitemap) { sitemap } + let(:new_sitemap) { sitemap.new } + + before do + original_sitemap + new_sitemap + end + + it 'should inherit the same options' do + # The name is the same because the original sitemap was not finalized + expect(new_sitemap.location.url).to eq('http://example.com/test/sitemap1.xml.gz') + expect(new_sitemap.location.path).to eq(File.expand_path('tmp/test/sitemap1.xml.gz')) + end + + it 'should not share the same location instance' do + expect(new_sitemap.location).not_to be(original_sitemap.location) + end + + it 'should inherit the same namer instance' do + expect(new_sitemap.location.namer).to eq(original_sitemap.location.namer) + end + end + + describe 'reserve_name' do + it 'should reserve the name from the location' do + 
expect(sitemap.reserved_name?).to be(false) + expect(sitemap.location).to receive(:reserve_name).and_return('name') + sitemap.reserve_name + expect(sitemap.reserved_name?).to be(true) + expect(sitemap.instance_variable_get(:@reserved_name)).to eq('name') + end + + it 'should be safe to call multiple times' do + expect(sitemap.location).to receive(:reserve_name).and_return('name').once + sitemap.reserve_name + sitemap.reserve_name + end + end + + describe 'add' do + it 'should use the host provided' do + url = SitemapGenerator::Builder::SitemapUrl.new('/one', :host => 'http://newhost.com/') + expect(SitemapGenerator::Builder::SitemapUrl).to receive(:new).with('/one', :host => 'http://newhost.com').and_return(url) + sitemap.add '/one', :host => 'http://newhost.com' + end + + it 'should use the host from the location' do + url = SitemapGenerator::Builder::SitemapUrl.new('/one', :host => 'http://example.com/') + expect(SitemapGenerator::Builder::SitemapUrl).to receive(:new).with('/one', :host => 'http://example.com/').and_return(url) + sitemap.add '/one' + end + end + + describe 'file_can_fit?' 
do + let(:link_count) { 10 } + + before do + expect(sitemap).to receive(:max_sitemap_links).and_return(max_sitemap_links) + sitemap.instance_variable_set(:@link_count, link_count) + end + + context 'when link count is less than max' do + let(:max_sitemap_links) { link_count + 1 } + + it 'returns true' do + expect(sitemap.file_can_fit?(1)).to be(true) + end + end + + context 'when link count is at max' do + let(:max_sitemap_links) { link_count } + + it 'returns true' do + expect(sitemap.file_can_fit?(1)).to be(false) + end + end + end + + describe 'max_sitemap_links' do + context 'when not present in the location' do + it 'returns SitemapGenerator::MAX_SITEMAP_LINKS' do + expect(sitemap.max_sitemap_links).to eq(SitemapGenerator::MAX_SITEMAP_LINKS) + end + end + + context 'when present in the location' do + before do + expect(sitemap.location).to receive(:[]).with(:max_sitemap_links).and_return(10) + end + + it 'returns the value from the location' do + expect(sitemap.max_sitemap_links).to eq(10) + end + end + end +end diff --git a/spec/sitemap_generator/builder/sitemap_index_file_spec.rb b/spec/sitemap_generator/builder/sitemap_index_file_spec.rb new file mode 100644 index 00000000..28852ae6 --- /dev/null +++ b/spec/sitemap_generator/builder/sitemap_index_file_spec.rb @@ -0,0 +1,124 @@ +require 'spec_helper' + +describe 'SitemapGenerator::Builder::SitemapIndexFile' do + let(:location) { SitemapGenerator::SitemapLocation.new(:filename => 'sitemap.xml.gz', :public_path => '/public/', :host => 'http://example.com/') } + let(:index) { SitemapGenerator::Builder::SitemapIndexFile.new(location) } + + before do + index.location[:sitemaps_path] = 'test/' + end + + it 'should return the URL' do + expect(index.location.url).to eq('http://example.com/test/sitemap.xml.gz') + end + + it 'should return the path' do + expect(index.location.path).to eq('/public/test/sitemap.xml.gz') + end + + it 'should be empty' do + expect(index.empty?).to be(true) + expect(index.link_count).to 
eq(0) + end + + it 'should not have a last modification data' do + expect(index.lastmod).to be_nil + end + + it 'should not be finalized' do + expect(index.finalized?).to be(false) + end + + it 'filename should be set' do + expect(index.location.filename).to eq('sitemap.xml.gz') + end + + it 'should have a default namer' do + index = SitemapGenerator::Builder::SitemapIndexFile.new + expect(index.location.filename).to eq('sitemap.xml.gz') + end + + describe 'link_count' do + it 'should return the link count' do + index.instance_variable_set(:@link_count, 10) + expect(index.link_count).to eq(10) + end + end + + describe 'create_index?' do + it 'should return false' do + index.location[:create_index] = false + expect(index.create_index?).to be(false) + + index.instance_variable_set(:@link_count, 10) + expect(index.create_index?).to be(false) + end + + it 'should return true' do + index.location[:create_index] = true + expect(index.create_index?).to be(true) + + index.instance_variable_set(:@link_count, 1) + expect(index.create_index?).to be(true) + end + + it 'when :auto, should be true if more than one link' do + index.instance_variable_set(:@link_count, 1) + index.location[:create_index] = :auto + expect(index.create_index?).to be(false) + + index.instance_variable_set(:@link_count, 2) + expect(index.create_index?).to be(true) + end + end + + describe 'add' do + it 'should use the host provided' do + url = SitemapGenerator::Builder::SitemapIndexUrl.new('/one', :host => 'http://newhost.com/') + expect(SitemapGenerator::Builder::SitemapIndexUrl).to receive(:new).with('/one', :host => 'http://newhost.com').and_return(url) + index.add '/one', :host => 'http://newhost.com' + end + + it 'should use the host from the location' do + url = SitemapGenerator::Builder::SitemapIndexUrl.new('/one', :host => 'http://example.com/') + expect(SitemapGenerator::Builder::SitemapIndexUrl).to receive(:new).with('/one', :host => 'http://example.com/').and_return(url) + index.add '/one' + 
end + + describe 'when adding manually' do + it 'should reserve a name' do + expect(index).to receive(:reserve_name) + index.add '/link' + end + + it 'should create index' do + expect(index.create_index?).to be(false) + index.add '/one' + expect(index.create_index?).to be(true) + end + end + end + + describe 'index_url' do + it 'when not creating an index, should be the first sitemap url' do + index.instance_variable_set(:@create_index, false) + index.instance_variable_set(:@first_sitemap_url, 'http://test.com/index.xml') + expect(index.create_index?).to be(false) + expect(index.index_url).to eq('http://test.com/index.xml') + end + + it 'if there\'s no first sitemap url, should default to the index location url' do + index.instance_variable_set(:@create_index, false) + index.instance_variable_set(:@first_sitemap_url, nil) + expect(index.create_index?).to be(false) + expect(index.index_url).to eq(index.location.url) + expect(index.index_url).to eq('http://example.com/test/sitemap.xml.gz') + end + + it 'when creating an index, should be the index location url' do + index.instance_variable_set(:@create_index, true) + expect(index.index_url).to eq(index.location.url) + expect(index.index_url).to eq('http://example.com/test/sitemap.xml.gz') + end + end +end diff --git a/spec/sitemap_generator/builder/sitemap_index_url_spec.rb b/spec/sitemap_generator/builder/sitemap_index_url_spec.rb new file mode 100644 index 00000000..0b95b12a --- /dev/null +++ b/spec/sitemap_generator/builder/sitemap_index_url_spec.rb @@ -0,0 +1,28 @@ +require 'spec_helper' + +describe SitemapGenerator::Builder::SitemapIndexUrl do + let(:index) { + SitemapGenerator::Builder::SitemapIndexFile.new( + :sitemaps_path => 'sitemaps/', + :host => 'http://test.com', + :filename => 'sitemap_index.xml.gz' + ) + } + let(:url) { SitemapGenerator::Builder::SitemapUrl.new(index) } + + it 'should return the correct url' do + expect(url[:loc]).to eq('http://test.com/sitemaps/sitemap_index.xml.gz') + end + + it 
'should use the host from the index' do + host = 'http://myexample.com' + expect(index.location).to receive(:host).and_return(host) + expect(url[:host]).to eq(host) + end + + it 'should use the public path for the link' do + path = '/path' + expect(index.location).to receive(:path_in_public).and_return(path) + expect(url[:loc]).to eq('http://test.com/path') + end +end diff --git a/spec/sitemap_generator/builder/sitemap_url_spec.rb b/spec/sitemap_generator/builder/sitemap_url_spec.rb new file mode 100644 index 00000000..fabe5735 --- /dev/null +++ b/spec/sitemap_generator/builder/sitemap_url_spec.rb @@ -0,0 +1,194 @@ +require 'spec_helper' + +describe SitemapGenerator::Builder::SitemapUrl do + let(:loc) { + SitemapGenerator::SitemapLocation.new( + :sitemaps_path => 'sitemaps/', + :public_path => '/public', + :host => 'http://test.com', + :namer => SitemapGenerator::SimpleNamer.new(:sitemap) + )} + let(:sitemap_file) { SitemapGenerator::Builder::SitemapFile.new(loc) } + + def new_url(*args) + if args.empty? 
+ args = ['/home', { :host => 'http://example.com' }] + end + SitemapGenerator::Builder::SitemapUrl.new(*args) + end + + it 'should build urls for sitemap files' do + url = SitemapGenerator::Builder::SitemapUrl.new(sitemap_file) + expect(url[:loc]).to eq('http://test.com/sitemaps/sitemap.xml.gz') + end + + it 'lastmod should default to the last modified date for sitemap files' do + lastmod = (Time.now - 1209600) + expect(sitemap_file).to receive(:lastmod).and_return(lastmod) + url = SitemapGenerator::Builder::SitemapUrl.new(sitemap_file) + expect(url[:lastmod]).to eq(lastmod) + end + + it 'should support string option keys' do + url = new_url('/home', 'host' => 'http://string.com', 'priority' => 1) + expect(url[:priority]).to eq(1) + expect(url[:host]).to eq('http://string.com') + end + + it 'should support subdirectory routing' do + url = SitemapGenerator::Builder::SitemapUrl.new('/profile', :host => 'http://example.com/subdir/') + expect(url[:loc]).to eq('http://example.com/subdir/profile') + url = SitemapGenerator::Builder::SitemapUrl.new('profile', :host => 'http://example.com/subdir/') + expect(url[:loc]).to eq('http://example.com/subdir/profile') + url = SitemapGenerator::Builder::SitemapUrl.new('/deep/profile/', :host => 'http://example.com/subdir/') + expect(url[:loc]).to eq('http://example.com/subdir/deep/profile/') + url = SitemapGenerator::Builder::SitemapUrl.new('/deep/profile', :host => 'http://example.com/subdir') + expect(url[:loc]).to eq('http://example.com/subdir/deep/profile') + url = SitemapGenerator::Builder::SitemapUrl.new('deep/profile', :host => 'http://example.com/subdir') + expect(url[:loc]).to eq('http://example.com/subdir/deep/profile') + url = SitemapGenerator::Builder::SitemapUrl.new('deep/profile/', :host => 'http://example.com/subdir/') + expect(url[:loc]).to eq('http://example.com/subdir/deep/profile/') + url = SitemapGenerator::Builder::SitemapUrl.new('/', :host => 'http://example.com/subdir/') + expect(url[:loc]).to 
eq('http://example.com/subdir/') + end + + it 'should not fail on a nil path segment' do + expect do + expect(SitemapGenerator::Builder::SitemapUrl.new(nil, :host => 'http://example.com')[:loc]).to eq('http://example.com') + end.not_to raise_error + end + + it 'should support a :videos option' do + loc = SitemapGenerator::Builder::SitemapUrl.new('', :host => 'http://test.com', :videos => [1,2,3]) + expect(loc[:videos]).to eq([1,2,3]) + end + + it 'should support a singular :video option' do + loc = SitemapGenerator::Builder::SitemapUrl.new('', :host => 'http://test.com', :video => 1) + expect(loc[:videos]).to eq([1]) + end + + it 'should support an array :video option' do + loc = SitemapGenerator::Builder::SitemapUrl.new('', :host => 'http://test.com', :video => [1,2], :videos => [3,4]) + expect(loc[:videos]).to eq([3,4,1,2]) + end + + it 'should support a :alternates option' do + loc = SitemapGenerator::Builder::SitemapUrl.new('', :host => 'http://test.com', :alternates => [1,2,3]) + expect(loc[:alternates]).to eq([1,2,3]) + end + + it 'should support a singular :alternate option' do + loc = SitemapGenerator::Builder::SitemapUrl.new('', :host => 'http://test.com', :alternate => 1) + expect(loc[:alternates]).to eq([1]) + end + + it 'should support an array :alternate option' do + loc = SitemapGenerator::Builder::SitemapUrl.new('', :host => 'http://test.com', :alternate => [1,2], :alternates => [3,4]) + expect(loc[:alternates]).to eq([3,4,1,2]) + end + + it 'should not fail if invalid characters are used in the URL' do + special = ':$&+,;:=?@' + url = SitemapGenerator::Builder::SitemapUrl.new('/#{special}', :host => 'http://example.com/#{special}/') + expect(url[:loc]).to eq('http://example.com/#{special}/#{special}') + end + + describe 'w3c_date' do + it 'should convert dates and times to W3C format' do + url = new_url + expect(url.send(:w3c_date, Date.new(0))).to eq('0000-01-01') + expect(url.send(:w3c_date, Time.at(0).utc)).to eq('1970-01-01T00:00:00+00:00') + 
expect(url.send(:w3c_date, DateTime.new(0))).to eq('0000-01-01T00:00:00+00:00') + end + + it 'should return strings unmodified' do + expect(new_url.send(:w3c_date, '2010-01-01')).to eq('2010-01-01') + end + + it 'should try to convert to utc' do + time = Time.at(0) + expect(time).to receive(:respond_to?).and_return(false) + expect(time).to receive(:respond_to?).and_return(true) + expect(new_url.send(:w3c_date, time)).to eq('1970-01-01T00:00:00+00:00') + end + + it 'should include timezone for objects which do not respond to iso8601 or utc' do + time = Time.at(0) + expect(time).to receive(:respond_to?).and_return(false) + expect(time).to receive(:respond_to?).and_return(false) + expect(time).to receive(:strftime).and_return('+0800', '1970-01-01T00:00:00') + expect(new_url.send(:w3c_date, time)).to eq('1970-01-01T00:00:00+08:00') + end + + it 'should support integers' do + expect(new_url.send(:w3c_date, Time.at(0).to_i)).to eq('1970-01-01T00:00:00+00:00') + end + end + + describe 'yes_or_no' do + it 'should recognize truthy values' do + expect(new_url.send(:yes_or_no, 1)).to eq('yes') + expect(new_url.send(:yes_or_no, 0)).to eq('yes') + expect(new_url.send(:yes_or_no, 'yes')).to eq('yes') + expect(new_url.send(:yes_or_no, 'Yes')).to eq('yes') + expect(new_url.send(:yes_or_no, 'YES')).to eq('yes') + expect(new_url.send(:yes_or_no, true)).to eq('yes') + expect(new_url.send(:yes_or_no, Object.new)).to eq('yes') + end + + it 'should recognize falsy values' do + expect(new_url.send(:yes_or_no, nil)).to eq('no') + expect(new_url.send(:yes_or_no, 'no')).to eq('no') + expect(new_url.send(:yes_or_no, 'No')).to eq('no') + expect(new_url.send(:yes_or_no, 'NO')).to eq('no') + expect(new_url.send(:yes_or_no, false)).to eq('no') + end + + it 'should raise on unrecognized strings' do + expect { new_url.send(:yes_or_no, 'dunno') }.to raise_error(ArgumentError) + expect { new_url.send(:yes_or_no, 'yessir') }.to raise_error(ArgumentError) + end + end + + describe 
'yes_or_no_with_default' do + it 'should use the default if the value is nil' do + url = new_url + expect(url).to receive(:yes_or_no).with(true).and_return('surely') + expect(url.send(:yes_or_no_with_default, nil, true)).to eq('surely') + end + + it 'should use the value if it is not nil' do + url = new_url + expect(url).to receive(:yes_or_no).with('surely').and_return('absolutely') + expect(url.send(:yes_or_no_with_default, 'surely', true)).to eq('absolutely') + end + end + + describe 'format_float' do + it 'should not modify if a string' do + expect(new_url.send(:format_float, '0.4')).to eq('0.4') + end + + it 'should round to one decimal place' do + url = new_url + expect(url.send(:format_float, 0.499999)).to eq('0.5') + expect(url.send(:format_float, 3.444444)).to eq('3.4') + end + end + + describe 'expires' do + let(:url) { SitemapGenerator::Builder::SitemapUrl.new('/path', :host => 'http://example.com', :expires => time) } + let(:time) { Time.at(0).utc } + + it 'should include the option' do + expect(url[:expires]).to eq(time) + end + + it 'should format it and include it in the XML' do + xml = url.to_xml + doc = Nokogiri::XML("#{xml}") + expect(doc.css('url expires').text).to eq(url.send(:w3c_date, time)) + end + end +end diff --git a/spec/sitemap_generator/core_ext/bigdecimal_spec.rb b/spec/sitemap_generator/core_ext/bigdecimal_spec.rb new file mode 100644 index 00000000..931d2c16 --- /dev/null +++ b/spec/sitemap_generator/core_ext/bigdecimal_spec.rb @@ -0,0 +1,20 @@ +require 'spec_helper' +require 'bigdecimal' + +describe SitemapGenerator::BigDecimal do + describe 'to_yaml' do + it 'should serialize correctly' do + expect(SitemapGenerator::BigDecimal.new('100000.30020320320000000000000000000000000000001').to_yaml).to match(/^--- 100000\.30020320320000000000000000000000000000001\n/) + expect(SitemapGenerator::BigDecimal.new('Infinity').to_yaml).to match(/^--- \.Inf\n/) + expect(SitemapGenerator::BigDecimal.new('NaN').to_yaml).to match(/^--- \.NaN\n/) + 
expect(SitemapGenerator::BigDecimal.new('-Infinity').to_yaml).to match(/^--- -\.Inf\n/) + end + end + + describe 'to_d' do + it 'should convert correctly' do + bd = SitemapGenerator::BigDecimal.new '10' + expect(bd.to_d).to eq(bd) + end + end +end diff --git a/spec/sitemap_generator/core_ext/numeric_spec.rb b/spec/sitemap_generator/core_ext/numeric_spec.rb new file mode 100644 index 00000000..c76d526a --- /dev/null +++ b/spec/sitemap_generator/core_ext/numeric_spec.rb @@ -0,0 +1,43 @@ +require 'spec_helper' + +describe SitemapGenerator::Numeric do + def numeric(size) + SitemapGenerator::Numeric.new(size) + end + + describe 'bytes' do + it 'should define equality of different units' do + relationships = { + numeric( 1024).bytes => numeric( 1).kilobyte, + numeric( 1024).kilobytes => numeric( 1).megabyte, + numeric(3584.0).kilobytes => numeric(3.5).megabytes, + numeric(3584.0).megabytes => numeric(3.5).gigabytes, + numeric(1).kilobyte ** 4 => numeric( 1).terabyte, + numeric(1024).kilobytes + numeric(2).megabytes => numeric(3).megabytes, + numeric( 2).gigabytes / 4 => numeric(512).megabytes, + numeric(256).megabytes * 20 +numeric( 5).gigabytes => numeric(10).gigabytes, + numeric(1).kilobyte ** 5 => numeric(1).petabyte, + numeric(1).kilobyte ** 6 => numeric(1).exabyte + } + + relationships.each do |left, right| + expect(left).to eq(right) + end + end + + it 'should represent units as bytes' do + expect(numeric(3).megabytes).to eq(3145728) + expect(numeric(3).megabyte) .to eq(3145728) + expect(numeric(3).kilobytes).to eq(3072) + expect(numeric(3).kilobyte) .to eq(3072) + expect(numeric(3).gigabytes).to eq(3221225472) + expect(numeric(3).gigabyte) .to eq(3221225472) + expect(numeric(3).terabytes).to eq(3298534883328) + expect(numeric(3).terabyte) .to eq(3298534883328) + expect(numeric(3).petabytes).to eq(3377699720527872) + expect(numeric(3).petabyte) .to eq(3377699720527872) + expect(numeric(3).exabytes) .to eq(3458764513820540928) + expect(numeric(3).exabyte) .to 
eq(3458764513820540928) + end + end +end diff --git a/spec/sitemap_generator/helpers/number_helper_spec.rb b/spec/sitemap_generator/helpers/number_helper_spec.rb new file mode 100644 index 00000000..ce6a7637 --- /dev/null +++ b/spec/sitemap_generator/helpers/number_helper_spec.rb @@ -0,0 +1,196 @@ +require 'spec_helper' +require 'sitemap_generator/helpers/number_helper' + +def kilobytes(number) + number * 1024 +end + +def megabytes(number) + kilobytes(number) * 1024 +end + +def gigabytes(number) + megabytes(number) * 1024 +end + +def terabytes(number) + gigabytes(number) * 1024 +end + +describe SitemapGenerator::Helpers::NumberHelper do + include SitemapGenerator::Helpers::NumberHelper + + it 'should number_with_delimiter' do + expect(number_with_delimiter(12345678)).to eq('12,345,678') + expect(number_with_delimiter(0)).to eq('0') + expect(number_with_delimiter(123)).to eq('123') + expect(number_with_delimiter(123456)).to eq('123,456') + expect(number_with_delimiter(123456.78)).to eq('123,456.78') + expect(number_with_delimiter(123456.789)).to eq('123,456.789') + expect(number_with_delimiter(123456.78901)).to eq('123,456.78901') + expect(number_with_delimiter(123456789.78901)).to eq('123,456,789.78901') + expect(number_with_delimiter(0.78901)).to eq('0.78901') + expect(number_with_delimiter('123456.78')).to eq('123,456.78') + end + + it 'should number_with_delimiter_with_options_hash' do + expect(number_with_delimiter(12345678, :delimiter => ' ')).to eq('12 345 678') + expect(number_with_delimiter(12345678.05, :separator => '-')).to eq('12,345,678-05') + expect(number_with_delimiter(12345678.05, :separator => ',', :delimiter => '.')).to eq('12.345.678,05') + expect(number_with_delimiter(12345678.05, :delimiter => '.', :separator => ',')).to eq('12.345.678,05') + end + + it 'should number_with_precision' do + expect(number_with_precision(-111.2346)).to eq('-111.235') + expect(number_with_precision(111.2346)).to eq('111.235') + expect(number_with_precision(31.825, 
:precision => 2)).to eq('31.83') + expect(number_with_precision(111.2346, :precision => 2)).to eq('111.23') + expect(number_with_precision(111, :precision => 2)).to eq('111.00') + expect(number_with_precision('111.2346')).to eq('111.235') + expect(number_with_precision('31.825', :precision => 2)).to eq('31.83') + expect(number_with_precision((32.6751 * 100.00), :precision => 0)).to eq('3268') + expect(number_with_precision(111.50, :precision => 0)).to eq('112') + expect(number_with_precision(1234567891.50, :precision => 0)).to eq('1234567892') + expect(number_with_precision(0, :precision => 0)).to eq('0') + expect(number_with_precision(0.001, :precision => 5)).to eq('0.00100') + expect(number_with_precision(0.00111, :precision => 3)).to eq('0.001') + # Odd difference between Ruby versions + if RUBY_VERSION < '1.9.3' + expect(number_with_precision(9.995, :precision => 2)).to eq('9.99') + else + expect(number_with_precision(9.995, :precision => 2)).to eq('10.00') + end + expect(number_with_precision(10.995, :precision => 2)).to eq('11.00') + end + + it 'should number_with_precision_with_custom_delimiter_and_separator' do + expect(number_with_precision(31.825, :precision => 2, :separator => ',')).to eq('31,83') + expect(number_with_precision(1231.825, :precision => 2, :separator => ',', :delimiter => '.')).to eq('1.231,83') + end + + it 'should number_with_precision_with_significant_digits' do + expect(number_with_precision(123987, :precision => 3, :significant => true)).to eq('124000') + expect(number_with_precision(123987876, :precision => 2, :significant => true )).to eq('120000000') + expect(number_with_precision('43523', :precision => 1, :significant => true )).to eq('40000') + expect(number_with_precision(9775, :precision => 4, :significant => true )).to eq('9775') + expect(number_with_precision(5.3923, :precision => 2, :significant => true )).to eq('5.4') + expect(number_with_precision(5.3923, :precision => 1, :significant => true )).to eq('5') + 
expect(number_with_precision(1.232, :precision => 1, :significant => true )).to eq('1') + expect(number_with_precision(7, :precision => 1, :significant => true )).to eq('7') + expect(number_with_precision(1, :precision => 1, :significant => true )).to eq('1') + expect(number_with_precision(52.7923, :precision => 2, :significant => true )).to eq('53') + expect(number_with_precision(9775, :precision => 6, :significant => true )).to eq('9775.00') + expect(number_with_precision(5.3929, :precision => 7, :significant => true )).to eq('5.392900') + expect(number_with_precision(0, :precision => 2, :significant => true )).to eq('0.0') + expect(number_with_precision(0, :precision => 1, :significant => true )).to eq('0') + expect(number_with_precision(0.0001, :precision => 1, :significant => true )).to eq('0.0001') + expect(number_with_precision(0.0001, :precision => 3, :significant => true )).to eq('0.000100') + expect(number_with_precision(0.0001111, :precision => 1, :significant => true )).to eq('0.0001') + expect(number_with_precision(9.995, :precision => 3, :significant => true)).to eq('10.0') + expect(number_with_precision(9.994, :precision => 3, :significant => true)).to eq('9.99') + expect(number_with_precision(10.995, :precision => 3, :significant => true)).to eq('11.0') + end + + it 'should number_with_precision_with_strip_insignificant_zeros' do + expect(number_with_precision(9775.43, :precision => 4, :strip_insignificant_zeros => true )).to eq('9775.43') + expect(number_with_precision(9775.2, :precision => 6, :significant => true, :strip_insignificant_zeros => true )).to eq('9775.2') + expect(number_with_precision(0, :precision => 6, :significant => true, :strip_insignificant_zeros => true )).to eq('0') + end + + it 'should number_with_precision_with_significant_true_and_zero_precision' do + # Zero precision with significant is a mistake (would always return zero), + # so we treat it as if significant was false (increases backwards compatibily for 
number_to_human_size) + expect(number_with_precision(123.987, :precision => 0, :significant => true)).to eq('124') + expect(number_with_precision(12, :precision => 0, :significant => true )).to eq('12') + expect(number_with_precision('12.3', :precision => 0, :significant => true )).to eq('12') + end + + it 'should number_to_human_size' do + expect(number_to_human_size(0)).to eq('0 Bytes') + expect(number_to_human_size(1)).to eq('1 Byte') + expect(number_to_human_size(3.14159265)).to eq('3 Bytes') + expect(number_to_human_size(123.0)).to eq('123 Bytes') + expect(number_to_human_size(123)).to eq('123 Bytes') + expect(number_to_human_size(1234)).to eq('1.21 KB') + expect(number_to_human_size(12345)).to eq('12.1 KB') + expect(number_to_human_size(1234567)).to eq('1.18 MB') + expect(number_to_human_size(1234567890)).to eq('1.15 GB') + expect(number_to_human_size(1234567890123)).to eq('1.12 TB') + expect(number_to_human_size(terabytes(1026))).to eq('1030 TB') + expect(number_to_human_size(kilobytes(444))).to eq('444 KB') + expect(number_to_human_size(megabytes(1023))).to eq('1020 MB') + expect(number_to_human_size(terabytes(3))).to eq('3 TB') + expect(number_to_human_size(1234567, :precision => 2)).to eq('1.2 MB') + expect(number_to_human_size(3.14159265, :precision => 4)).to eq('3 Bytes') + expect(number_to_human_size('123')).to eq('123 Bytes') + expect(number_to_human_size(kilobytes(1.0123), :precision => 2)).to eq('1 KB') + expect(number_to_human_size(kilobytes(1.0100), :precision => 4)).to eq('1.01 KB') + expect(number_to_human_size(kilobytes(10.000), :precision => 4)).to eq('10 KB') + expect(number_to_human_size(1.1)).to eq('1 Byte') + expect(number_to_human_size(10)).to eq('10 Bytes') + end + + it 'should number_to_human_size_with_options_hash' do + expect(number_to_human_size(1234567, :precision => 2)).to eq('1.2 MB') + expect(number_to_human_size(3.14159265, :precision => 4)).to eq('3 Bytes') + expect(number_to_human_size(kilobytes(1.0123), :precision => 2)).to 
eq('1 KB') + expect(number_to_human_size(kilobytes(1.0100), :precision => 4)).to eq('1.01 KB') + expect(number_to_human_size(kilobytes(10.000), :precision => 4)).to eq('10 KB') + expect(number_to_human_size(1234567890123, :precision => 1)).to eq('1 TB') + expect(number_to_human_size(524288000, :precision=>3)).to eq('500 MB') + expect(number_to_human_size(9961472, :precision=>0)).to eq('10 MB') + expect(number_to_human_size(41010, :precision => 1)).to eq('40 KB') + expect(number_to_human_size(41100, :precision => 2)).to eq('40 KB') + expect(number_to_human_size(kilobytes(1.0123), :precision => 2, :strip_insignificant_zeros => false)).to eq('1.0 KB') + expect(number_to_human_size(kilobytes(1.0123), :precision => 3, :significant => false)).to eq('1.012 KB') + number_to_human_size(kilobytes(1.0123), :precision => 0, :significant => true) #ignores significant it precision is 0.should == '1 KB' + end + + it 'should number_to_human_size_with_custom_delimiter_and_separator' do + expect(number_to_human_size(kilobytes(1.0123), :precision => 3, :separator => ',')) .to eq('1,01 KB') + expect(number_to_human_size(kilobytes(1.0100), :precision => 4, :separator => ',')) .to eq('1,01 KB') + expect(number_to_human_size(terabytes(1000.1), :precision => 5, :delimiter => '.', :separator => ',')) .to eq('1.000,1 TB') + end + + it 'should number_helpers_should_return_nil_when_given_nil' do + expect(number_with_delimiter(nil)).to be_nil + expect(number_with_precision(nil)).to be_nil + expect(number_to_human_size(nil)).to be_nil + end + + it 'should number_helpers_should_return_non_numeric_param_unchanged' do + expect(number_with_delimiter('x')).to eq('x') + expect(number_with_precision('x.')).to eq('x.') + expect(number_with_precision('x')).to eq('x') + expect(number_to_human_size('x')).to eq('x') + end + + it 'should number_helpers_should_raise_error_if_invalid_when_specified' do + expect do + number_to_human_size('x', :raise => true) + end.to 
raise_error(SitemapGenerator::Helpers::NumberHelper::InvalidNumberError) + begin + number_to_human_size('x', :raise => true) + rescue SitemapGenerator::Helpers::NumberHelper::InvalidNumberError => e + expect(e.number).to eq('x') + end + + expect do + number_with_precision('x', :raise => true) + end.to raise_error(SitemapGenerator::Helpers::NumberHelper::InvalidNumberError) + begin + number_with_precision('x', :raise => true) + rescue SitemapGenerator::Helpers::NumberHelper::InvalidNumberError => e + expect(e.number).to eq('x') + end + + expect do + number_with_delimiter('x', :raise => true) + end.to raise_error(SitemapGenerator::Helpers::NumberHelper::InvalidNumberError) + begin + number_with_delimiter('x', :raise => true) + rescue SitemapGenerator::Helpers::NumberHelper::InvalidNumberError => e + expect(e.number).to eq('x') + end + end +end diff --git a/spec/sitemap_generator/interpreter_spec.rb b/spec/sitemap_generator/interpreter_spec.rb new file mode 100644 index 00000000..52a63500 --- /dev/null +++ b/spec/sitemap_generator/interpreter_spec.rb @@ -0,0 +1,87 @@ +require 'spec_helper' +require 'sitemap_generator/interpreter' + +describe SitemapGenerator::Interpreter do + let(:link_set) { SitemapGenerator::LinkSet.new } + let(:interpreter) { SitemapGenerator::Interpreter.new(:link_set => link_set) } + + # The interpreter doesn't have the URL helpers included for some reason, so it + # fails when adding links. That messes up later specs unless we reset the sitemap object. + after :all do + SitemapGenerator::Sitemap.reset! 
+ end + + it 'should find the config file if Rails.root doesn\'t end in a slash' do + stub_const('Rails', double('Rails', :root => SitemapGenerator.app.root.to_s.sub(/\/$/, ''))) + expect { SitemapGenerator::Interpreter.run }.not_to raise_error + end + + it 'should set the verbose option' do + expect_any_instance_of(SitemapGenerator::Interpreter).to receive(:instance_eval) + interpreter = SitemapGenerator::Interpreter.run(:verbose => true) + expect(interpreter.instance_variable_get(:@linkset).verbose).to be(true) + end + + describe 'link_set' do + it 'should default to the default LinkSet' do + expect(SitemapGenerator::Interpreter.new.sitemap).to be(SitemapGenerator::Sitemap) + end + + it 'should allow setting the LinkSet as an option' do + expect(interpreter.sitemap).to be(link_set) + end + end + + describe 'public interface' do + describe 'add' do + it 'should add a link to the sitemap' do + expect(link_set).to receive(:add).with('test', :option => 'value') + interpreter.add('test', :option => 'value') + end + end + + describe 'group' do + it 'should start a new group' do + expect(link_set).to receive(:group).with('test', :option => 'value') + interpreter.group('test', :option => 'value') + end + end + + describe 'sitemap' do + it 'should return the LinkSet' do + expect(interpreter.sitemap).to be(link_set) + end + end + + describe 'add_to_index' do + it 'should add a link to the sitemap index' do + expect(link_set).to receive(:add_to_index).with('test', :option => 'value') + interpreter.add_to_index('test', :option => 'value') + end + end + end + + describe 'eval' do + it 'should yield the LinkSet to the block' do + interpreter.eval(:yield_sitemap => true) do |sitemap| + expect(sitemap).to be(link_set) + end + end + + it 'should not yield the LinkSet to the block' do + # Assign self to a local variable so it is captured by the block + this = self + interpreter.eval(:yield_sitemap => false) do + this.expect(self).to this.be(this.interpreter) + end + end + + it 
'should not yield the LinkSet to the block by default' do + # Assign self to a local variable so it is captured by the block + this = self + interpreter.eval do + this.expect(self).to this.be(this.interpreter) + end + end + end +end diff --git a/spec/sitemap_generator/link_set_spec.rb b/spec/sitemap_generator/link_set_spec.rb new file mode 100644 index 00000000..d583dcf6 --- /dev/null +++ b/spec/sitemap_generator/link_set_spec.rb @@ -0,0 +1,925 @@ +require 'spec_helper' + +describe SitemapGenerator::LinkSet do + let(:default_host) { 'http://example.com' } + let(:ls) { SitemapGenerator::LinkSet.new(:default_host => default_host) } + + describe 'initializer options' do + options = [:public_path, :sitemaps_path, :default_host, :filename, :search_engines, :max_sitemap_links] + values = [File.expand_path(SitemapGenerator.app.root + 'tmp/'), 'mobile/', 'http://myhost.com', :xxx, { :abc => '123' }, 10] + + options.zip(values).each do |option, value| + it 'should set #{option} to #{value}' do + ls = SitemapGenerator::LinkSet.new(option => value) + expect(ls.send(option)).to eq(value) + end + end + end + + describe 'default options' do + let(:ls) { SitemapGenerator::LinkSet.new } + + default_options = { + :filename => :sitemap, + :sitemaps_path => nil, + :public_path => SitemapGenerator.app.root + 'public/', + :default_host => nil, + :include_index => false, + :include_root => true, + :create_index => :auto, + :max_sitemap_links => SitemapGenerator::MAX_SITEMAP_LINKS + } + + default_options.each do |option, value| + it '#{option} should default to #{value}' do + expect(ls.send(option)).to eq(value) + end + end + end + + describe 'include_root include_index option' do + it 'should include the root url and the sitemap index url' do + ls = SitemapGenerator::LinkSet.new(:default_host => default_host, :include_root => true, :include_index => true) + expect(ls.include_root).to be(true) + expect(ls.include_index).to be(true) + ls.create { |sitemap| } + 
expect(ls.sitemap.link_count).to eq(2) + end + + it 'should not include the root url' do + ls = SitemapGenerator::LinkSet.new(:default_host => default_host, :include_root => false) + expect(ls.include_root).to be(false) + expect(ls.include_index).to be(false) + ls.create { |sitemap| } + expect(ls.sitemap.link_count).to eq(0) + end + + it 'should not include the sitemap index url' do + ls = SitemapGenerator::LinkSet.new(:default_host => default_host, :include_index => false) + expect(ls.include_root).to be(true) + expect(ls.include_index).to be(false) + ls.create { |sitemap| } + expect(ls.sitemap.link_count).to eq(1) + end + + it 'should not include the root url or the sitemap index url' do + ls = SitemapGenerator::LinkSet.new(:default_host => default_host, :include_root => false, :include_index => false) + expect(ls.include_root).to be(false) + expect(ls.include_index).to be(false) + ls.create { |sitemap| } + expect(ls.sitemap.link_count).to eq(0) + end + end + + describe 'sitemaps public_path' do + it 'should default to public/' do + path = SitemapGenerator.app.root + 'public/' + expect(ls.public_path).to eq(path) + expect(ls.sitemap.location.public_path).to eq(path) + expect(ls.sitemap_index.location.public_path).to eq(path) + end + + it 'should change when the public_path is changed' do + path = SitemapGenerator.app.root + 'tmp/' + ls.public_path = 'tmp/' + expect(ls.public_path).to eq(path) + expect(ls.sitemap.location.public_path).to eq(path) + expect(ls.sitemap_index.location.public_path).to eq(path) + end + + it 'should append a slash to the path' do + path = SitemapGenerator.app.root + 'tmp/' + ls.public_path = 'tmp' + expect(ls.public_path).to eq(path) + expect(ls.sitemap.location.public_path).to eq(path) + expect(ls.sitemap_index.location.public_path).to eq(path) + end + end + + describe 'sitemaps url' do + it 'should change when the default_host is changed' do + ls.default_host = 'http://one.com' + expect(ls.default_host).to eq('http://one.com') + 
expect(ls.default_host).to eq(ls.sitemap.location.host) + expect(ls.default_host).to eq(ls.sitemap_index.location.host) + end + + it 'should change when the sitemaps_path is changed' do + ls.default_host = 'http://one.com' + ls.sitemaps_path = 'sitemaps/' + expect(ls.sitemap.location.url).to eq('http://one.com/sitemaps/sitemap.xml.gz') + expect(ls.sitemap_index.location.url).to eq('http://one.com/sitemaps/sitemap.xml.gz') + end + + it 'should append a slash to the path' do + ls.default_host = 'http://one.com' + ls.sitemaps_path = 'sitemaps' + expect(ls.sitemap.location.url).to eq('http://one.com/sitemaps/sitemap.xml.gz') + expect(ls.sitemap_index.location.url).to eq('http://one.com/sitemaps/sitemap.xml.gz') + end + end + + describe 'sitemap_index_url' do + it 'should return the url to the index file' do + ls.default_host = default_host + expect(ls.sitemap_index.location.url).to eq("#{default_host}/sitemap.xml.gz") + expect(ls.sitemap_index_url).to eq(ls.sitemap_index.location.url) + end + end + + describe 'search_engines' do + it 'should have search engines by default' do + expect(ls.search_engines).to be_a(Hash) + expect(ls.search_engines.size).to eq(2) + end + + it 'should support being modified' do + ls.search_engines[:newengine] = 'abc' + expect(ls.search_engines.size).to eq(3) + end + + it 'should support being set to nil' do + ls = SitemapGenerator::LinkSet.new(:default_host => 'http://one.com', :search_engines => nil) + expect(ls.search_engines).to be_a(Hash) + expect(ls.search_engines).to be_empty + ls.search_engines = nil + expect(ls.search_engines).to be_a(Hash) + expect(ls.search_engines).to be_empty + end + end + + describe 'ping search engines' do + it 'should not fail' do + request = stub_request(:get, //) + expect { ls.ping_search_engines }.not_to raise_error + expect(request).to have_been_requested.at_least_once + end + + it 'should raise if no host is set' do + expect { SitemapGenerator::LinkSet.new.ping_search_engines }.to 
raise_error(SitemapGenerator::SitemapError, 'No value set for host') + end + + it 'should use the sitemap index url provided' do + index_url = 'http://example.com/index.xml' + ls = SitemapGenerator::LinkSet.new(:search_engines => { :google => 'http://google.com/?url=%s' }) + request = stub_request(:get, "http://google.com/?url=#{CGI.escape(index_url)}") + ls.ping_search_engines(index_url) + expect(request).to have_been_requested + end + + it 'should use the sitemap index url from the link set' do + ls = SitemapGenerator::LinkSet.new( + :default_host => default_host, + :search_engines => { :google => 'http://google.com/?url=%s' }) + index_url = ls.sitemap_index_url + request = stub_request(:get, "http://google.com/?url=#{CGI.escape(index_url)}") + ls.ping_search_engines + expect(request).to have_been_requested + end + + it 'should include the given search engines' do + ls.search_engines = nil + request = stub_request(:get, /^http:\/\/newnegine\.com\?/) + ls.ping_search_engines(:newengine => 'http://newnegine.com?%s') + expect(request).to have_been_requested + + WebMock.reset_executed_requests! 
+ ls.ping_search_engines(:newengine => 'http://newnegine.com?%s', :anotherengine => 'http://newnegine.com?%s') + expect(request).to have_been_requested.twice + end + end + + describe 'verbose' do + it 'should be set as an initialize option' do + expect(SitemapGenerator::LinkSet.new(:default_host => default_host, :verbose => false).verbose).to be(false) + expect(SitemapGenerator::LinkSet.new(:default_host => default_host, :verbose => true).verbose).to be(true) + end + + it 'should be set as an accessor' do + ls.verbose = true + expect(ls.verbose).to be(true) + ls.verbose = false + expect(ls.verbose).to be(false) + end + + it 'should use SitemapGenerator.verbose as a default' do + expect(SitemapGenerator).to receive(:verbose).and_return(true).twice + expect(SitemapGenerator::LinkSet.new.verbose).to be(true) + end + + it 'should use SitemapGenerator.verbose as a default' do + expect(SitemapGenerator).to receive(:verbose).and_return(false).twice + expect(SitemapGenerator::LinkSet.new.verbose).to be(false) + end + end + + describe 'when finalizing' do + let(:ls) { SitemapGenerator::LinkSet.new(:default_host => default_host, :verbose => true, :create_index => true) } + + it 'should output summary lines' do + expect(ls.sitemap.location).to receive(:summary) + expect(ls.sitemap_index.location).to receive(:summary) + ls.finalize! 
+ end + end + + describe 'sitemaps host' do + let(:new_host) { 'http://wowza.com' } + + it 'should have a host' do + ls.default_host = default_host + expect(ls.default_host).to eq(default_host) + end + + it 'should default to default host' do + expect(ls.sitemaps_host).to eq(ls.default_host) + end + + it 'should update the host in the sitemaps when changed' do + ls.sitemaps_host = new_host + expect(ls.sitemaps_host).to eq(new_host) + expect(ls.sitemap.location.host).to eq(ls.sitemaps_host) + expect(ls.sitemap_index.location.host).to eq(ls.sitemaps_host) + end + + it 'should not change the default host for links' do + ls.sitemaps_host = new_host + expect(ls.default_host).to eq(default_host) + end + end + + describe 'with a sitemap index specified' do + before do + @index = SitemapGenerator::Builder::SitemapIndexFile.new(:host => default_host) + @ls = SitemapGenerator::LinkSet.new(:sitemap_index => @index, :sitemaps_host => 'http://newhost.com') + end + + it 'should not modify the index' do + @ls.filename = :newname + expect(@ls.sitemap.location.filename).to match(/newname/) + @ls.sitemap_index.location.filename =~ /sitemap/ + end + + it 'should not modify the index' do + @ls.sitemaps_host = 'http://newhost.com' + expect(@ls.sitemap.location.host).to eq('http://newhost.com') + expect(@ls.sitemap_index.location.host).to eq(default_host) + end + + it 'should not finalize the index' do + @ls.send(:finalize_sitemap_index!) 
+ expect(@ls.sitemap_index.finalized?).to be(false) + end + end + + describe 'new group' do + describe 'general behaviour' do + it 'should return a LinkSet' do + expect(ls.group).to be_a(SitemapGenerator::LinkSet) + end + + it 'should inherit the index' do + expect(ls.group.sitemap_index).to eq(ls.sitemap_index) + end + + it 'should protect the sitemap_index' do + expect(ls.group.instance_variable_get(:@protect_index)).to be(true) + end + + it 'should not allow chaning the public_path' do + expect(ls.group(:public_path => 'new/path/').public_path.to_s).to eq(ls.public_path.to_s) + end + end + + describe 'include_index' do + it 'should set the value' do + expect(ls.group(:include_index => !ls.include_index).include_index).not_to eq(ls.include_index) + end + + it 'should default to false' do + expect(ls.group.include_index).to be(false) + end + end + + describe 'include_root' do + it 'should set the value' do + expect(ls.group(:include_root => !ls.include_root).include_root).not_to eq(ls.include_root) + end + + it 'should default to false' do + expect(ls.group.include_root).to be(false) + end + end + + describe 'filename' do + it 'should inherit the value' do + expect(ls.group.filename).to eq(:sitemap) + end + + it 'should set the value' do + group = ls.group(:filename => :xxx) + expect(group.filename).to eq(:xxx) + expect(group.sitemap.location.filename).to match(/xxx/) + end + end + + describe 'verbose' do + it 'should inherit the value' do + expect(ls.group.verbose).to eq(ls.verbose) + end + + it 'should set the value' do + expect(ls.group(:verbose => !ls.verbose).verbose).not_to eq(ls.verbose) + end + end + + describe 'sitemaps_path' do + it 'should inherit the sitemaps_path' do + group = ls.group + expect(group.sitemaps_path).to eq(ls.sitemaps_path) + expect(group.sitemap.location.sitemaps_path).to eq(ls.sitemap.location.sitemaps_path) + end + + it 'should set the sitemaps_path' do + path = 'new/path' + group = ls.group(:sitemaps_path => path) + 
expect(group.sitemaps_path).to eq(path) + expect(group.sitemap.location.sitemaps_path.to_s).to eq('new/path/') + end + end + + describe 'default_host' do + it 'should inherit the default_host' do + expect(ls.group.default_host).to eq(default_host) + end + + it 'should set the default_host' do + host = 'http://defaulthost.com' + group = ls.group(:default_host => host) + expect(group.default_host).to eq(host) + expect(group.sitemap.location.host).to eq(host) + end + end + + describe 'sitemaps_host' do + it 'should set the sitemaps host' do + @host = 'http://sitemaphost.com' + @group = ls.group(:sitemaps_host => @host) + expect(@group.sitemaps_host).to eq(@host) + expect(@group.sitemap.location.host).to eq(@host) + end + + it 'should finalize the sitemap if it is the only option' do + expect(ls).to receive(:finalize_sitemap!) + ls.group(:sitemaps_host => 'http://test.com') {} + end + + it 'should use the same namer' do + @group = ls.group(:sitemaps_host => 'http://test.com') {} + expect(@group.sitemap.location.namer).to eq(ls.sitemap.location.namer) + end + end + + describe 'namer' do + it 'should inherit the value' do + expect(ls.group.namer).to eq(ls.namer) + expect(ls.group.sitemap.location.namer).to eq(ls.namer) + end + + it 'should set the value' do + namer = SitemapGenerator::SimpleNamer.new(:xxx) + group = ls.group(:namer => namer) + expect(group.namer).to eq(namer) + expect(group.sitemap.location.namer).to eq(namer) + expect(group.sitemap.location.filename).to match(/xxx/) + end + end + + describe 'create_index' do + it 'should inherit the value' do + expect(ls.group.create_index).to eq(ls.create_index) + ls.create_index = :some_value + expect(ls.group.create_index).to eq(:some_value) + end + + it 'should set the value' do + group = ls.group(:create_index => :some_value) + expect(group.create_index).to eq(:some_value) + end + end + + describe 'should share the current sitemap' do + it 'if only default_host is passed' do + group = ls.group(:default_host => 
'http://newhost.com') + expect(group.sitemap).to eq(ls.sitemap) + expect(group.sitemap.location.host).to eq('http://newhost.com') + end + end + + describe 'should not share the current sitemap' do + { + :filename => :xxx, + :sitemaps_path => 'en/', + :namer => SitemapGenerator::SimpleNamer.new(:sitemap) + }.each do |key, value| + it 'if #{key} is present' do + expect(ls.group(key => value).sitemap).not_to eq(ls.sitemap) + end + end + end + + describe 'finalizing' do + it 'should only finalize the sitemaps if a block is passed' do + @group = ls.group + expect(@group.sitemap.finalized?).to be(false) + end + + it 'should not finalize the sitemap if a group is created' do + ls.create { group {} } + expect(ls.sitemap.empty?).to be(true) + expect(ls.sitemap.finalized?).to be(false) + end + + {:sitemaps_path => 'en/', + :filename => :example, + :namer => SitemapGenerator::SimpleNamer.new(:sitemap) + }.each do |k, v| + + it 'should not finalize the sitemap if #{k} is present' do + expect(ls).to receive(:finalize_sitemap!).never + ls.group(k => v) { } + end + end + end + + describe 'adapter' do + it 'should inherit the current adapter' do + ls.adapter = Object.new + group = ls.group + expect(group).not_to be(ls) + expect(group.adapter).to be(ls.adapter) + end + + it 'should set the value' do + adapter = Object.new + group = ls.group(:adapter => adapter) + expect(group.adapter).to be(adapter) + end + end + end + + describe 'after create' do + it 'should finalize the sitemap index' do + ls.create {} + expect(ls.sitemap_index.finalized?).to be(true) + end + + it 'should finalize the sitemap' do + ls.create {} + expect(ls.sitemap.finalized?).to be(true) + end + + it 'should not finalize the sitemap if a group was created' do + ls.instance_variable_set(:@created_group, true) + ls.send(:finalize_sitemap!) + expect(ls.sitemap.finalized?).to be(false) + end + end + + describe 'options to create' do + before do + expect(ls).to receive(:finalize!) 
+ end + + it 'should set include_index' do + original = ls.include_index + expect(ls.create(:include_index => !original).include_index).not_to eq(original) + end + + it 'should set include_root' do + original = ls.include_root + expect(ls.create(:include_root => !original).include_root).not_to eq(original) + end + + it 'should set the filename' do + ls.create(:filename => :xxx) + expect(ls.filename).to eq(:xxx) + expect(ls.sitemap.location.filename).to match(/xxx/) + end + + it 'should set verbose' do + original = ls.verbose + expect(ls.create(:verbose => !original).verbose).not_to eq(original) + end + + it 'should set the sitemaps_path' do + path = 'new/path' + ls.create(:sitemaps_path => path) + expect(ls.sitemaps_path).to eq(path) + expect(ls.sitemap.location.sitemaps_path.to_s).to eq('new/path/') + end + + it 'should set the default_host' do + host = 'http://defaulthost.com' + ls.create(:default_host => host) + expect(ls.default_host).to eq(host) + expect(ls.sitemap.location.host).to eq(host) + end + + it 'should set the sitemaps host' do + host = 'http://sitemaphost.com' + ls.create(:sitemaps_host => host) + expect(ls.sitemaps_host).to eq(host) + expect(ls.sitemap.location.host).to eq(host) + end + + it 'should set the namer' do + namer = SitemapGenerator::SimpleNamer.new(:xxx) + ls.create(:namer => namer) + expect(ls.namer).to eq(namer) + expect(ls.sitemap.location.namer).to eq(namer) + expect(ls.sitemap.location.filename).to match(/xxx/) + end + + it 'should support both namer and filename options' do + namer = SitemapGenerator::SimpleNamer.new('sitemap2') + ls.create(:namer => namer, :filename => 'sitemap1') + expect(ls.namer).to eq(namer) + expect(ls.sitemap.location.namer).to eq(namer) + expect(ls.sitemap.location.filename).to match(/^sitemap2/) + expect(ls.sitemap_index.location.filename).to match(/^sitemap2/) + end + + it 'should support both namer and filename options no matter the order' do + options = { + :namer => 
SitemapGenerator::SimpleNamer.new('sitemap1'), + :filename => 'sitemap2' + } + ls.create(options) + expect(ls.sitemap.location.filename).to match(/^sitemap1/) + expect(ls.sitemap_index.location.filename).to match(/^sitemap1/) + end + + it 'should not modify the options hash' do + options = { :filename => 'sitemaptest', :verbose => false } + ls.create(options) + expect(options).to eq({ :filename => 'sitemaptest', :verbose => false }) + end + + it 'should set create_index' do + ls.create(:create_index => :auto) + expect(ls.create_index).to eq(:auto) + end + end + + describe 'reset!' do + it 'should reset the sitemap namer' do + expect(SitemapGenerator::Sitemap.namer).to receive(:reset) + SitemapGenerator::Sitemap.create(:default_host => 'http://cnn.com') + end + + it 'should reset the default link variable' do + SitemapGenerator::Sitemap.instance_variable_set(:@added_default_links, true) + SitemapGenerator::Sitemap.create(:default_host => 'http://cnn.com') + SitemapGenerator::Sitemap.instance_variable_set(:@added_default_links, false) + end + end + + describe 'include_root?' do + it 'should return false' do + ls.include_root = false + expect(ls.include_root).to be(false) + end + + it 'should return true' do + ls.include_root = true + expect(ls.include_root).to be(true) + end + end + + describe 'include_index?' 
do + let(:sitemaps_host) { 'http://amazon.com' } + + it 'should be true if no sitemaps_host set, or it is the same' do + ls.include_index = true + ls.sitemaps_host = default_host + expect(ls.include_index?).to be(true) + + ls.sitemaps_host = nil + expect(ls.include_index?).to be(true) + end + + it 'should be false if include_index is false or sitemaps_host differs' do + ls.include_index = false + ls.sitemaps_host = default_host + expect(ls.include_index?).to be(false) + + ls.include_index = true + ls.sitemaps_host = sitemaps_host + expect(ls.include_index?).to be(false) + end + + it 'should return false' do + ls = SitemapGenerator::LinkSet.new(:default_host => default_host, :sitemaps_host => sitemaps_host) + expect(ls.include_index?).to be(false) + end + end + + describe 'output' do + it 'should not output' do + ls.verbose = false + expect(ls).to receive(:puts).never + ls.send(:output, '') + end + + it 'should print the given string' do + ls.verbose = true + expect(ls).to receive(:puts).with('') + ls.send(:output, '') + end + end + + describe 'yield_sitemap' do + it 'should default to the value of SitemapGenerator.yield_sitemap?' 
do + expect(SitemapGenerator).to receive(:yield_sitemap?).and_return(true) + expect(ls.yield_sitemap?).to be(true) + expect(SitemapGenerator).to receive(:yield_sitemap?).and_return(false) + expect(ls.yield_sitemap?).to be(false) + end + + it 'should be settable as an option' do + expect(SitemapGenerator).to receive(:yield_sitemap?).never + expect(SitemapGenerator::LinkSet.new(:yield_sitemap => true).yield_sitemap?).to be(true) + expect(SitemapGenerator::LinkSet.new(:yield_sitemap => false).yield_sitemap?).to be(false) + end + + it 'should be settable as an attribute' do + ls.yield_sitemap = true + expect(ls.yield_sitemap?).to be(true) + ls.yield_sitemap = false + expect(ls.yield_sitemap?).to be(false) + end + + it 'should yield the sitemap in the call to create' do + expect(ls.send(:interpreter)).to receive(:eval).with(:yield_sitemap => true) + ls.yield_sitemap = true + ls.create + expect(ls.send(:interpreter)).to receive(:eval).with(:yield_sitemap => false) + ls.yield_sitemap = false + ls.create + end + end + + describe 'add' do + it 'should not modify the options hash' do + options = { :host => 'http://newhost.com' } + ls.add('/home', options) + expect(options).to eq({ :host => 'http://newhost.com' }) + end + + it 'should add the link to the sitemap and include the default host' do + expect(ls).to receive(:add_default_links) + expect(ls.sitemap).to receive(:add).with('/home', :host => ls.default_host) + ls.add('/home') + end + + it 'should allow setting of a custom host' do + expect(ls).to receive(:add_default_links) + expect(ls.sitemap).to receive(:add).with('/home', :host => 'http://newhost.com') + ls.add('/home', :host => 'http://newhost.com') + end + + it 'should add the default links if they have not been added' do + expect(ls).to receive(:add_default_links) + ls.add('/home') + end + end + + describe 'add_to_index' do + it 'should add the link to the sitemap index and pass options' do + expect(ls.sitemap_index).to receive(:add).with('/test', 
hash_including(:option => 'value')) + ls.add_to_index('/test', :option => 'value') + end + + it 'should not modify the options hash' do + options = { :host => 'http://newhost.com' } + ls.add_to_index('/home', options) + expect(options).to eq({ :host => 'http://newhost.com' }) + end + + describe 'host' do + it 'should be the sitemaps_host' do + ls.sitemaps_host = 'http://sitemapshost.com' + expect(ls.sitemap_index).to receive(:add).with('/home', :host => 'http://sitemapshost.com') + ls.add_to_index('/home') + end + + it 'should be the default_host if no sitemaps_host set' do + expect(ls.sitemap_index).to receive(:add).with('/home', :host => ls.default_host) + ls.add_to_index('/home') + end + + it 'should allow setting a custom host' do + expect(ls.sitemap_index).to receive(:add).with('/home', :host => 'http://newhost.com') + ls.add_to_index('/home', :host => 'http://newhost.com') + end + end + end + + describe 'create_index' do + let(:location) { SitemapGenerator::SitemapLocation.new(:namer => SitemapGenerator::SimpleNamer.new(:sitemap), :public_path => 'tmp/', :sitemaps_path => 'test/', :host => 'http://example.com/') } + let(:sitemap) { SitemapGenerator::Builder::SitemapFile.new(location) } + + describe 'when false' do + let(:ls) { SitemapGenerator::LinkSet.new(:default_host => default_host, :create_index => false) } + + it 'should not write the index' do + ls.send(:finalize_sitemap_index!) + expect(ls.sitemap_index.written?).to be(false) + end + + it 'should still add finalized sitemaps to the index (but the index is never finalized)' do + expect(ls).to receive(:add_to_index).with(ls.sitemap).once + ls.send(:finalize_sitemap!) + end + end + + describe 'when true' do + let(:ls) { SitemapGenerator::LinkSet.new(:default_host => default_host, :create_index => true) } + + it 'should always finalize the index' do + ls.send(:finalize_sitemap_index!) 
+ expect(ls.sitemap_index.finalized?).to be(true) + end + + it 'should add finalized sitemaps to the index' do + expect(ls).to receive(:add_to_index).with(ls.sitemap).once + ls.send(:finalize_sitemap!) + end + end + + describe 'when :auto' do + let(:ls) { SitemapGenerator::LinkSet.new(:default_host => default_host, :create_index => :auto) } + + it 'should not write the index when it is empty' do + expect(ls.sitemap_index.empty?).to be(true) + ls.send(:finalize_sitemap_index!) + expect(ls.sitemap_index.written?).to be(false) + end + + it 'should add finalized sitemaps to the index' do + expect(ls).to receive(:add_to_index).with(ls.sitemap).once + ls.send(:finalize_sitemap!) + end + + it 'should write the index when a link is added manually' do + ls.sitemap_index.add '/test' + expect(ls.sitemap_index.empty?).to be(false) + ls.send(:finalize_sitemap_index!) + expect(ls.sitemap_index.written?).to be(true) + + # Test that the index url is reported correctly + expect(ls.sitemap_index.index_url).to eq('http://example.com/sitemap.xml.gz') + end + + it 'should not write the index when only one sitemap is added (considered internal usage)' do + ls.sitemap_index.add sitemap + expect(ls.sitemap_index.empty?).to be(false) + ls.send(:finalize_sitemap_index!) + expect(ls.sitemap_index.written?).to be(false) + + # Test that the index url is reported correctly + expect(ls.sitemap_index.index_url).to eq(sitemap.location.url) + end + + it 'should write the index when more than one sitemap is added (considered internal usage)' do + ls.sitemap_index.add sitemap + ls.sitemap_index.add sitemap.new + ls.send(:finalize_sitemap_index!) 
+ expect(ls.sitemap_index.written?).to be(true) + + # Test that the index url is reported correctly + expect(ls.sitemap_index.index_url).to eq(ls.sitemap_index.location.url) + expect(ls.sitemap_index.index_url).to eq('http://example.com/sitemap.xml.gz') + end + + it 'should write the index when it has more than one link' do + ls.sitemap_index.add '/test1' + ls.sitemap_index.add '/test2' + ls.send(:finalize_sitemap_index!) + expect(ls.sitemap_index.written?).to be(true) + + # Test that the index url is reported correctly + expect(ls.sitemap_index.index_url).to eq('http://example.com/sitemap.xml.gz') + end + end + end + + describe 'when sitemap empty' do + before do + ls.include_root = false + end + + it 'should not be written' do + expect(ls.sitemap.empty?).to be(true) + expect(ls).to receive(:add_to_index).never + ls.send(:finalize_sitemap!) + end + + it 'should be written' do + ls.sitemap.add '/test' + expect(ls.sitemap.empty?).to be(false) + expect(ls).to receive(:add_to_index).with(ls.sitemap) + ls.send(:finalize_sitemap!) 
+ end + end + + describe 'compress' do + it 'should be true by default' do + expect(ls.compress).to be(true) + end + + it 'should be set on the location objects' do + expect(ls.sitemap.location[:compress]).to be(true) + expect(ls.sitemap_index.location[:compress]).to be(true) + end + + it 'should be settable and gettable' do + ls.compress = false + expect(ls.compress).to be(false) + ls.compress = :all_but_first + expect(ls.compress).to eq(:all_but_first) + end + + it 'should update the location objects when set' do + ls.compress = false + expect(ls.sitemap.location[:compress]).to be(false) + expect(ls.sitemap_index.location[:compress]).to be(false) + end + + describe 'in groups' do + it 'should inherit the current compress setting' do + ls.compress = false + expect(ls.group.compress).to be(false) + end + + it 'should set the compress value' do + group = ls.group(:compress => false) + expect(group.compress).to be(false) + end + end + end + + describe 'max_sitemap_links' do + it 'can be set via initializer' do + ls = SitemapGenerator::LinkSet.new(:max_sitemap_links => 10) + expect(ls.max_sitemap_links).to eq(10) + end + + it 'can be set via accessor' do + ls.max_sitemap_links = 10 + expect(ls.max_sitemap_links).to eq(10) + end + end + + describe 'options_for_group' do + context 'max_sitemap_links' do + it 'inherits the current value' do + ls.max_sitemap_links = 10 + options = ls.send(:options_for_group, {}) + expect(options[:max_sitemap_links]).to eq(10) + end + + it 'returns the value when set' do + options = ls.send(:options_for_group, :max_sitemap_links => 10) + expect(options[:max_sitemap_links]).to eq(10) + end + end + end + + describe 'sitemap_location' do + it 'returns an instance initialized with values from the link set' do + expect(ls).to receive(:sitemaps_host).and_return(:host) + expect(ls).to receive(:namer).and_return(:namer) + expect(ls).to receive(:public_path).and_return(:public_path) + expect(ls).to receive(:verbose).and_return(:verbose) + 
expect(ls).to receive(:max_sitemap_links).and_return(:max_sitemap_links) + + ls.instance_variable_set(:@sitemaps_path, :sitemaps_path) + ls.instance_variable_set(:@adapter, :adapter) + ls.instance_variable_set(:@compress, :compress) + + expect(SitemapGenerator::SitemapLocation).to receive(:new).with( + :host => :host, + :namer => :namer, + :public_path => :public_path, + :sitemaps_path => :sitemaps_path, + :adapter => :adapter, + :verbose => :verbose, + :compress => :compress, + :max_sitemap_links => :max_sitemap_links + ) + ls.sitemap_location + end + end +end diff --git a/spec/sitemap_generator/sitemap_generator_spec.rb b/spec/sitemap_generator/sitemap_generator_spec.rb new file mode 100644 index 00000000..078e97fd --- /dev/null +++ b/spec/sitemap_generator/sitemap_generator_spec.rb @@ -0,0 +1,580 @@ +require 'spec_helper' +require 'cgi' + +class Holder + class << self + attr_accessor :executed + end +end + +def with_max_links(num) + original = SitemapGenerator::Sitemap.max_sitemap_links + SitemapGenerator::Sitemap.max_sitemap_links = num + yield +ensure + SitemapGenerator::Sitemap.max_sitemap_links = original +end + +describe 'SitemapGenerator' do + describe 'reset!' do + before do + SitemapGenerator::Sitemap.default_host # Force initialization of the LinkSet + end + + it 'should set a new LinkSet instance' do + first = SitemapGenerator::Sitemap.instance_variable_get(:@link_set) + expect(first).to be_a(SitemapGenerator::LinkSet) + SitemapGenerator::Sitemap.reset! + second = SitemapGenerator::Sitemap.instance_variable_get(:@link_set) + expect(second).to be_a(SitemapGenerator::LinkSet) + expect(first).not_to be(second) + end + end + + describe 'root' do + it 'should be set to the root of the gem' do + expect(SitemapGenerator.root).to eq(File.expand_path('../../../' , __FILE__)) + end + end + + describe 'generate sitemap with normal config' do + before :all do + SitemapGenerator::Sitemap.reset! 
+ clean_sitemap_files_from_rails_app + copy_sitemap_file_to_rails_app(:create) + with_max_links(10) { execute_sitemap_config } + end + + it 'should create sitemaps' do + file_should_exist(rails_path('public/sitemap.xml.gz')) + file_should_exist(rails_path('public/sitemap1.xml.gz')) + file_should_exist(rails_path('public/sitemap2.xml.gz')) + file_should_not_exist(rails_path('public/sitemap3.xml.gz')) + end + + it 'should have 13 links' do + expect(SitemapGenerator::Sitemap.link_count).to eq(13) + end + + it 'index XML should validate' do + gzipped_xml_file_should_validate_against_schema rails_path('public/sitemap.xml.gz'), 'siteindex' + end + + it 'sitemap XML should validate' do + gzipped_xml_file_should_validate_against_schema rails_path('public/sitemap1.xml.gz'), 'sitemap' + gzipped_xml_file_should_validate_against_schema rails_path('public/sitemap2.xml.gz'), 'sitemap' + end + + it 'index XML should not have excess whitespace' do + gzipped_xml_file_should_have_minimal_whitespace rails_path('public/sitemap.xml.gz') + end + + it 'sitemap XML should not have excess whitespace' do + gzipped_xml_file_should_have_minimal_whitespace rails_path('public/sitemap1.xml.gz') + end + end + + describe 'sitemap with groups' do + before :all do + SitemapGenerator::Sitemap.reset! 
+ clean_sitemap_files_from_rails_app + copy_sitemap_file_to_rails_app(:groups) + with_max_links(2) { execute_sitemap_config } + @expected = %w[ + public/en/xxx.xml.gz + public/fr/abc3.xml.gz + public/fr/abc4.xml.gz + public/fr/def.xml.gz + public/fr/new_sitemaps.xml.gz + public/fr/new_sitemaps1.xml.gz + public/fr/new_sitemaps2.xml.gz + public/fr/new_sitemaps3.xml.gz + public/fr/new_sitemaps4.xml.gz] + @sitemaps = (@expected - %w[public/fr/new_sitemaps.xml.gz]) + end + + it 'should create sitemaps' do + @expected.each { |file| file_should_exist(rails_path(file)) } + file_should_not_exist(rails_path('public/fr/new_sitemaps5.xml.gz')) + file_should_not_exist(rails_path('public/en/xxx1.xml.gz')) + file_should_not_exist(rails_path('public/fr/abc5.xml.gz')) + end + + it 'should have 16 links' do + expect(SitemapGenerator::Sitemap.link_count).to eq(16) + end + + it 'index XML should validate' do + gzipped_xml_file_should_validate_against_schema rails_path('public/fr/new_sitemaps.xml.gz'), 'siteindex' + end + + it 'index XML should not have excess whitespace' do + gzipped_xml_file_should_have_minimal_whitespace rails_path('public/fr/new_sitemaps.xml.gz') + end + + it 'sitemaps XML should validate' do + @sitemaps.each { |file| gzipped_xml_file_should_validate_against_schema(rails_path(file), 'sitemap') } + end + + it 'sitemap XML should not have excess whitespace' do + @sitemaps.each { |file| gzipped_xml_file_should_have_minimal_whitespace(rails_path(file)) } + end + end + + describe 'should handle links added manually' do + before do + clean_sitemap_files_from_rails_app + ::SitemapGenerator::Sitemap.reset! 
+ ::SitemapGenerator::Sitemap.default_host = 'http://www.example.com' + ::SitemapGenerator::Sitemap.namer = ::SitemapGenerator::SimpleNamer.new(:sitemap, :start => 4) + ::SitemapGenerator::Sitemap.create do + 3.times do |i| + add_to_index 'sitemap#{i}.xml.gz' + end + add '/home' + end + end + + it 'should create the index and start the sitemap numbering from 4' do + file_should_exist(rails_path('public/sitemap.xml.gz')) + file_should_exist(rails_path('public/sitemap4.xml.gz')) + gzipped_xml_file_should_validate_against_schema rails_path('public/sitemap.xml.gz'), 'siteindex' + gzipped_xml_file_should_validate_against_schema rails_path('public/sitemap4.xml.gz'), 'sitemap' + end + end + + describe 'should handle links added manually' do + before do + clean_sitemap_files_from_rails_app + ::SitemapGenerator::Sitemap.reset! + ::SitemapGenerator::Sitemap.default_host = 'http://www.example.com' + ::SitemapGenerator::Sitemap.include_root = false + end + + it 'should create the index' do + with_max_links(1) { + ::SitemapGenerator::Sitemap.create do + add_to_index 'customsitemap.xml.gz' + add '/one' + add '/two' + end + } + file_should_exist(rails_path('public/sitemap.xml.gz')) + file_should_exist(rails_path('public/sitemap1.xml.gz')) + file_should_exist(rails_path('public/sitemap2.xml.gz')) + file_should_not_exist(rails_path('public/sitemap3.xml.gz')) + gzipped_xml_file_should_validate_against_schema rails_path('public/sitemap.xml.gz'), 'siteindex' + end + + it 'should create the index' do + with_max_links(1) { + ::SitemapGenerator::Sitemap.create do + add '/one' + add_to_index 'customsitemap.xml.gz' + add '/two' + end + } + file_should_exist(rails_path('public/sitemap.xml.gz')) + file_should_exist(rails_path('public/sitemap1.xml.gz')) + file_should_exist(rails_path('public/sitemap2.xml.gz')) + file_should_not_exist(rails_path('public/sitemap3.xml.gz')) + gzipped_xml_file_should_validate_against_schema rails_path('public/sitemap.xml.gz'), 'siteindex' + end + + it 'should 
create an index when only manually added links' do + with_max_links(1) { + ::SitemapGenerator::Sitemap.create(:create_index => :auto) do + add_to_index 'customsitemap1.xml.gz' + end + } + file_should_exist(rails_path('public/sitemap.xml.gz')) + file_should_not_exist(rails_path('public/sitemap1.xml.gz')) + gzipped_xml_file_should_validate_against_schema rails_path('public/sitemap.xml.gz'), 'siteindex' + end + + it 'should create an index when only manually added links' do + with_max_links(1) { + ::SitemapGenerator::Sitemap.create(:create_index => :auto) do + add_to_index 'customsitemap1.xml.gz' + add_to_index 'customsitemap2.xml.gz' + add_to_index 'customsitemap3.xml.gz' + end + } + file_should_exist(rails_path('public/sitemap.xml.gz')) + file_should_not_exist(rails_path('public/sitemap1.xml.gz')) + gzipped_xml_file_should_validate_against_schema rails_path('public/sitemap.xml.gz'), 'siteindex' + end + + it 'should not create an index' do + # Index creation is explicitly turned off and no links are added to the sitemap, + # so respect the setting and don't create the index. There is no sitemap file either. + ::SitemapGenerator::Sitemap.create(:create_index => false) do + add_to_index 'customsitemap1.xml.gz' + add_to_index 'customsitemap2.xml.gz' + add_to_index 'customsitemap3.xml.gz' + end + file_should_not_exist(rails_path('public/sitemap.xml.gz')) + file_should_not_exist(rails_path('public/sitemap1.xml.gz')) + end + + it 'should not create an index' do + ::SitemapGenerator::Sitemap.create(:create_index => false) do + add '/one' + end + file_should_exist(rails_path('public/sitemap.xml.gz')) # the sitemap, not an index + file_should_not_exist(rails_path('public/sitemap1.xml.gz')) + gzipped_xml_file_should_validate_against_schema rails_path('public/sitemap.xml.gz'), 'sitemap' + end + end + + describe 'sitemap path' do + before do + clean_sitemap_files_from_rails_app + ::SitemapGenerator::Sitemap.reset! 
+ ::SitemapGenerator::Sitemap.default_host = 'http://test.local' + ::SitemapGenerator::Sitemap.filename = 'sitemap' + ::SitemapGenerator::Sitemap.create_index = true + end + + it 'should allow changing of the filename' do + ::SitemapGenerator::Sitemap.create(:filename => :geo_sitemap) do + add '/goerss' + add '/kml' + end + file_should_exist(rails_path('public/geo_sitemap.xml.gz')) + file_should_exist(rails_path('public/geo_sitemap1.xml.gz')) + end + + it 'should support setting a sitemap path' do + directory_should_not_exist(rails_path('public/sitemaps/')) + + sm = ::SitemapGenerator::Sitemap + sm.sitemaps_path = 'sitemaps/' + sm.create do + add '/' + add '/another' + end + + file_should_exist(rails_path('public/sitemaps/sitemap.xml.gz')) + file_should_exist(rails_path('public/sitemaps/sitemap1.xml.gz')) + end + + it 'should support setting a deeply nested sitemap path' do + directory_should_not_exist(rails_path('public/sitemaps/deep/directory')) + + sm = ::SitemapGenerator::Sitemap + sm.sitemaps_path = 'sitemaps/deep/directory/' + sm.create do + add '/' + add '/another' + add '/yet-another' + end + + file_should_exist(rails_path('public/sitemaps/deep/directory/sitemap.xml.gz')) + file_should_exist(rails_path('public/sitemaps/deep/directory/sitemap1.xml.gz')) + end + end + + describe 'external dependencies' do + it 'should work outside of Rails' do + hide_const('Rails') + expect { ::SitemapGenerator::LinkSet.new }.not_to raise_exception + end + end + + describe 'verbose' do + it 'should be set via ENV[\'VERBOSE\']' do + original = SitemapGenerator.verbose + SitemapGenerator.verbose = nil + ENV['VERBOSE'] = 'true' + expect(SitemapGenerator.verbose).to be(true) + SitemapGenerator.verbose = nil + ENV['VERBOSE'] = 'false' + expect(SitemapGenerator.verbose).to be(false) + SitemapGenerator.verbose = original + end + end + + describe 'yield_sitemap' do + it 'should set the yield_sitemap flag' do + SitemapGenerator.yield_sitemap = false + 
expect(SitemapGenerator.yield_sitemap?).to be(false) + SitemapGenerator.yield_sitemap = true + expect(SitemapGenerator.yield_sitemap?).to be(true) + SitemapGenerator.yield_sitemap = false + end + end + + describe 'create_index' do + let(:ls) { + SitemapGenerator::LinkSet.new( + :include_root => false, + :default_host => 'http://example.com', + :create_index => create_index, + :max_sitemap_links => 1 + ) + } + + let!(:request) do + stub_request(:get, "http://google.com/?url=#{CGI.escape('http://example.com/sitemap.xml.gz')}") + end + + before do + clean_sitemap_files_from_rails_app + end + + describe 'when true' do + let(:create_index) { true } + + it 'should always create index' do + ls.create { add('/one') } + expect(ls.sitemap_index.link_count).to eq(1) # one sitemap + file_should_exist(rails_path('public/sitemap.xml.gz')) + file_should_exist(rails_path('public/sitemap1.xml.gz')) + file_should_not_exist(rails_path('public/sitemap2.xml.gz')) + gzipped_xml_file_should_validate_against_schema rails_path('public/sitemap.xml.gz'), 'siteindex' + gzipped_xml_file_should_validate_against_schema rails_path('public/sitemap1.xml.gz'), 'sitemap' + + # Test that the index url is reported correctly + ls.search_engines = { :google => 'http://google.com/?url=%s' } + ls.ping_search_engines + expect(request).to have_been_requested.once + end + + it 'should always create index' do + ls.create { add('/one'); add('/two') } + expect(ls.sitemap_index.link_count).to eq(2) # two sitemaps + file_should_exist(rails_path('public/sitemap.xml.gz')) + file_should_exist(rails_path('public/sitemap1.xml.gz')) + file_should_exist(rails_path('public/sitemap2.xml.gz')) + gzipped_xml_file_should_validate_against_schema rails_path('public/sitemap.xml.gz'), 'siteindex' + gzipped_xml_file_should_validate_against_schema rails_path('public/sitemap1.xml.gz'), 'sitemap' + gzipped_xml_file_should_validate_against_schema rails_path('public/sitemap2.xml.gz'), 'sitemap' + + # Test that the index url is reported 
correctly + ls.search_engines = { :google => 'http://google.com/?url=%s' } + ls.ping_search_engines + expect(request).to have_been_requested.once + end + end + + # Technically when there's no index, the first sitemap is the 'index' + # regardless of how many sitemaps were created, or if create_index is false. + describe 'when false' do + let(:create_index) { false } + + it 'should never create index' do + ls.create { add('/one') } + expect(ls.sitemap_index.link_count).to eq(1) # one sitemap + file_should_exist(rails_path('public/sitemap.xml.gz')) + file_should_not_exist(rails_path('public/sitemap1.xml.gz')) + gzipped_xml_file_should_validate_against_schema rails_path('public/sitemap.xml.gz'), 'sitemap' + + # Test that the index url is reported correctly + ls.search_engines = { :google => 'http://google.com/?url=%s' } + ls.ping_search_engines + expect(request).to have_been_requested.once + end + + it 'should never create index' do + ls.create { add('/one'); add('/two') } + expect(ls.sitemap_index.link_count).to eq(2) # two sitemaps + file_should_exist(rails_path('public/sitemap.xml.gz')) + file_should_exist(rails_path('public/sitemap1.xml.gz')) + file_should_not_exist(rails_path('public/sitemap2.xml.gz')) + gzipped_xml_file_should_validate_against_schema rails_path('public/sitemap.xml.gz'), 'sitemap' + gzipped_xml_file_should_validate_against_schema rails_path('public/sitemap1.xml.gz'), 'sitemap' + + # Test that the index url is reported correctly + ls.search_engines = { :google => 'http://google.com/?url=%s' } + ls.ping_search_engines + expect(request).to have_been_requested.once + end + end + + describe 'when :auto' do + let(:create_index) { :auto } + + it 'should not create index if only one sitemap file' do + ls.create { add('/one') } + expect(ls.sitemap_index.link_count).to eq(1) # one sitemap + file_should_exist(rails_path('public/sitemap.xml.gz')) + file_should_not_exist(rails_path('public/sitemap1.xml.gz')) + gzipped_xml_file_should_validate_against_schema 
rails_path('public/sitemap.xml.gz'), 'sitemap' + + # Test that the index url is reported correctly + ls.search_engines = { :google => 'http://google.com/?url=%s' } + ls.ping_search_engines + expect(request).to have_been_requested.once + end + + it 'should create index if more than one sitemap file' do + ls.create { add('/one'); add('/two') } + expect(ls.sitemap_index.link_count).to eq(2) # two sitemaps + file_should_exist(rails_path('public/sitemap.xml.gz')) + file_should_exist(rails_path('public/sitemap1.xml.gz')) + file_should_exist(rails_path('public/sitemap2.xml.gz')) + file_should_not_exist(rails_path('public/sitemap3.xml.gz')) + gzipped_xml_file_should_validate_against_schema rails_path('public/sitemap.xml.gz'), 'siteindex' + gzipped_xml_file_should_validate_against_schema rails_path('public/sitemap1.xml.gz'), 'sitemap' + gzipped_xml_file_should_validate_against_schema rails_path('public/sitemap2.xml.gz'), 'sitemap' + + # Test that the index url is reported correctly + ls.search_engines = { :google => 'http://google.com/?url=%s' } + ls.ping_search_engines + expect(request).to have_been_requested.once + end + + it 'should create index if more than one group' do + ls.create do + group(:filename => :group1) { add('/one') }; + group(:filename => :group2) { add('/two') }; + end + expect(ls.sitemap_index.link_count).to eq(2) # two sitemaps + file_should_exist(rails_path('public/sitemap.xml.gz')) + file_should_exist(rails_path('public/group1.xml.gz')) + file_should_exist(rails_path('public/group2.xml.gz')) + gzipped_xml_file_should_validate_against_schema rails_path('public/sitemap.xml.gz'), 'siteindex' + gzipped_xml_file_should_validate_against_schema rails_path('public/group1.xml.gz'), 'sitemap' + gzipped_xml_file_should_validate_against_schema rails_path('public/group2.xml.gz'), 'sitemap' + + # Test that the index url is reported correctly + ls.search_engines = { :google => 'http://google.com/?url=%s' } + ls.ping_search_engines + expect(request).to 
have_been_requested.once + end + end + end + + describe 'compress' do + let(:ls) { + SitemapGenerator::LinkSet.new( + :default_host => 'http://test.local', + :include_root => false, + :compress => compress, + :max_sitemap_links => 1 + ) + } + + before do + clean_sitemap_files_from_rails_app + end + + describe 'when false' do + let(:compress) { false } + + it 'should not compress files' do + ls.create do + add('/one') + add('/two') + group(:filename => :group) { + add('/group1') + add('/group2') + } + end + file_should_exist(rails_path('public/sitemap.xml')) + file_should_exist(rails_path('public/sitemap1.xml')) + file_should_exist(rails_path('public/group.xml')) + file_should_exist(rails_path('public/group1.xml')) + end + end + + describe 'when :all_but_first' do + let(:compress) { :all_but_first } + + it 'should not compress first file' do + ls.create do + add('/one') + add('/two') + add('/three') + group(:filename => :group) { + add('/group1') + add('/group2') + } + group(:filename => :group2, :compress => true) { + add('/group1') + add('/group2') + } + group(:filename => :group2, :compress => false) { + add('/group1') + add('/group2') + } + end + file_should_exist(rails_path('public/sitemap.xml')) + file_should_exist(rails_path('public/sitemap1.xml.gz')) + file_should_exist(rails_path('public/sitemap2.xml.gz')) + file_should_exist(rails_path('public/group.xml')) + file_should_exist(rails_path('public/group1.xml.gz')) + file_should_exist(rails_path('public/group2.xml.gz')) + file_should_exist(rails_path('public/group21.xml.gz')) + end + end + + describe 'in groups' do + let(:compress) { nil } + + it 'should respect passed in compress option' do + ls.create do + group(:filename => :group1, :compress => :all_but_first) { + add('/group1') + add('/group2') + } + group(:filename => :group2, :compress => true) { + add('/group1') + add('/group2') + } + group(:filename => :group3, :compress => false) { + add('/group1') + add('/group2') + } + end + 
file_should_exist(rails_path('public/group1.xml')) + file_should_exist(rails_path('public/group11.xml.gz')) + file_should_exist(rails_path('public/group2.xml.gz')) + file_should_exist(rails_path('public/group21.xml.gz')) + file_should_exist(rails_path('public/group3.xml')) + file_should_exist(rails_path('public/group31.xml')) + end + end + end + + describe 'respond_to?' do + it 'should correctly identify the methods that it responds to' do + expect(SitemapGenerator::Sitemap.respond_to?(:create)).to be(true) + expect(SitemapGenerator::Sitemap.respond_to?(:adapter)).to be(true) + expect(SitemapGenerator::Sitemap.respond_to?(:default_host)).to be(true) + expect(SitemapGenerator::Sitemap.respond_to?(:invalid_func)).to be(false) + end + end + + protected + + # + # Helpers + # + + def rails_path(file) + SitemapGenerator.app.root + file + end + + def copy_sitemap_file_to_rails_app(extension) + FileUtils.cp(File.join(SitemapGenerator.root, "spec/files/sitemap.#{extension}.rb"), SitemapGenerator.app.root + 'config/sitemap.rb') + end + + def delete_sitemap_file_from_rails_app + FileUtils.remove(SitemapGenerator.app.root + 'config/sitemap.rb') + rescue + nil + end + + def clean_sitemap_files_from_rails_app + FileUtils.rm_rf(rails_path('public/')) + FileUtils.mkdir_p(rails_path('public/')) + end + + # Better would be to just invoke the environment task and use + # the interpreter. 
+ def execute_sitemap_config(opts={}) + SitemapGenerator::Interpreter.run(opts) + end +end diff --git a/spec/sitemap_generator/sitemap_groups_spec.rb b/spec/sitemap_generator/sitemap_groups_spec.rb new file mode 100644 index 00000000..824f1606 --- /dev/null +++ b/spec/sitemap_generator/sitemap_groups_spec.rb @@ -0,0 +1,140 @@ +require 'spec_helper' + +describe 'Sitemap Groups' do + let(:linkset) { ::SitemapGenerator::LinkSet.new(:default_host => 'http://test.com') } + + before do + FileUtils.rm_rf(SitemapGenerator.app.root + 'public/') + end + + it 'should not finalize the default sitemap if using groups' do + linkset.create do + group(:filename => :sitemap_en) do + add '/en' + end + end + file_should_exist(SitemapGenerator.app.root + 'public/sitemap.xml.gz') + file_should_exist(SitemapGenerator.app.root + 'public/sitemap_en.xml.gz') + file_should_not_exist(SitemapGenerator.app.root + 'public/sitemap1.xml.gz') + end + + it 'should not write out empty groups' do + linkset.create do + group(:filename => :sitemap_en) { } + end + file_should_not_exist(SitemapGenerator.app.root + 'public/sitemap_en.xml.gz') + end + + it 'should add default links if no groups are created' do + linkset.create do + end + expect(linkset.link_count).to eq(1) + file_should_exist(SitemapGenerator.app.root + 'public/sitemap.xml.gz') + file_should_not_exist(SitemapGenerator.app.root + 'public/sitemap1.xml.gz') + end + + it 'should add links to the default sitemap' do + linkset.create do + add '/before' + group(:filename => :sitemap_en) do + add '/link' + end + add '/after' + end + expect(linkset.link_count).to eq(4) + file_should_exist(SitemapGenerator.app.root + 'public/sitemap.xml.gz') + file_should_exist(SitemapGenerator.app.root + 'public/sitemap1.xml.gz') + file_should_exist(SitemapGenerator.app.root + 'public/sitemap_en.xml.gz') + end + + it 'should rollover when sitemaps are full' do + linkset.max_sitemap_links = 1 + linkset.include_index = false + linkset.include_root = false + 
linkset.create do + add '/before' + group(:filename => :sitemap_en, :sitemaps_path => 'en/') do + add '/one' + add '/two' + end + add '/after' + end + expect(linkset.link_count).to eq(4) + file_should_exist(SitemapGenerator.app.root + 'public/sitemap.xml.gz') + file_should_exist(SitemapGenerator.app.root + 'public/sitemap1.xml.gz') + file_should_exist(SitemapGenerator.app.root + 'public/sitemap2.xml.gz') + file_should_not_exist(SitemapGenerator.app.root + 'public/sitemap3.xml.gz') + file_should_exist(SitemapGenerator.app.root + 'public/en/sitemap_en.xml.gz') + file_should_exist(SitemapGenerator.app.root + 'public/en/sitemap_en1.xml.gz') + file_should_not_exist(SitemapGenerator.app.root + 'public/en/sitemap_en2.xml.gz') + end + + it 'should support multiple groups' do + linkset.create do + group(:filename => :sitemap_en, :sitemaps_path => 'en/') do + add '/one' + end + group(:filename => :sitemap_fr, :sitemaps_path => 'fr/') do + add '/one' + end + end + expect(linkset.link_count).to eq(2) + file_should_exist(SitemapGenerator.app.root + 'public/sitemap.xml.gz') + file_should_exist(SitemapGenerator.app.root + 'public/en/sitemap_en.xml.gz') + file_should_exist(SitemapGenerator.app.root + 'public/fr/sitemap_fr.xml.gz') + end + + it 'the sitemap shouldn\'t be finalized until the end if the groups don\'t conflict' do + linkset.create do + add 'one' + group(:filename => :first) { add '/two' } + add 'three' + group(:filename => :second) { add '/four' } + add 'five' + end + expect(linkset.link_count).to eq(6) + file_should_exist(SitemapGenerator.app.root + 'public/sitemap.xml.gz') + file_should_exist(SitemapGenerator.app.root + 'public/sitemap1.xml.gz') + file_should_exist(SitemapGenerator.app.root + 'public/first.xml.gz') + file_should_exist(SitemapGenerator.app.root + 'public/second.xml.gz') + gzipped_xml_file_should_validate_against_schema(SitemapGenerator.app.root + 'public/sitemap.xml.gz', 'siteindex') + 
gzipped_xml_file_should_validate_against_schema(SitemapGenerator.app.root + 'public/sitemap1.xml.gz', 'sitemap') + end + + it 'groups should share the sitemap if the sitemap location is unchanged' do + linkset.create do + add 'one' + group(:default_host => 'http://newhost.com') { add '/two' } + add 'three' + group(:default_host => 'http://betterhost.com') { add '/four' } + add 'five' + end + expect(linkset.link_count).to eq(6) + file_should_exist(SitemapGenerator.app.root + 'public/sitemap.xml.gz') + file_should_exist(SitemapGenerator.app.root + 'public/sitemap1.xml.gz') + file_should_exist(SitemapGenerator.app.root + 'public/sitemap2.xml.gz') + file_should_exist(SitemapGenerator.app.root + 'public/sitemap3.xml.gz') + file_should_not_exist(SitemapGenerator.app.root + 'public/sitemap4.xml.gz') + gzipped_xml_file_should_validate_against_schema(SitemapGenerator.app.root + 'public/sitemap.xml.gz', 'siteindex') + gzipped_xml_file_should_validate_against_schema(SitemapGenerator.app.root + 'public/sitemap1.xml.gz', 'sitemap') + gzipped_xml_file_should_validate_against_schema(SitemapGenerator.app.root + 'public/sitemap2.xml.gz', 'sitemap') + gzipped_xml_file_should_validate_against_schema(SitemapGenerator.app.root + 'public/sitemap3.xml.gz', 'sitemap') + end + + it 'sitemaps should be finalized if virtual location settings are changed' do + linkset.create do + add 'one' + group(:sitemaps_path => :en) { add '/two' } + add 'three' + group(:sitemaps_host => 'http://newhost.com') { add '/four' } + add 'five' + end + expect(linkset.link_count).to eq(6) + file_should_exist(SitemapGenerator.app.root + 'public/sitemap.xml.gz') + file_should_exist(SitemapGenerator.app.root + 'public/sitemap1.xml.gz') + file_should_exist(SitemapGenerator.app.root + 'public/sitemap2.xml.gz') + file_should_exist(SitemapGenerator.app.root + 'public/sitemap3.xml.gz') + file_should_not_exist(SitemapGenerator.app.root + 'public/sitemap4.xml.gz') + file_should_exist(SitemapGenerator.app.root + 
'public/en/sitemap.xml.gz') + end +end diff --git a/spec/sitemap_generator/sitemap_location_spec.rb b/spec/sitemap_generator/sitemap_location_spec.rb new file mode 100644 index 00000000..8015eda4 --- /dev/null +++ b/spec/sitemap_generator/sitemap_location_spec.rb @@ -0,0 +1,229 @@ +require 'spec_helper' + +describe SitemapGenerator::SitemapLocation do + let(:default_host) { 'http://example.com' } + let(:location) { SitemapGenerator::SitemapLocation.new } + + it 'public_path should default to the public directory in the application root' do + expect(location.public_path).to eq(SitemapGenerator.app.root + 'public/') + end + + it 'should have a default namer' do + expect(location[:namer]).not_to be_nil + expect(location[:filename]).to be_nil + expect(location.filename).to eq('sitemap1.xml.gz') + end + + it 'should require a filename' do + location[:filename] = nil + expect { + expect(location.filename).to be_nil + }.to raise_error(SitemapGenerator::SitemapError, 'No filename or namer set') + end + + it 'should require a namer' do + location[:namer] = nil + expect { + expect(location.filename).to be_nil + }.to raise_error(SitemapGenerator::SitemapError, 'No filename or namer set') + end + + it 'should require a host' do + location = SitemapGenerator::SitemapLocation.new(:filename => nil, :namer => nil) + expect { + expect(location.host).to be_nil + }.to raise_error(SitemapGenerator::SitemapError, 'No value set for host') + end + + it 'should accept a Namer option' do + @namer = SitemapGenerator::SimpleNamer.new(:xxx) + location = SitemapGenerator::SitemapLocation.new(:namer => @namer) + expect(location.filename).to eq(@namer.to_s) + end + + it 'should protect the filename from further changes in the Namer' do + @namer = SitemapGenerator::SimpleNamer.new(:xxx) + location = SitemapGenerator::SitemapLocation.new(:namer => @namer) + expect(location.filename).to eq(@namer.to_s) + @namer.next + expect(location.filename).to eq(@namer.previous.to_s) + end + + it 'should allow 
changing the namer' do + @namer1 = SitemapGenerator::SimpleNamer.new(:xxx) + location = SitemapGenerator::SitemapLocation.new(:namer => @namer1) + expect(location.filename).to eq(@namer1.to_s) + @namer2 = SitemapGenerator::SimpleNamer.new(:yyy) + location[:namer] = @namer2 + expect(location.filename).to eq(@namer2.to_s) + end + + describe 'testing options and #with' do + + # Array of tuples with instance options and expected method return values + tests = [ + [{ + :sitemaps_path => nil, + :public_path => '/public', + :filename => 'sitemap.xml.gz', + :host => 'http://test.com' }, + { :url => 'http://test.com/sitemap.xml.gz', + :directory => '/public', + :path => '/public/sitemap.xml.gz', + :path_in_public => 'sitemap.xml.gz' + }], + [{ + :sitemaps_path => 'sitemaps/en/', + :public_path => '/public/system/', + :filename => 'sitemap.xml.gz', + :host => 'http://test.com/plus/extra/' }, + { :url => 'http://test.com/plus/extra/sitemaps/en/sitemap.xml.gz', + :directory => '/public/system/sitemaps/en', + :path => '/public/system/sitemaps/en/sitemap.xml.gz', + :path_in_public => 'sitemaps/en/sitemap.xml.gz' + }] + ] + tests.each do |opts, returns| + returns.each do |method, value| + it "#{method} should return #{value}" do + expect(location.with(opts).send(method)).to eq(value) + end + end + end + end + + describe 'when duplicated' do + it 'should not inherit some objects' do + location = SitemapGenerator::SitemapLocation.new(:filename => 'xxx', :host => default_host, :public_path => 'public/') + expect(location.url).to eq(default_host+'/xxx') + expect(location.public_path.to_s).to eq('public/') + dup = location.dup + expect(dup.url).to eq(location.url) + expect(dup.url).not_to be(location.url) + expect(dup.public_path.to_s).to eq(location.public_path.to_s) + expect(dup.public_path).not_to be(location.public_path) + end + end + + describe 'filesize' do + it 'should read the size of the file at path' do + expect(location).to receive(:path).and_return('/somepath') + 
expect(File).to receive(:size?).with('/somepath') + location.filesize + end + end + + describe 'public_path' do + it 'should append a trailing slash' do + location = SitemapGenerator::SitemapLocation.new(:public_path => 'public/google') + expect(location.public_path.to_s).to eq('public/google/') + location[:public_path] = 'new/path' + expect(location.public_path.to_s).to eq('new/path/') + location[:public_path] = 'already/slashed/' + expect(location.public_path.to_s).to eq('already/slashed/') + end + end + + describe 'sitemaps_path' do + it 'should append a trailing slash' do + location = SitemapGenerator::SitemapLocation.new(:sitemaps_path => 'public/google') + expect(location.sitemaps_path.to_s).to eq('public/google/') + location[:sitemaps_path] = 'new/path' + expect(location.sitemaps_path.to_s).to eq('new/path/') + location[:sitemaps_path] = 'already/slashed/' + expect(location.sitemaps_path.to_s).to eq('already/slashed/') + end + end + + describe 'url' do + it 'should handle paths not ending in slash' do + location = SitemapGenerator::SitemapLocation.new( + :public_path => 'public/google', :filename => 'xxx', + :host => default_host, :sitemaps_path => 'sub/dir') + expect(location.url).to eq(default_host + '/sub/dir/xxx') + end + end + + describe 'write' do + let(:location) do + SitemapGenerator::SitemapLocation.new(:public_path => 'public/', :verbose => verbose) + end + + before do + expect(location.adapter).to receive(:write) + end + + context 'when verbose is true' do + let(:verbose) { true } + + it 'should output summary line' do + expect(location).to receive(:summary) + location.write('data', 1) + end + end + + context 'when verbose is false' do + let(:verbose) { false } + + it 'should not output summary line' do + expect(location).not_to receive(:summary) + location.write('data', 1) + end + end + end + + describe 'filename' do + it 'should strip gz extension if not compressing' do + location = SitemapGenerator::SitemapLocation.new(:namer => 
SitemapGenerator::SimpleNamer.new(:sitemap), :compress => false) + expect(location.filename).to eq('sitemap.xml') + end + + it 'should not strip gz extension if compressing' do + location = SitemapGenerator::SitemapLocation.new(:namer => SitemapGenerator::SimpleNamer.new(:sitemap), :compress => true) + expect(location.filename).to eq('sitemap.xml.gz') + end + + it 'should strip gz extension if :all_but_first and first file' do + namer = SitemapGenerator::SimpleNamer.new(:sitemap) + expect(namer).to receive(:start?).and_return(true) + location = SitemapGenerator::SitemapLocation.new(:namer => namer, :compress => :all_but_first) + expect(location.filename).to eq('sitemap.xml') + end + + it 'should not strip gz extension if :all_but_first and not first file' do + namer = SitemapGenerator::SimpleNamer.new(:sitemap) + expect(namer).to receive(:start?).and_return(false) + location = SitemapGenerator::SitemapLocation.new(:namer => namer, :compress => :all_but_first) + expect(location.filename).to eq('sitemap.xml.gz') + end + end + + describe 'max_sitemap_links' do + it 'returns the value set on the object' do + location = SitemapGenerator::SitemapLocation.new(:max_sitemap_links => 10) + expect(location[:max_sitemap_links]).to eq(10) + end + end + + describe 'when not compressing' do + it 'the URL should point to the uncompressed file' do + location = SitemapGenerator::SitemapLocation.new( + :namer => SitemapGenerator::SimpleNamer.new(:sitemap), + :host => 'http://example.com', + :compress => false + ) + expect(location.url).to eq('http://example.com/sitemap.xml') + end + end +end + +describe SitemapGenerator::SitemapIndexLocation do + let(:location) { SitemapGenerator::SitemapIndexLocation.new } + + it 'should have a default namer' do + location = SitemapGenerator::SitemapIndexLocation.new + expect(location[:namer]).not_to be_nil + expect(location[:filename]).to be_nil + expect(location.filename).to eq('sitemap.xml.gz') + end +end diff --git a/spec/sitemap_generator/sitemap_namer_spec.rb 
b/spec/sitemap_generator/sitemap_namer_spec.rb new file mode 100644 index 00000000..d1f43f6a --- /dev/null +++ b/spec/sitemap_generator/sitemap_namer_spec.rb @@ -0,0 +1,97 @@ +require 'spec_helper' + +describe SitemapGenerator::SimpleNamer do + it 'should generate file names' do + namer = SitemapGenerator::SimpleNamer.new(:sitemap) + expect(namer.to_s).to eq('sitemap.xml.gz') + expect(namer.next.to_s).to eq('sitemap1.xml.gz') + expect(namer.next.to_s).to eq('sitemap2.xml.gz') + end + + it 'should set the file extension' do + namer = SitemapGenerator::SimpleNamer.new(:sitemap, :extension => '.xyz') + expect(namer.to_s).to eq('sitemap.xyz') + expect(namer.next.to_s).to eq('sitemap1.xyz') + expect(namer.next.to_s).to eq('sitemap2.xyz') + end + + it 'should set the starting index' do + namer = SitemapGenerator::SimpleNamer.new(:sitemap, :start => 10) + expect(namer.to_s).to eq('sitemap.xml.gz') + expect(namer.next.to_s).to eq('sitemap10.xml.gz') + expect(namer.next.to_s).to eq('sitemap11.xml.gz') + end + + it 'should accept a string name' do + namer = SitemapGenerator::SimpleNamer.new('abc-def') + expect(namer.to_s).to eq('abc-def.xml.gz') + expect(namer.next.to_s).to eq('abc-def1.xml.gz') + expect(namer.next.to_s).to eq('abc-def2.xml.gz') + end + + it 'should return previous name' do + namer = SitemapGenerator::SimpleNamer.new(:sitemap) + expect(namer.to_s).to eq('sitemap.xml.gz') + expect(namer.next.to_s).to eq('sitemap1.xml.gz') + expect(namer.previous.to_s).to eq('sitemap.xml.gz') + expect(namer.next.next.to_s).to eq('sitemap2.xml.gz') + expect(namer.previous.to_s).to eq('sitemap1.xml.gz') + expect(namer.next.next.to_s).to eq('sitemap3.xml.gz') + expect(namer.previous.to_s).to eq('sitemap2.xml.gz') + end + + it 'should raise if already at the start' do + namer = SitemapGenerator::SimpleNamer.new(:sitemap) + expect(namer.to_s).to eq('sitemap.xml.gz') + # Use a regex because in Ruby 3.1 the error message includes newlines and the first line of backtrace + expect { 
namer.previous }.to raise_error(NameError, /Already at the start of the series/) + end + + it 'should handle names with underscores' do + namer = SitemapGenerator::SimpleNamer.new('sitemap1_') + expect(namer.to_s).to eq('sitemap1_.xml.gz') + expect(namer.next.to_s).to eq('sitemap1_1.xml.gz') + end + + it 'should reset the namer' do + namer = SitemapGenerator::SimpleNamer.new(:sitemap) + expect(namer.to_s).to eq('sitemap.xml.gz') + expect(namer.next.to_s).to eq('sitemap1.xml.gz') + namer.reset + expect(namer.to_s).to eq('sitemap.xml.gz') + expect(namer.next.to_s).to eq('sitemap1.xml.gz') + end + + describe 'should handle the zero option' do + it 'as a string' do + namer = SitemapGenerator::SimpleNamer.new(:sitemap, :zero => 'string') + expect(namer.to_s).to eq('sitemapstring.xml.gz') + expect(namer.next.to_s).to eq('sitemap1.xml.gz') + end + + it 'as an integer' do + namer = SitemapGenerator::SimpleNamer.new(:sitemap, :zero => 0) + expect(namer.to_s).to eq('sitemap0.xml.gz') + expect(namer.next.to_s).to eq('sitemap1.xml.gz') + end + + it 'as a string' do + namer = SitemapGenerator::SimpleNamer.new(:sitemap, :zero => '_index') + expect(namer.to_s).to eq('sitemap_index.xml.gz') + expect(namer.next.to_s).to eq('sitemap1.xml.gz') + end + + it 'as a symbol' do + namer = SitemapGenerator::SimpleNamer.new(:sitemap, :zero => :index) + expect(namer.to_s).to eq('sitemapindex.xml.gz') + expect(namer.next.to_s).to eq('sitemap1.xml.gz') + end + + it 'with a starting index' do + namer = SitemapGenerator::SimpleNamer.new(:sitemap, :zero => 'abc', :start => 10) + expect(namer.to_s).to eq('sitemapabc.xml.gz') + expect(namer.next.to_s).to eq('sitemap10.xml.gz') + expect(namer.next.to_s).to eq('sitemap11.xml.gz') + end + end +end diff --git a/spec/sitemap_generator/sitemaps/alternate_sitemap_spec.rb b/spec/sitemap_generator/sitemaps/alternate_sitemap_spec.rb new file mode 100644 index 00000000..ea92291a --- /dev/null +++ b/spec/sitemap_generator/sitemaps/alternate_sitemap_spec.rb @@ 
-0,0 +1,100 @@ +require 'spec_helper' + +describe 'SitemapGenerator' do + it 'should not include media element unless provided' do + xml_fragment = SitemapGenerator::Builder::SitemapUrl.new('link_with_alternates.html', + :host => 'http://www.example.com', + :alternates => [ + { + :lang => 'de', + :href => 'http://www.example.de/link_with_alternate.html' + } + ] + ).to_xml + + doc = Nokogiri::XML.parse("#{xml_fragment}") + url = doc.css('url') + expect(url).not_to be_nil + expect(url.css('loc').text).to eq('http://www.example.com/link_with_alternates.html') + + alternate = url.at_xpath('xhtml:link') + expect(alternate).not_to be_nil + expect(alternate.attribute('rel').value).to eq('alternate') + expect(alternate.attribute('hreflang').value).to eq('de') + expect(alternate.attribute('media')).to be_nil + end + + it 'should not include hreflang element unless provided' do + xml_fragment = SitemapGenerator::Builder::SitemapUrl.new('link_with_alternates.html', + :host => 'http://www.example.com', + :alternates => [ + { + :href => 'http://www.example.de/link_with_alternate.html' + } + ] + ).to_xml + + doc = Nokogiri::XML.parse("#{xml_fragment}") + url = doc.css('url') + expect(url).not_to be_nil + expect(url.css('loc').text).to eq('http://www.example.com/link_with_alternates.html') + + alternate = url.at_xpath('xhtml:link') + expect(alternate).not_to be_nil + expect(alternate.attribute('rel').value).to eq('alternate') + expect(alternate.attribute('hreflang')).to be_nil + end + + it 'should add alternate links to sitemap' do + xml_fragment = SitemapGenerator::Builder::SitemapUrl.new('link_with_alternates.html', + :host => 'http://www.example.com', + :alternates => [ + { + :lang => 'de', + :href => 'http://www.example.de/link_with_alternate.html', + :media => 'only screen and (max-width: 640px)' + } + ] + ).to_xml + + doc = Nokogiri::XML.parse("#{xml_fragment}") + url = doc.css('url') + expect(url).not_to be_nil + expect(url.css('loc').text).to 
eq('http://www.example.com/link_with_alternates.html') + + alternate = url.at_xpath('xhtml:link') + expect(alternate).not_to be_nil + expect(alternate.attribute('rel').value).to eq('alternate') + expect(alternate.attribute('hreflang').value).to eq('de') + expect(alternate.attribute('href').value).to eq('http://www.example.de/link_with_alternate.html') + expect(alternate.attribute('media').value).to eq('only screen and (max-width: 640px)') + end + + it 'should add alternate links to sitemap with rel nofollow' do + xml_fragment = SitemapGenerator::Builder::SitemapUrl.new('link_with_alternates.html', + :host => 'http://www.example.com', + :alternates => [ + { + :lang => 'de', + :href => 'http://www.example.de/link_with_alternate.html', + :nofollow => true, + :media => 'only screen and (max-width: 640px)' + } + ] + ).to_xml + + doc = Nokogiri::XML.parse("#{xml_fragment}") + url = doc.css('url') + expect(url).not_to be_nil + expect(url.css('loc').text).to eq('http://www.example.com/link_with_alternates.html') + + alternate = url.at_xpath('xhtml:link') + expect(alternate).not_to be_nil + expect(alternate.attribute('rel').value).to eq('alternate nofollow') + expect(alternate.attribute('hreflang').value).to eq('de') + expect(alternate.attribute('href').value).to eq('http://www.example.de/link_with_alternate.html') + expect(alternate.attribute('media').value).to eq('only screen and (max-width: 640px)') + end + +end + diff --git a/spec/sitemap_generator/sitemaps/mobile_sitemap_spec.rb b/spec/sitemap_generator/sitemaps/mobile_sitemap_spec.rb new file mode 100644 index 00000000..12768b0e --- /dev/null +++ b/spec/sitemap_generator/sitemaps/mobile_sitemap_spec.rb @@ -0,0 +1,27 @@ +require 'spec_helper' + +describe 'SitemapGenerator' do + + it 'should add the mobile sitemap element' do + loc = 'http://www.example.com/mobile_page.html' + format = 'html' + + mobile_xml_fragment = SitemapGenerator::Builder::SitemapUrl.new('mobile_page.html', + :host => 'http://www.example.com', + 
:mobile => true + ).to_xml + + # Check that the options were parsed correctly + doc = Nokogiri::XML.parse("#{mobile_xml_fragment}") + url = doc.at_xpath('//url') + expect(url).not_to be_nil + expect(url.at_xpath('loc').text).to eq(loc) + + mobile = url.at_xpath('mobile:mobile') + expect(mobile).not_to be_nil + + # Google's documentation and published schema don't match some valid elements may + # not validate. + xml_fragment_should_validate_against_schema(mobile, 'sitemap-mobile', 'xmlns:mobile' => SitemapGenerator::SCHEMAS['mobile']) + end +end diff --git a/spec/sitemap_generator/sitemaps/news_sitemap_spec.rb b/spec/sitemap_generator/sitemaps/news_sitemap_spec.rb new file mode 100644 index 00000000..bab2461b --- /dev/null +++ b/spec/sitemap_generator/sitemaps/news_sitemap_spec.rb @@ -0,0 +1,42 @@ +require 'spec_helper' + +describe 'SitemapGenerator' do + + it 'should add the news sitemap element' do + loc = 'http://www.example.com/my_article.html' + + news_xml_fragment = SitemapGenerator::Builder::SitemapUrl.new('my_article.html', { + :host => 'http://www.example.com', + + :news => { + :publication_name => 'Example', + :publication_language => 'en', + :title => 'My Article', + :keywords => 'my article, articles about myself', + :stock_tickers => 'SAO:PETR3', + :publication_date => '2011-08-22', + :access => 'Subscription', + :genres => 'PressRelease' + } + }).to_xml + + doc = Nokogiri::XML.parse("#{news_xml_fragment}") + + url = doc.at_xpath('//url') + loc = url.at_xpath('loc') + expect(loc.text).to eq('http://www.example.com/my_article.html') + + news = doc.at_xpath('//news:news') + + expect(news.at_xpath('//news:title').text).to eq('My Article') + expect(news.at_xpath('//news:keywords').text).to eq('my article, articles about myself') + expect(news.at_xpath('//news:stock_tickers').text).to eq('SAO:PETR3') + expect(news.at_xpath('//news:publication_date').text).to eq('2011-08-22') + expect(news.at_xpath('//news:access').text).to eq('Subscription') + 
expect(news.at_xpath('//news:genres').text).to eq('PressRelease') + expect(news.at_xpath('//news:name').text).to eq('Example') + expect(news.at_xpath('//news:language').text).to eq('en') + + xml_fragment_should_validate_against_schema(news, 'sitemap-news', 'xmlns:news' => SitemapGenerator::SCHEMAS['news']) + end +end diff --git a/spec/sitemap_generator/sitemaps/pagemap_sitemap_spec.rb b/spec/sitemap_generator/sitemaps/pagemap_sitemap_spec.rb new file mode 100644 index 00000000..8d683ea9 --- /dev/null +++ b/spec/sitemap_generator/sitemaps/pagemap_sitemap_spec.rb @@ -0,0 +1,57 @@ +require 'spec_helper' + +describe 'SitemapGenerator' do + let(:schema) { SitemapGenerator::SCHEMAS['pagemap'] } + + it 'should add the pagemap sitemap element' do + pagemap_xml_fragment = SitemapGenerator::Builder::SitemapUrl.new('my_page.html', { + :host => 'http://www.example.com', + + :pagemap => { + :dataobjects => [ + { + :type => 'document', + :id => 'hibachi', + :attributes => [ + {:name => 'name', :value => 'Dragon'}, + {:name => 'review', :value => 3.5}, + ] + }, + { + :type => 'stats', + :attributes => [ + {:name => 'installs', :value => 2000}, + {:name => 'comments', :value => 200}, + ] + } + ] + } + }).to_xml + + # Nokogiri is a fickle beast. We have to add the namespace and define + # the prefix in order for XPath queries to work. And then we have to + # reingest because otherwise Nokogiri doesn't use it. 
+ doc = Nokogiri::XML.parse(pagemap_xml_fragment) + doc.root.add_namespace_definition('pagemap', schema) + doc = Nokogiri::XML.parse(doc.to_xml) + + url = doc.at_xpath('//url') + loc = url.at_xpath('loc') + expect(loc.text).to eq('http://www.example.com/my_page.html') + + pagemap = doc.at_xpath('//pagemap:PageMap', 'pagemap' => schema) + expect(pagemap.element_children.count).to eq(2) + dataobject = pagemap.at_xpath('//pagemap:DataObject') + expect(dataobject.attributes['type'].value).to eq('document') + expect(dataobject.attributes['id'].value).to eq('hibachi') + expect(dataobject.element_children.count).to eq(2) + first_attribute = dataobject.element_children.first + second_attribute = dataobject.element_children.last + expect(first_attribute.text).to eq('Dragon') + expect(first_attribute.attributes['name'].value).to eq('name') + expect(second_attribute.text).to eq('3.5') + expect(second_attribute.attributes['name'].value).to eq('review') + + xml_fragment_should_validate_against_schema(pagemap, 'sitemap-pagemap', 'xmlns:pagemap' => schema) + end +end diff --git a/spec/sitemap_generator/sitemaps/video_sitemap_spec.rb b/spec/sitemap_generator/sitemaps/video_sitemap_spec.rb new file mode 100644 index 00000000..f193d119 --- /dev/null +++ b/spec/sitemap_generator/sitemaps/video_sitemap_spec.rb @@ -0,0 +1,117 @@ +require 'spec_helper' + +describe 'SitemapGenerator' do + let(:url_options) do + { + :host => 'http://example.com', + :path => 'cool_video.html' + } + end + + let(:video_options) do + { + :thumbnail_loc => 'http://example.com/video1_thumbnail.png', + :title => 'Cool Video', + :content_loc => 'http://example.com/cool_video.mpg', + :player_loc => 'http://example.com/cool_video_player.swf', + :gallery_loc => 'http://example.com/cool_video_gallery', + :gallery_title => 'Gallery Title', + :allow_embed => true, + :autoplay => 'id=123', + :description => 'An new perspective in cool video technology', + :tags => %w(tag1 tag2 tag3), + :category => 'cat1', + :uploader 
=> 'sokrates', + :uploader_info => 'http://sokrates.example.com', + :expiration_date => Time.at(0), + :publication_date => Time.at(0), + :family_friendly => true, + :view_count => 123, + :duration => 456, + :rating => 0.499999999, + :price => 123.45, + :price_currency => 'CAD', + :price_resolution => 'HD', + :price_type => 'rent' + } + end + + # Return XML for the element. + def video_xml(video_options) + SitemapGenerator::Builder::SitemapUrl.new(url_options[:path], { + :host => url_options[:host], + :video => video_options + }).to_xml + end + + # Return a Nokogiri document from the XML. The root of the document is the element. + def video_doc(xml) + Nokogiri::XML.parse("#{xml}") + end + + # Validate the contents of the video element + def validate_video_element(video_doc, video_options) + expect(video_doc.at_xpath('video:thumbnail_loc').text).to eq(video_options[:thumbnail_loc]) + expect(video_doc.at_xpath('video:thumbnail_loc').text).to eq(video_options[:thumbnail_loc]) + expect(video_doc.at_xpath('video:gallery_loc').text).to eq(video_options[:gallery_loc]) + expect(video_doc.at_xpath('video:gallery_loc').attribute('title').text).to eq(video_options[:gallery_title]) + expect(video_doc.at_xpath('video:title').text).to eq(video_options[:title]) + expect(video_doc.at_xpath('video:view_count').text).to eq(video_options[:view_count].to_s) + expect(video_doc.at_xpath('video:duration').text).to eq(video_options[:duration].to_s) + expect(video_doc.at_xpath('video:rating').text).to eq('%0.1f' % video_options[:rating]) + expect(video_doc.at_xpath('video:content_loc').text).to eq(video_options[:content_loc]) + expect(video_doc.at_xpath('video:category').text).to eq(video_options[:category]) + expect(video_doc.xpath('video:tag').collect(&:text)).to eq(video_options[:tags]) + expect(video_doc.at_xpath('video:expiration_date').text).to eq(video_options[:expiration_date].iso8601) + expect(video_doc.at_xpath('video:publication_date').text).to 
eq(video_options[:publication_date].iso8601) + expect(video_doc.at_xpath('video:player_loc').text).to eq(video_options[:player_loc]) + expect(video_doc.at_xpath('video:player_loc').attribute('allow_embed').text).to eq(video_options[:allow_embed] ? 'yes' : 'no') + expect(video_doc.at_xpath('video:player_loc').attribute('autoplay').text).to eq(video_options[:autoplay]) + expect(video_doc.at_xpath('video:uploader').text).to eq(video_options[:uploader]) + expect(video_doc.at_xpath('video:uploader').attribute('info').text).to eq(video_options[:uploader_info]) + expect(video_doc.at_xpath('video:price').text).to eq(video_options[:price].to_s) + expect(video_doc.at_xpath('video:price').attribute('resolution').text).to eq(video_options[:price_resolution].to_s) + expect(video_doc.at_xpath('video:price').attribute('type').text).to eq(video_options[:price_type].to_s) + expect(video_doc.at_xpath('video:price').attribute('currency').text).to eq(video_options[:price_currency].to_s) + xml_fragment_should_validate_against_schema(video_doc, 'sitemap-video', 'xmlns:video' => SitemapGenerator::SCHEMAS['video']) + end + + it 'should add a valid video sitemap element' do + xml = video_xml(video_options) + doc = video_doc(xml) + expect(doc.at_xpath('//url/loc').text).to eq(File.join(url_options[:host], url_options[:path])) + validate_video_element(doc.at_xpath('//url/video:video'), video_options) + end + + it 'should support multiple video elements' do + xml = video_xml([video_options, video_options]) + doc = video_doc(xml) + expect(doc.at_xpath('//url/loc').text).to eq(File.join(url_options[:host], url_options[:path])) + expect(doc.xpath('//url/video:video').count).to eq(2) + doc.xpath('//url/video:video').each do |video| + validate_video_element(video, video_options) + end + end + + it 'should default allow_embed to \'yes\'' do + xml = video_xml(video_options.merge(:allow_embed => nil)) + doc = video_doc(xml) + 
expect(doc.at_xpath('//url/video:video/video:player_loc').attribute('allow_embed').text).to eq('yes') + end + + it 'should not include optional elements if they are not passed' do + optional = [:player_loc, :content_loc, :category, :tags, :tag, :uploader, :gallery_loc, :family_friendly, :publication_date, :expiration_date, :view_count, :rating, :duration] + required_options = video_options.delete_if { |k,v| optional.include?(k) } + xml = video_xml(required_options) + doc = video_doc(xml) + optional.each do |element| + expect(doc.at_xpath("//url/video:video/video:#{element}")).to be_nil + end + end + + it 'should not include autoplay param if blank' do + xml = video_xml(video_options.tap {|v| v.delete(:autoplay) }) + doc = video_doc(xml) + expect(doc.at_xpath('//url/video:video/video:player_loc').attribute('autoplay')).to be_nil + end +end diff --git a/spec/sitemap_generator/templates_spec.rb b/spec/sitemap_generator/templates_spec.rb new file mode 100644 index 00000000..cd626df4 --- /dev/null +++ b/spec/sitemap_generator/templates_spec.rb @@ -0,0 +1,23 @@ +require 'spec_helper' + +describe 'Templates class' do + + it 'should provide method access to each template' do + SitemapGenerator::Templates::FILES.each do |name, file| + expect(SitemapGenerator.templates.send(name)).not_to be(nil) + expect(SitemapGenerator.templates.send(name)).to eq(File.read(File.join(SitemapGenerator.root, 'templates', file))) + end + end + + describe 'templates' do + before do + SitemapGenerator.templates.sitemap_sample = nil + expect(File).to receive(:read).and_return('read file').once + end + + it 'should only be read once' do + SitemapGenerator.templates.sitemap_sample + SitemapGenerator.templates.sitemap_sample + end + end +end diff --git a/spec/sitemap_generator/utilities/existence_spec.rb b/spec/sitemap_generator/utilities/existence_spec.rb new file mode 100644 index 00000000..c0f1f2f1 --- /dev/null +++ b/spec/sitemap_generator/utilities/existence_spec.rb @@ -0,0 +1,26 @@ +require 
'spec_helper' + +class EmptyTrue + def empty?() true; end +end + +class EmptyFalse + def empty?() false; end +end + +BLANK = [ EmptyTrue.new, nil, false, '', ' ', " \n\t \r ", [], {} ] +NOT = [ EmptyFalse.new, Object.new, true, 0, 1, 'a', [nil], { nil => 0 } ] + +describe Object do + let(:utils) { SitemapGenerator::Utilities } + + it 'should define blankness' do + BLANK.each { |v| expect(utils.blank?(v)).to be(true) } + NOT.each { |v| expect(utils.blank?(v)).to be(false) } + end + + it 'should define presence' do + BLANK.each { |v| expect(utils.present?(v)).to be(false) } + NOT.each { |v| expect(utils.present?(v)).to be(true) } + end +end diff --git a/spec/sitemap_generator/utilities/hash_spec.rb b/spec/sitemap_generator/utilities/hash_spec.rb new file mode 100644 index 00000000..1a9fca04 --- /dev/null +++ b/spec/sitemap_generator/utilities/hash_spec.rb @@ -0,0 +1,60 @@ +require 'spec_helper' + +describe SitemapGenerator::Utilities do + let(:utils) { SitemapGenerator::Utilities } + + describe 'assert_valid_keys' do + # Each call form gets its own expect block: the first raise aborts a block, so a second call placed after it would never run. + it 'should raise' do + expect do + utils.assert_valid_keys({ :failore => 'stuff', :funny => 'business' }, [ :failure, :funny]) + end.to raise_error(ArgumentError, 'Unknown key(s): failore') + expect do + utils.assert_valid_keys({ :failore => 'stuff', :funny => 'business' }, :failure, :funny) + end.to raise_error(ArgumentError, 'Unknown key(s): failore') + end + + it 'should not raise' do + expect do + utils.assert_valid_keys({ :failure => 'stuff', :funny => 'business' }, [ :failure, :funny ]) + utils.assert_valid_keys({ :failure => 'stuff', :funny => 'business' }, :failure, :funny) + end.not_to raise_error + end + end + + describe 'keys' do + before do + @strings = { 'a' => 1, 'b' => 2 } + @symbols = { :a => 1, :b => 2 } + @mixed = { :a => 1, 'b' => 2 } + @fixnums = { 0 => 1, 1 => 2 } + if RUBY_VERSION < '1.9.0' + @illegal_symbols = { '\0' => 1, '' => 2, [] => 3 } + else + @illegal_symbols = { [] => 3 } + end + end + + it 'should symbolize_keys' do + expect(utils.symbolize_keys(@symbols)).to eq(@symbols) + 
expect(utils.symbolize_keys(@strings)).to eq(@symbols) + expect(utils.symbolize_keys(@mixed)).to eq(@symbols) + end + + it 'should symbolize_keys!' do + expect(utils.symbolize_keys!(@symbols.dup)).to eq(@symbols) + expect(utils.symbolize_keys!(@strings.dup)).to eq(@symbols) + expect(utils.symbolize_keys!(@mixed.dup)).to eq(@symbols) + end + + it 'should symbolize_keys_preserves_keys_that_cant_be_symbolized' do + expect(utils.symbolize_keys(@illegal_symbols)).to eq(@illegal_symbols) + expect(utils.symbolize_keys!(@illegal_symbols.dup)).to eq(@illegal_symbols) + end + + it 'should symbolize_keys_preserves_fixnum_keys' do + expect(utils.symbolize_keys(@fixnums)).to eq(@fixnums) + expect(utils.symbolize_keys!(@fixnums.dup)).to eq(@fixnums) + end + end +end diff --git a/spec/sitemap_generator/utilities/rounding_spec.rb b/spec/sitemap_generator/utilities/rounding_spec.rb new file mode 100644 index 00000000..fe88356f --- /dev/null +++ b/spec/sitemap_generator/utilities/rounding_spec.rb @@ -0,0 +1,31 @@ +require 'spec_helper' + +describe SitemapGenerator::Utilities do + describe 'rounding' do + let(:utils) { SitemapGenerator::Utilities } + + it 'should round for positive number' do + expect(utils.round(1.4)) .to eq(1) + expect(utils.round(1.6)) .to eq(2) + expect(utils.round(1.6, 0)) .to eq(2) + expect(utils.round(1.4, 1)) .to eq(1.4) + expect(utils.round(1.4, 3)) .to eq(1.4) + expect(utils.round(1.45, 1)) .to eq(1.5) + expect(utils.round(1.445, 2)).to eq(1.45) + # Demonstrates a bug in the round method + # utils.round(9.995, 2).should == 10 + end + + it 'should round for negative number' do + expect(utils.round(-1.4)) .to eq(-1) + expect(utils.round(-1.6)) .to eq(-2) + expect(utils.round(-1.4, 1)) .to eq(-1.4) + expect(utils.round(-1.45, 1)).to eq(-1.5) + end + + it 'should round with negative precision' do + expect(utils.round(123456.0, -1)).to eq(123460.0) + expect(utils.round(123456.0, -2)).to eq(123500.0) + end + end +end diff --git 
a/spec/sitemap_generator/utilities_spec.rb b/spec/sitemap_generator/utilities_spec.rb new file mode 100644 index 00000000..b1e876a6 --- /dev/null +++ b/spec/sitemap_generator/utilities_spec.rb @@ -0,0 +1,103 @@ +require 'spec_helper' + +describe SitemapGenerator::Utilities do + + describe 'assert_valid_keys' do + it 'should raise error on invalid keys' do + expect { + SitemapGenerator::Utilities.assert_valid_keys({ :name => 'Rob', :years => '28' }, :name, :age) + }.to raise_exception(ArgumentError) + expect { + SitemapGenerator::Utilities.assert_valid_keys({ :name => 'Rob', :age => '28' }, 'name', 'age') + }.to raise_exception(ArgumentError) + end + + it 'should not raise error on valid keys' do + expect { + SitemapGenerator::Utilities.assert_valid_keys({ :name => 'Rob', :age => '28' }, :name, :age) + }.not_to raise_exception + + expect { + SitemapGenerator::Utilities.assert_valid_keys({ :name => 'Rob' }, :name, :age) + }.not_to raise_exception + end + end + + describe 'titleize' do + it 'should titleize words and replace underscores' do + expect(SitemapGenerator::Utilities.titleize('google')).to eq('Google') + expect(SitemapGenerator::Utilities.titleize('amy_and_jon')).to eq('Amy And Jon') + end + end + + describe 'truthy?' do + it 'should be truthy' do + ['1', 1, 't', 'true', true].each do |value| + expect(SitemapGenerator::Utilities.truthy?(value)).to be(true) + end + expect(SitemapGenerator::Utilities.truthy?(nil)).to be(false) + end + end + + describe 'falsy?' 
do + it 'should be falsy' do + ['0', 0, 'f', 'false', false].each do |value| + expect(SitemapGenerator::Utilities.falsy?(value)).to be(true) + end + expect(SitemapGenerator::Utilities.falsy?(nil)).to be(false) + end + end + + describe 'as_array' do + it 'should return an array unchanged' do + expect(SitemapGenerator::Utilities.as_array([])).to eq([]) + expect(SitemapGenerator::Utilities.as_array([1])).to eq([1]) + expect(SitemapGenerator::Utilities.as_array([1,2,3])).to eq([1,2,3]) + end + + it 'should return empty array on nil' do + expect(SitemapGenerator::Utilities.as_array(nil)).to eq([]) + end + + it 'should make array of item otherwise' do + expect(SitemapGenerator::Utilities.as_array('')).to eq(['']) + expect(SitemapGenerator::Utilities.as_array(1)).to eq([1]) + expect(SitemapGenerator::Utilities.as_array('hello')).to eq(['hello']) + expect(SitemapGenerator::Utilities.as_array({})).to eq([{}]) + end + end + + describe 'append_slash' do + it 'should yield the expect result' do + expect(SitemapGenerator::Utilities.append_slash('')).to eq('') + expect(SitemapGenerator::Utilities.append_slash(nil)).to eq('') + expect(SitemapGenerator::Utilities.append_slash(Pathname.new(''))).to eq('') + expect(SitemapGenerator::Utilities.append_slash('tmp')).to eq('tmp/') + expect(SitemapGenerator::Utilities.append_slash(Pathname.new('tmp'))).to eq('tmp/') + expect(SitemapGenerator::Utilities.append_slash('tmp/')).to eq('tmp/') + expect(SitemapGenerator::Utilities.append_slash(Pathname.new('tmp/'))).to eq('tmp/') + end + end + + describe 'ellipsis' do + it 'should not modify when less than or equal to max' do + (1..10).each do |i| + string = 'a'*i + expect(SitemapGenerator::Utilities.ellipsis(string, 10)).to eq(string) + end + end + + it 'should replace last 3 characters with ellipsis when greater than max' do + (1..5).each do |i| + string = 'aaaaa' + 'a'*i + expect(SitemapGenerator::Utilities.ellipsis(string, 5)).to eq('aa...') + end + end + + it 'should not freak out when 
string too small' do + expect(SitemapGenerator::Utilities.ellipsis('a', 1)).to eq('a') + expect(SitemapGenerator::Utilities.ellipsis('aa', 1)).to eq('...') + expect(SitemapGenerator::Utilities.ellipsis('aaa', 1)).to eq('...') + end + end +end diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb new file mode 100644 index 00000000..3f0e112c --- /dev/null +++ b/spec/spec_helper.rb @@ -0,0 +1,18 @@ +# require 'simplecov' +# SimpleCov.start +require 'bundler/setup' +Bundler.require + +require './spec/support/file_macros' +require './spec/support/xml_macros' +require 'webmock/rspec' +require 'byebug' + +WebMock.disable_net_connect! + +SitemapGenerator.verbose = false + +RSpec.configure do |config| + config.include(FileMacros) + config.include(XmlMacros) +end diff --git a/spec/support/file_macros.rb b/spec/support/file_macros.rb new file mode 100644 index 00000000..5bbf143c --- /dev/null +++ b/spec/support/file_macros.rb @@ -0,0 +1,35 @@ +# Shared RSpec helpers for asserting on the existence and contents of files. +module FileMacros + def files_should_be_identical(first, second) + expect(identical_files?(first, second)).to be(true) + end + + def files_should_not_be_identical(first, second) + expect(identical_files?(first, second)).to be(false) + end + + def file_should_exist(file) + expect(File.exist?(file)).to be(true), "File #{file} should exist" + end + + def directory_should_exist(dir) + expect(File.exist?(dir)).to be(true), "Directory #{dir} should exist" + expect(File.directory?(dir)).to be(true), "#{dir} should be a directory" + end + + def directory_should_not_exist(dir) + expect(File.exist?(dir)).to be(false), "Directory #{dir} should not exist" + end + + def file_should_not_exist(file) + expect(File.exist?(file)).to be(false), "File #{file} should not exist" + end + + # Returns true when both files exist and have the same contents. Must return + # a boolean (not raise on a mismatch) so files_should_not_be_identical works. + def identical_files?(first, second) + file_should_exist(first) + file_should_exist(second) + File.read(first) == File.read(second) + end +end diff --git a/spec/support/schemas/siteindex.xsd b/spec/support/schemas/siteindex.xsd new file mode 100644 
index 00000000..efc41636 --- /dev/null +++ b/spec/support/schemas/siteindex.xsd @@ -0,0 +1,73 @@ + + + + + XML Schema for Sitemap index files. + Last Modifed 2009-04-08 + + + + + + + Container for a set of up to 50,000 sitemap URLs. + This is the root element of the XML file. + + + + + + + + + + + + + Container for the data needed to describe a sitemap. + + + + + + + + + + + + REQUIRED: The location URI of a sitemap. + The URI must conform to RFC 2396 (http://www.ietf.org/rfc/rfc2396.txt). + + + + + + + + + + + + OPTIONAL: The date the document was last modified. The date must conform + to the W3C DATETIME format (http://www.w3.org/TR/NOTE-datetime). + Example: 2005-05-10 + Lastmod may also contain a timestamp. + Example: 2005-05-10T17:33:30+08:00 + + + + + + + + + + + + + + diff --git a/spec/support/schemas/sitemap-mobile.xsd b/spec/support/schemas/sitemap-mobile.xsd new file mode 100644 index 00000000..328f9625 --- /dev/null +++ b/spec/support/schemas/sitemap-mobile.xsd @@ -0,0 +1,32 @@ + + + + + + XML Schema for the Mobile Sitemap extension. This schema defines the + Mobile-specific elements only; the core Sitemap elements are defined + separately. + + Help Center documentation for the Mobile Sitemap extension: + + http://www.google.com/support/webmasters/bin/topic.py?topic=8493 + + Copyright 2010 Google Inc. All Rights Reserved. + + + + + + + Mobile sitemaps just contain an empty "mobile" tag to identify a + URL as having mobile content. + + + + + + diff --git a/spec/support/schemas/sitemap-news.xsd b/spec/support/schemas/sitemap-news.xsd new file mode 100644 index 00000000..541ba3f1 --- /dev/null +++ b/spec/support/schemas/sitemap-news.xsd @@ -0,0 +1,159 @@ + + + + + + XML Schema for the News Sitemap extension. This schema defines the + News-specific elements only; the core Sitemap elements are defined + separately. 
+ + Help Center documentation for the News Sitemap extension: + + http://www.google.com/support/news_pub/bin/topic.py?topic=11666 + + Copyright 2010 Google Inc. All Rights Reserved. + + + + + + + + + + The publication in which the article appears. Required. + + + + + + + + Name of the news publication. It must exactly match + the name as it appears on your articles in news.google.com, + omitting any trailing parentheticals. + For example, if the name appears in Google News as + "The Example Times (subscription)", you should use + "The Example Times". Required. + + + + + + + Language of the publication. It should be an + ISO 639 Language Code (either 2 or 3 letters); see: + http://www.loc.gov/standards/iso639-2/php/code_list.php + Exception: For Chinese, please use zh-cn for Simplified + Chinese or zh-tw for Traditional Chinese. Required. + + + + + + + + + + + + + + + Accessibility of the article. Required if access is not open, + otherwise this tag should be omitted. + + + + + + + + + + + + + A comma-separated list of properties characterizing the content + of the article, such as "PressRelease" or "UserGenerated". + For a list of possible values, see: + http://www.google.com/support/news_pub/bin/answer.py?answer=93992 + Required if any genres apply to the article, otherwise this tag + should be omitted. + + + + + + + + + + + + Article publication date in W3C format, specifying the complete + date (YYYY-MM-DD) with optional timestamp. See: + http://www.w3.org/TR/NOTE-datetime + Please ensure that you give the original date and time at which + the article was published on your site; do not give the time + at which the article was added to your Sitemap. Required. + + + + + + + + + + + + + + + + + Title of the news article. Optional, but highly recommended. + Note: The title may be truncated for space reasons when shown + on Google News. + + + + + + + Comma-separated list of keywords describing the topic of + the article. 
Keywords may be drawn from, but are not limited to, + the list of existing Google News keywords; see: + http://www.google.com/support/news_pub/bin/answer.py?answer=116037 + Optional. + + + + + + + Comma-separated list of up to 5 stock tickers of the companies, + mutual funds, or other financial entities that are the main subject + of the article. Relevant primarily for business articles. + Each ticker must be prefixed by the name of its stock exchange, + and must match its entry in Google Finance. + For example, "NASDAQ:AMAT" (but not "NASD:AMAT"), + or "BOM:500325" (but not "BOM:RIL"). Optional. + + + + + + + + + + + + + diff --git a/spec/support/schemas/sitemap-pagemap.xsd b/spec/support/schemas/sitemap-pagemap.xsd new file mode 100644 index 00000000..c151d355 --- /dev/null +++ b/spec/support/schemas/sitemap-pagemap.xsd @@ -0,0 +1,97 @@ + + + + + + + XML Schema for the PageMap Sitemap extension. This schema defines the + PageMap-specific elements only; the core Sitemap elements are defined + separately. + + Copyright 2011 Google Inc. All Rights Reserved. + + + + + + + + + + Template file specification. Can be used for overriding the + default rendering of search results delivered via + Google Custom Search Engine. + + + + + + + Reference to a template file. A template file contains a set of + ResultSpecs, which, given DataObjects of appropriate types on + the page, renders a search result based on the key-value pairs + found in those DataObjects. If the template file is not + specified, Google will use the default predefined set of + templates tailored to popular content. + + + + + + + + + + + + Either 'value' attribute or text content must be set, but + not both. + + + + + + + + + Name of the attribute. + + + + + + + Value of the attribute. + + + + + + + + + + + + Type of the object. + + + + + + + ID of the object. 
+ + + + + + + + + + diff --git a/spec/support/schemas/sitemap-video.xsd b/spec/support/schemas/sitemap-video.xsd new file mode 100644 index 00000000..4bac2178 --- /dev/null +++ b/spec/support/schemas/sitemap-video.xsd @@ -0,0 +1,643 @@ + + + + + + XML Schema for the Video Sitemap extension. This schema defines the + Video-specific elements only; the core Sitemap elements are defined + separately. + + Help Center documentation for the Video Sitemap extension: + + http://www.google.com/support/webmasters/bin/topic.py?topic=10079 + + Copyright 2010 Google Inc. All Rights Reserved. + + + + + + + A value that can be yes or no. Permitted cases are all-lowercase (yes/no), + all-uppercase (YES/NO) or starting with capital (Yes/No). + + + + + + + + + + + + + + + + Space-separated country codes in ISO 3166 format. + + Country codes: + http://www.iso.org/iso/english_country_names_and_code_elements + + + + + + + + + + + Space-separated platform names. + + Platform names: + web - desktop and laptop browsers. + mobile - mobile devices such as phones and tablets. + tv - tv platforms such as GoogleTV. + + + + + + + + + + + + + + A URL pointing to the URL for the video thumbnail image file. We can + accept most image sizes/types but recommend your thumbnails are at + least 120x90 pixels in .jpg, .png, or. gif formats. + + + + + + + + The title of the video. + + + + + + + + + + + + + The description of the video. + + + + + + + + + + + + + At least one of <video:player_loc> and + <video:content_loc> is required. + + This should be a .mpg, .mpeg, .mp4, .m4v, .mov, .wmv, .asf, .avi, + .ra, .ram, .rm, .flv, or other video file format, and can be omitted + if <video:player_loc> is specified. However, because Google + needs to be able to check that the Flash object is actually a player + for video (as opposed to some other use of Flash, e.g. games and + animations), it's helpful to provide both. + + + + + + + + At least one of <video:player_loc> and + <video:content_loc> is required. 
+ + A URL pointing to a Flash player for a specific video. In general, + this is the information in the src element of an <embed> tag + and should not be the same as the content of the <loc> tag. + ​Since each video is uniquely identified by its content URL (the + location of the actual video file) or, if a content URL is not + present, a player URL (a URL pointing to a player for the video), + you must include either the <video:player_loc> or + <video:content_loc> tags. If these tags are omitted and we + can't find this information, we'll be unable to index your video. + + + + + + + + + Attribute allow_embed specifies whether Google can embed the + video in search results. Allowed values are "Yes" or "No". + The default value is "Yes". + + + + + + + User-defined string that Google may append (if appropriate) + to the flashvars parameter to enable autoplay of the video. + + + + + + + + + + + + The duration of the video in seconds. + + + + + + + + + + + + + The date after which the video will no longer be available, in + W3C format. Acceptable values are complete date (YYYY-MM-DD) and + complete date plus hours, minutes and seconds, and timezone + (YYYY-MM-DDThh:mm:ss+TZD). For example, 2007-07-16T19:20:30+08:00. + Don't supply this information if your video does not expire. + + + + + + + + + + + + + + + + + + The rating of the video. + + + + + + + + + + + + + + Use <video:content_segment_loc> only in conjunction with + <video:player_loc>. + + If you publish your video as a series of raw videos (for example, if + you submit a full movie as a continuous series of shorter clips), + you can use the <video:content_segment_loc> to supply us with + a series of URLs, in the order in which they should be concatenated + to recreate the video in its entirety. Each URL should point to a + .mpg, .mpeg, .mp4, .m4v, .mov, .wmv, .asf, .avi, .ra, .ram, .rm, + .flv, or other video file format. It should not point to any Flash + content. 
+ + + + + + + + + The duration of the clip in seconds. + + + + + + + + + + + + + + + + + The number of times the video has been viewed. + + + + + + + + The date the video was first published, in W3C format. Acceptable + values are complete date (YYYY-MM-DD) and complete date plus hours, + minutes and seconds, and timezone (YYYY-MM-DDThh:mm:ss+TZD). + For example, 2007-07-16T19:20:30+08:00. + + + + + + + + + + + + + + + + + + A tag associated with the video. Tags are generally very short + descriptions of key concepts associated with a video or piece of + content. A single video could have several tags, although it might + belong to only one category. For example, a video about grilling + food may belong in the Grilling category, but could be tagged + "steak", "meat", "summer", and "outdoor". Create a new + <video:tag> element for each tag associated with a video. + + + + + + + + The video's category - for example, cooking. In general, categories + are broad groupings of content by subject. For example, a site about + cooking could have categories for Broiling, Baking, and Grilling. + + + + + + + + + + + + + Whether the video is suitable for viewing by children. No if the + video should be available only to users with SafeSearch turned off. + + + + + + + + A list of countries where the video may or may not be played. + If there is no <video:restriction> tag, it is assumed that + the video can be played in all territories. + + + + + + + + + Attribute "relationship" specifies whether the video is + restricted or permitted for the specified countries. + + + + + + + + + + + + + + + + + + A link to the gallery (collection of videos) in which this video + appears. + + + + + + + + + The title of the gallery. + + + + + + + + + + + + The price to download or view the video. More than one + <video:price> element can be listed (for example, in order to + specify various currencies). The price value must either be a + non-negative decimal or be empty. 
If a price value is specified, the + currency attribute is required. If no price value is specified, the + type attribute must be valid and present. The resolution attribute + is optional. + + + + + + + + + The currency in ISO 4217 format. This attribute is required + if a value is given for price. + + + + + + + + + + + + The type (purchase or rent) of price. This value is required + if there is no value given for price. + + + + + + + + + + + + + + + The resolution of the video at this price (SD or HD). + + + + + + + + + + + + + + + + + + + + Indicates whether a subscription (either paid or free) is required + to view the video. + + + + + + + + A name or handle of the video’s uploader. + + + + + + + + + The URL of a webpage with additional information about this + uploader. This URL must be on the same domain as the + <loc> tag. + + + + + + + + + + + + Encloses all information about a single TV video. + + + + + + + + The title of the TV show. This should be the same for all + episodes from the same series. + + + + + + + Describes the relationship of the video to the specified + TV show/episode. + + + + + + + + + + + + + + + + + + + + + + + The title of the episode—for example, "Flesh and Bone" is the + title of the Season 1, Episode 8 episode of Battlestar + Galactica. This tag is not necessary if the video is not + related to a specific episode (for example, if it's a trailer + for an entire series or season). + + + + + + + Only for shows with a per-season schedule. + + + + + + + + + + + + The episode number in number format. For TV shoes with a + per-season schedule, the first episode of each series should + be numbered 1. + + + + + + + + + + + + The date the content of the video was first broadcast, in + W3C format (for example, 2010-11-05.) + + + + + + + + + + + + + + + + + + + + + A list of platforms where the video may or may not be played. + If there is no <video:platform> tag, it is assumed that + the video can be played on all platforms. 
+ + + + + + + + + Attribute "relationship" specifies whether the video is + restricted or permitted for the specified platforms. + + + + + + + + + + + + + + + + + + Whether the video is a live internet broadcast. + + + + + + + + An unambiguous identifier for the video within a given + identification context. + + + + + + + + + The identification context. + + + + + + + + + + + + + + + + + + + + + + diff --git a/spec/support/schemas/sitemap.xsd b/spec/support/schemas/sitemap.xsd new file mode 100644 index 00000000..17efb326 --- /dev/null +++ b/spec/support/schemas/sitemap.xsd @@ -0,0 +1,115 @@ + + + + + XML Schema for Sitemap files. + Last Modifed 2008-03-26 + + + + + + + Container for a set of up to 50,000 document elements. + This is the root element of the XML file. + + + + + + + + + + + + + Container for the data needed to describe a document to crawl. + + + + + + + + + + + + + + + REQUIRED: The location URI of a document. + The URI must conform to RFC 2396 (http://www.ietf.org/rfc/rfc2396.txt). + + + + + + + + + + + + OPTIONAL: The date the document was last modified. The date must conform + to the W3C DATETIME format (http://www.w3.org/TR/NOTE-datetime). + Example: 2005-05-10 + Lastmod may also contain a timestamp. + Example: 2005-05-10T17:33:30+08:00 + + + + + + + + + + + + + + + + OPTIONAL: Indicates how frequently the content at a particular URL is + likely to change. The value "always" should be used to describe + documents that change each time they are accessed. The value "never" + should be used to describe archived URLs. Please note that web + crawlers may not necessarily crawl pages marked "always" more often. + Consider this element as a friendly suggestion and not a command. + + + + + + + + + + + + + + + + + OPTIONAL: The priority of a particular URL relative to other pages + on the same site. The value for this element is a number between + 0.0 and 1.0 where 0.0 identifies the lowest priority page(s). + The default priority of a page is 0.5. 
Priority is used to select + between pages on your site. Setting a priority of 1.0 for all URLs + will not help you, as the relative priority of pages on your site + is what will be considered. + + + + + + + + + diff --git a/spec/support/xml_macros.rb b/spec/support/xml_macros.rb new file mode 100644 index 00000000..3a12fb41 --- /dev/null +++ b/spec/support/xml_macros.rb @@ -0,0 +1,63 @@ +module XmlMacros + def gzipped_xml_file_should_validate_against_schema(xml_gz_filename, schema_name) + Zlib::GzipReader.open(xml_gz_filename) do |xml_file| + xml_data_should_validate_against_schema(xml_file.read, schema_name) + end + end + + # Validate XML against a local schema file. + # + # `schema_name` gives the name of the schema file to validate against. The schema + # file is looked for in `spec/support/schemas/<schema_name>.xsd`. + def xml_data_should_validate_against_schema(xml, schema_name) + xml = xml.is_a?(String) ? xml : xml.to_s + doc = Nokogiri::XML(xml) + schema_file = File.join(File.dirname(__FILE__), 'schemas', "#{schema_name}.xsd") + schema = Nokogiri::XML::Schema File.read(schema_file) + expect(schema.validate(doc)).to eq([]) + end + + # Validate a fragment of XML against a schema. Builds a document with a root + # node for you so the fragment can be validated. + # + # Unfortunately Nokogiri doesn't support validating + # documents with multiple namespaces. So we have to extract the element + # and create a new document from it. If the xmlns isn't set on the element + # we get an error like: + # + # Element 'video': No matching global declaration available for the validation root. + # + # xml The XML fragment + # schema_name the name of the schema file to validate against. The schema + # file is looked for in `spec/support/schemas/<schema_name>.xsd`. + # xmlns A hash with only one key which gives the XML namespace and associated + # URI. 
Sometimes one needs to specify a prefix to the namespace, in which case this would + # look like: 'xmlns:video' => 'http://www.google.com/schemas/sitemap-video/1.1' + # + # Example: + # xml_fragment_should_validate_against_schema('