diff --git a/README.md b/README.md index d383fd6..2ac4abb 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ Sitemap.xml Generator is a Jekyll plugin that generates a sitemap.xml file by tr How To Use: ----------- -1. Copy file into your _plugins folder within your Jekyll project. +1. Copy the sitemap_generator.rb file into your _plugins folder within your Jekyll project. 2. Ensure url is set in your config file (for example `url: http://danielgroves.net`) 3. In your config file, change `sitemap: file:` if you want your sitemap to be called something other than sitemap.xml. 4. Change the `sitemap: exclude:` list to exclude any pages that you don't want in the sitemap. @@ -32,6 +32,15 @@ Customizations: --------------- If you want to include the optional changefreq and priority attributes, simply include custom variables in the YAML Front Matter of those files. The names of these custom variables are defined in the config file as `sitemap: change_frequency_name:` and `sitemap: priority_name:`. +Last Modified and Generators: +----------------------------- +Page and Post subclasses can implement their own last_modified methods to provide an intelligent value to this generator. Rather than changing and coupling the other generators to this generator, we can add the last_modified method with meta programming. Two examples are provided: + +1. ```generate_categories_last_modified.rb``` adds a last_modified method to the CategoryIndex page generated by the Jekyll category page generator (http://recursive-design.com/projects/jekyll-plugins/) that is calculated by the latest last_modified date of all posts in the category. +2. ```temporal_archive_generator_last_modified.rb``` adds a last_modified method to the MonthlyIndexPage pages and YearlyIndexPages generated by the Yearly and Monthly Archive Generator for Jekyll (https://github.com/edelabar/jekyll-temporal-archive-generator) that is calculated by the latest last_modified date of all posts in the given time frame. + +This generator runs at ```:lowest``` priority in Jekyll, as long as all other generators are higher than that their results will be included in the sitemap. + Notes: ------ 1. The last modified date is determined by the latest date of the following: system modified date of the page or post, system modified date of included layout, system modified date of included layout within that layout, ... diff --git a/generate_categories_last_modified.rb b/generate_categories_last_modified.rb new file mode 100644 index 0000000..c60530d --- /dev/null +++ b/generate_categories_last_modified.rb @@ -0,0 +1,22 @@ +module Jekyll + + # Create an override for generate_categories.rb to add the last_modified method from sitemap_generator.rb + class CategoryPage < Page + + def last_modified + + latest_date = Time.at(0) # Unix Epoch + + self.site.categories[self.data['category']].each do |post| + + latest_date = self.greater_date(latest_date, post.last_modified) + + end + + latest_date + + end + + end + +end \ No newline at end of file diff --git a/sitemap_generator.rb b/sitemap_generator.rb index 347d3f3..950a41a 100644 --- a/sitemap_generator.rb +++ b/sitemap_generator.rb @@ -29,23 +29,90 @@ def path_to_source def location_on_server(my_url) "#{my_url}#{url}" end + + def last_modified + latest_date = Time.new + + if (File.exists? self.full_path_to_source) + latest_date = File.mtime self.full_path_to_source + + layouts = self.site.layouts + layout = layouts[self.data["layout"]] + while layout + date = layout.last_modified + + latest_date = date if (date > latest_date) + + layout = layouts[layout.data["layout"]] + end + end + + latest_date + end + + # Which of the two dates is later + # + # Returns latest of two dates + def greater_date(date1, date2) + if (date1 >= date2) + date1 + else + date2 + end + end end class Page attr_accessor :name def full_path_to_source - File.join(@base, @dir, @name) + unless @base.nil? || @dir.nil? || @name.nil? + return File.join(@base, @dir, @name) + end end def path_to_source - File.join(@dir, @name) + unless @dir.nil? || @name.nil? + return File.join(@dir, @name) + end end def location_on_server(my_url) location = "#{my_url}#{url}" location.gsub(/index.html$/, "") end + + def last_modified + latest_date = Time.new + + if (File.exists? self.full_path_to_source) + latest_date = File.mtime self.full_path_to_source + + layouts = self.site.layouts + layout = layouts[self.data["layout"]] + while layout + date = layout.last_modified + + latest_date = date if (date > latest_date) + + layout = layouts[layout.data["layout"]] + end + end + + latest_date + end + + # Which of the two dates is later + # + # Returns latest of two dates + def greater_date(date1, date2) + if (date1 >= date2) + date1 + else + date2 + end + end + end @@ -53,6 +120,22 @@ class Layout def full_path_to_source File.join(@base, @name) end + + def last_modified + File.mtime self.full_path_to_source + end + + # Which of the two dates is later + # + # Returns latest of two dates + def greater_date(date1, date2) + if (date1 >= date2) + date1 + else + date2 + end + end + end # Recover from strange exception when starting server without --auto @@ -63,6 +146,7 @@ def write(dest) end class SitemapGenerator < Generator + priority :lowest # Config defaults SITEMAP_FILE_NAME = "/sitemap.xml" @@ -120,13 +204,12 @@ def generate(site) def fill_posts(site, urlset) last_modified_date = nil site.posts.each do |post| - if !excluded?(site, post.name) + if !excluded?(post.name) url = fill_url(site, post) urlset.add_element(url) end - path = post.full_path_to_source - date = File.mtime(path) + date = post.last_modified last_modified_date = date if last_modified_date == nil or date > last_modified_date end @@ -139,12 +222,9 @@ def fill_posts(site, urlset) # Returns last_modified_date of index page def fill_pages(site, urlset) site.pages.each do |page| - if !excluded?(site, page.path_to_source) - path = page.full_path_to_source - if File.exists?(path) - url = fill_url(site, page) - urlset.add_element(url) - end + if !excluded?(page.path_to_source) + url = fill_url(site, page) + urlset.add_element(url) end end end @@ -162,8 +242,6 @@ def fill_url(site, page_or_post) lastmod = fill_last_modified(site, page_or_post) url.add_element(lastmod) if lastmod - - if (page_or_post.data[@config['change_frequency_name']]) change_frequency = page_or_post.data[@config['change_frequency_name']].downcase @@ -209,15 +287,14 @@ def fill_last_modified(site, page_or_post) path = page_or_post.full_path_to_source lastmod = REXML::Element.new "lastmod" - date = File.mtime(path) - latest_date = find_latest_date(date, site, page_or_post) + latest_date = page_or_post.last_modified if @last_modified_post_date == nil # This is a post lastmod.text = latest_date.iso8601 else # This is a page - if posts_included?(site, page_or_post.path_to_source) + if posts_included?(page_or_post.path_to_source) # We want to take into account the last post date final_date = greater_date(latest_date, @last_modified_post_date) lastmod.text = final_date.iso8601 @@ -228,25 +305,6 @@ def fill_last_modified(site, page_or_post) lastmod end - # Go through the page/post and any implemented layouts and get the latest - # modified date - # - # Returns formatted output of latest date of page/post and any used layouts - def find_latest_date(latest_date, site, page_or_post) - layouts = site.layouts - layout = layouts[page_or_post.data["layout"]] - while layout - path = layout.full_path_to_source - date = File.mtime(path) - - latest_date = date if (date > latest_date) - - layout = layouts[layout.data["layout"]] - end - - latest_date - end - # Which of the two dates is later # # Returns latest of two dates @@ -261,11 +319,11 @@ def greater_date(date1, date2) # Is the page or post listed as something we want to exclude? # # Returns boolean - def excluded?(site, name) + def excluded?(name) @config['exclude'].include? name end - def posts_included?(site, name) + def posts_included?(name) @config['include_posts'].include? name end @@ -289,4 +347,4 @@ def valid_priority?(priority) false end end -end +end \ No newline at end of file diff --git a/temporal_archive_generator_last_modified.rb b/temporal_archive_generator_last_modified.rb new file mode 100644 index 0000000..9d3cf08 --- /dev/null +++ b/temporal_archive_generator_last_modified.rb @@ -0,0 +1,39 @@ +module Jekyll + + # Create an override for temporal_archive_generator.rb to add the last_modified method from sitemap_generator.rb + class MonthlyIndexPage < Page + + def last_modified + + latest_date = Time.at(0) # Unix Epoch + + self.data['posts'].each do |post| + + latest_date = self.greater_date(latest_date, post.last_modified()) + + end + + latest_date + + end + + end + class YearlyIndexPage < Page + + def last_modified + + latest_date = Time.at(0) # Unix Epoch + + self.data['posts'].each do |post| + + latest_date = self.greater_date(latest_date, post.last_modified()) + + end + + latest_date + + end + + end + +end \ No newline at end of file