From c4a4fdad7d1990e6bd738869bb50374c6461769f Mon Sep 17 00:00:00 2001 From: Eric DeLabar Date: Wed, 3 Sep 2014 11:20:09 -0400 Subject: [PATCH 1/4] Refactored last modified calculation into the Page/Post object --- sitemap_generator.rb | 99 +++++++++++++++++++++++++++----------------- 1 file changed, 61 insertions(+), 38 deletions(-) diff --git a/sitemap_generator.rb b/sitemap_generator.rb index 347d3f3..129f8e1 100644 --- a/sitemap_generator.rb +++ b/sitemap_generator.rb @@ -29,23 +29,67 @@ def path_to_source def location_on_server(my_url) "#{my_url}#{url}" end + + def last_modified + latest_date = Time.new + + if (File.exists? self.full_path_to_source) + latest_date = File.mtime self.full_path_to_source + + layouts = self.site.layouts + layout = layouts[self.data["layout"]] + while layout + date = layout.last_modified + + latest_date = date if (date > latest_date) + + layout = layouts[layout.data["layout"]] + end + end + + latest_date + end end class Page attr_accessor :name def full_path_to_source - File.join(@base, @dir, @name) + unless @base.nil? || @dir.nil? || @name.nil? + return File.join(@base, @dir, @name) + end end def path_to_source - File.join(@dir, @name) + unless @dir.nil? || @name.nil? + return File.join(@dir, @name) + end end def location_on_server(my_url) location = "#{my_url}#{url}" location.gsub(/index.html$/, "") end + + def last_modified + latest_date = Time.new + + if (File.exists? 
self.full_path_to_source) + latest_date = File.mtime self.full_path_to_source + + layouts = self.site.layouts + layout = layouts[self.data["layout"]] + while layout + date = layout.last_modified + + latest_date = date if (date > latest_date) + + layout = layouts[layout.data["layout"]] + end + end + + latest_date + end end @@ -53,6 +97,10 @@ class Layout def full_path_to_source File.join(@base, @name) end + + def last_modified + File.mtime self.full_path_to_source + end end # Recover from strange exception when starting server without --auto @@ -63,6 +111,7 @@ def write(dest) end class SitemapGenerator < Generator + priority :lowest # Config defaults SITEMAP_FILE_NAME = "/sitemap.xml" @@ -120,13 +169,12 @@ def generate(site) def fill_posts(site, urlset) last_modified_date = nil site.posts.each do |post| - if !excluded?(site, post.name) + if !excluded?(post.name) url = fill_url(site, post) urlset.add_element(url) end - path = post.full_path_to_source - date = File.mtime(path) + date = post.last_modified last_modified_date = date if last_modified_date == nil or date > last_modified_date end @@ -139,12 +187,9 @@ def fill_posts(site, urlset) # Returns last_modified_date of index page def fill_pages(site, urlset) site.pages.each do |page| - if !excluded?(site, page.path_to_source) - path = page.full_path_to_source - if File.exists?(path) - url = fill_url(site, page) - urlset.add_element(url) - end + if !excluded?(page.path_to_source) + url = fill_url(site, page) + urlset.add_element(url) end end end @@ -162,8 +207,6 @@ def fill_url(site, page_or_post) lastmod = fill_last_modified(site, page_or_post) url.add_element(lastmod) if lastmod - - if (page_or_post.data[@config['change_frequency_name']]) change_frequency = page_or_post.data[@config['change_frequency_name']].downcase @@ -209,15 +252,14 @@ def fill_last_modified(site, page_or_post) path = page_or_post.full_path_to_source lastmod = REXML::Element.new "lastmod" - date = File.mtime(path) - latest_date = 
find_latest_date(date, site, page_or_post) + latest_date = page_or_post.last_modified if @last_modified_post_date == nil # This is a post lastmod.text = latest_date.iso8601 else # This is a page - if posts_included?(site, page_or_post.path_to_source) + if posts_included?(page_or_post.path_to_source) # We want to take into account the last post date final_date = greater_date(latest_date, @last_modified_post_date) lastmod.text = final_date.iso8601 @@ -228,25 +270,6 @@ def fill_last_modified(site, page_or_post) lastmod end - # Go through the page/post and any implemented layouts and get the latest - # modified date - # - # Returns formatted output of latest date of page/post and any used layouts - def find_latest_date(latest_date, site, page_or_post) - layouts = site.layouts - layout = layouts[page_or_post.data["layout"]] - while layout - path = layout.full_path_to_source - date = File.mtime(path) - - latest_date = date if (date > latest_date) - - layout = layouts[layout.data["layout"]] - end - - latest_date - end - # Which of the two dates is later # # Returns latest of two dates @@ -261,11 +284,11 @@ def greater_date(date1, date2) # Is the page or post listed as something we want to exclude? # # Returns boolean - def excluded?(site, name) + def excluded?(name) @config['exclude'].include? name end - def posts_included?(site, name) + def posts_included?(name) @config['include_posts'].include? name end @@ -289,4 +312,4 @@ def valid_priority?(priority) false end end -end +end \ No newline at end of file From d6c5504c544adbd9572781881523638fc3622089 Mon Sep 17 00:00:00 2001 From: Eric DeLabar Date: Wed, 3 Sep 2014 11:34:57 -0400 Subject: [PATCH 2/4] Changed posts_included? 
to accept Regex so generated index pages can be set by URL pattern --- sitemap_generator.rb | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/sitemap_generator.rb b/sitemap_generator.rb index 129f8e1..73a02c8 100644 --- a/sitemap_generator.rb +++ b/sitemap_generator.rb @@ -289,7 +289,19 @@ def excluded?(name) end def posts_included?(name) - @config['include_posts'].include? name + @config['include_posts'].each do |entry| + + begin + re = Regexp.new(entry) + rescue + re = Regexp.new(Regexp.escape(entry)) + end + + if re =~ name + return true + end + + end end # Is the change frequency value provided valid according to the spec From 4ee20a18fb185416d312eb773c09de55c5ec9723 Mon Sep 17 00:00:00 2001 From: Eric DeLabar Date: Wed, 3 Sep 2014 12:09:28 -0400 Subject: [PATCH 3/4] Rolled-back posts_included Regex change, it didn't work. --- sitemap_generator.rb | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/sitemap_generator.rb b/sitemap_generator.rb index 73a02c8..32c297d 100644 --- a/sitemap_generator.rb +++ b/sitemap_generator.rb @@ -260,10 +260,12 @@ def fill_last_modified(site, page_or_post) else # This is a page if posts_included?(page_or_post.path_to_source) + $stdout.puts "Posts Included: #{page_or_post.url} last_modified: #{page_or_post.last_modified}" # We want to take into account the last post date final_date = greater_date(latest_date, @last_modified_post_date) lastmod.text = final_date.iso8601 else + $stdout.puts "Posts NOT Included: #{page_or_post.url} last_modified: #{latest_date}" lastmod.text = latest_date.iso8601 end end @@ -289,19 +291,7 @@ def excluded?(name) end def posts_included?(name) - @config['include_posts'].each do |entry| - - begin - re = Regexp.new(entry) - rescue - re = Regexp.new(Regexp.escape(entry)) - end - - if re =~ name - return true - end - - end + @config['include_posts'].include? 
name end # Is the change frequency value provided valid according to the spec From d43af0e7ce0972e72e846bad1ff466607e7747a7 Mon Sep 17 00:00:00 2001 From: Eric DeLabar Date: Wed, 3 Sep 2014 12:27:22 -0400 Subject: [PATCH 4/4] Added metaprogramming examples and update readme for intelligent last_modified dates --- README.md | 11 +++++- generate_categories_last_modified.rb | 22 ++++++++++++ sitemap_generator.rb | 37 +++++++++++++++++-- temporal_archive_generator_last_modified.rb | 39 +++++++++++++++++++++ 4 files changed, 106 insertions(+), 3 deletions(-) create mode 100644 generate_categories_last_modified.rb create mode 100644 temporal_archive_generator_last_modified.rb diff --git a/README.md b/README.md index d383fd6..2ac4abb 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ Sitemap.xml Generator is a Jekyll plugin that generates a sitemap.xml file by tr How To Use: ----------- -1. Copy file into your _plugins folder within your Jekyll project. +1. Copy the sitemap_generator.rb file into your _plugins folder within your Jekyll project. 2. Ensure url is set in your config file (for example `url: http://danielgroves.net`) 3. In your config file, change `sitemap: file:` if you want your sitemap to be called something other than sitemap.xml. 4. Change the `sitemap: exclude:` list to exclude any pages that you don't want in the sitemap. @@ -32,6 +32,15 @@ Customizations: --------------- If you want to include the optional changefreq and priority attributes, simply include custom variables in the YAML Front Matter of those files. The names of these custom variables are defined in the config file as `sitemap: change_frequency_name:` and `sitemap: priority_name:`. +Last Modified and Generators: +----------------------------- +Page and Post subclasses can implement their own last_modified methods to provide an intelligent value to this generator. 
Rather than changing and coupling the other generators to this generator, we can add the last_modified method with metaprogramming. Two examples are provided: + +1. ```generate_categories_last_modified.rb``` adds a last_modified method to the CategoryIndex page generated by the Jekyll category page generator (http://recursive-design.com/projects/jekyll-plugins/); it returns the latest last_modified date of all posts in the category. +2. ```temporal_archive_generator_last_modified.rb``` adds a last_modified method to the MonthlyIndexPage and YearlyIndexPage pages generated by the Yearly and Monthly Archive Generator for Jekyll (https://github.com/edelabar/jekyll-temporal-archive-generator); it returns the latest last_modified date of all posts in the given time frame. + +This generator runs at ```:lowest``` priority in Jekyll, so as long as all other generators run at a higher priority, their results will be included in the sitemap. + Notes: ------ 1. The last modified date is determined by the latest date of the following: system modified date of the page or post, system modified date of included layout, system modified date of included layout within that layout, ... 
diff --git a/generate_categories_last_modified.rb b/generate_categories_last_modified.rb new file mode 100644 index 0000000..c60530d --- /dev/null +++ b/generate_categories_last_modified.rb @@ -0,0 +1,22 @@ +module Jekyll + + # Create an override for generate_categories.rb to add the last_modified method from sitemap_generator.rb + class CategoryPage < Page + + def last_modified + + latest_date = Time.at(0) # Unix Epoch + + self.site.categories[self.data['category']].each do |post| + + latest_date = self.greater_date(latest_date, post.last_modified) + + end + + latest_date + + end + + end + +end \ No newline at end of file diff --git a/sitemap_generator.rb b/sitemap_generator.rb index 32c297d..950a41a 100644 --- a/sitemap_generator.rb +++ b/sitemap_generator.rb @@ -49,6 +49,17 @@ def last_modified latest_date end + + # Which of the two dates is later + # + # Returns latest of two dates + def greater_date(date1, date2) + if (date1 >= date2) + date1 + else + date2 + end + end end class Page @@ -90,6 +101,18 @@ def last_modified latest_date end + + # Which of the two dates is later + # + # Returns latest of two dates + def greater_date(date1, date2) + if (date1 >= date2) + date1 + else + date2 + end + end + end @@ -101,6 +124,18 @@ def full_path_to_source def last_modified File.mtime self.full_path_to_source end + + # Which of the two dates is later + # + # Returns latest of two dates + def greater_date(date1, date2) + if (date1 >= date2) + date1 + else + date2 + end + end + end # Recover from strange exception when starting server without --auto @@ -260,12 +295,10 @@ def fill_last_modified(site, page_or_post) else # This is a page if posts_included?(page_or_post.path_to_source) - $stdout.puts "Posts Included: #{page_or_post.url} last_modified: #{page_or_post.last_modified}" # We want to take into account the last post date final_date = greater_date(latest_date, @last_modified_post_date) lastmod.text = final_date.iso8601 else - $stdout.puts "Posts NOT Included: 
#{page_or_post.url} last_modified: #{latest_date}" lastmod.text = latest_date.iso8601 end end diff --git a/temporal_archive_generator_last_modified.rb b/temporal_archive_generator_last_modified.rb new file mode 100644 index 0000000..9d3cf08 --- /dev/null +++ b/temporal_archive_generator_last_modified.rb @@ -0,0 +1,39 @@ +module Jekyll + + # Create an override for temporal_archive_generator.rb to add the last_modified method from sitemap_generator.rb + class MonthlyIndexPage < Page + + def last_modified + + latest_date = Time.at(0) # Unix Epoch + + self.data['posts'].each do |post| + + latest_date = self.greater_date(latest_date, post.last_modified()) + + end + + latest_date + + end + + end + class YearlyIndexPage < Page + + def last_modified + + latest_date = Time.at(0) # Unix Epoch + + self.data['posts'].each do |post| + + latest_date = self.greater_date(latest_date, post.last_modified()) + + end + + latest_date + + end + + end + +end \ No newline at end of file