Skip to content

Commit

Permalink
exclude paginated pages in sitemap (#2555)
Browse files Browse the repository at this point in the history
This fixes #2527.
  • Loading branch information
pranitbauva1997 authored Aug 26, 2024
1 parent ce10aae commit 0098119
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 5 deletions.
18 changes: 18 additions & 0 deletions components/config/src/config/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,13 @@ pub enum Mode {
Check,
}

/// Selects which paginated pages are left out of the generated sitemap.
///
/// Variants are (de)serialized using their lowercase names, i.e. `"none"` and `"all"`.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum ExcludePaginatedPagesInSitemap {
/// Do not exclude any paginated pages; pager URLs are still added to the sitemap.
None,
/// Exclude all paginated pages (section pagers and taxonomy pagers) from the sitemap.
All,
}

#[derive(Clone, Debug, Deserialize)]
#[serde(default, deny_unknown_fields)]
pub struct Config {
Expand Down Expand Up @@ -102,6 +109,8 @@ pub struct Config {
pub generate_sitemap: bool,
/// Enables the generation of robots.txt
pub generate_robots_txt: bool,
/// Whether to exclude paginated pages from the sitemap; accepts "none" (the default) or "all"
pub exclude_paginated_pages_in_sitemap: ExcludePaginatedPagesInSitemap,
}

#[derive(Serialize)]
Expand All @@ -123,6 +132,7 @@ pub struct SerializedConfig<'a> {
search: search::SerializedSearch<'a>,
generate_sitemap: bool,
generate_robots_txt: bool,
exclude_paginated_pages_in_sitemap: ExcludePaginatedPagesInSitemap,
}

impl Config {
Expand Down Expand Up @@ -287,6 +297,10 @@ impl Config {
self.mode == Mode::Check
}

/// Returns `true` when the configuration asks for all paginated pages to be
/// left out of the sitemap, i.e. when the setting is `ExcludePaginatedPagesInSitemap::All`.
pub fn should_exclude_paginated_pages_in_sitemap(&self) -> bool {
    matches!(self.exclude_paginated_pages_in_sitemap, ExcludePaginatedPagesInSitemap::All)
}

/// Switches this configuration into serve mode by setting `mode` to `Mode::Serve`.
pub fn enable_serve_mode(&mut self) {
self.mode = Mode::Serve;
}
Expand Down Expand Up @@ -340,6 +354,7 @@ impl Config {
search: self.search.serialize(),
generate_sitemap: self.generate_sitemap,
generate_robots_txt: self.generate_robots_txt,
exclude_paginated_pages_in_sitemap: self.exclude_paginated_pages_in_sitemap,
}
}
}
Expand Down Expand Up @@ -405,6 +420,7 @@ impl Default for Config {
extra: HashMap::new(),
generate_sitemap: true,
generate_robots_txt: true,
exclude_paginated_pages_in_sitemap: ExcludePaginatedPagesInSitemap::None,
}
}
}
Expand Down Expand Up @@ -1066,4 +1082,6 @@ base_url = "example.com"
let config = Config::parse(config).unwrap();
assert!(config.generate_robots_txt);
}

// TODO: add a test for excluding paginated pages
}
12 changes: 7 additions & 5 deletions components/site/src/sitemap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,10 +83,12 @@ pub fn find_entries<'a>(
}

if let Some(paginate_by) = s.paginate_by() {
let number_pagers = (s.pages.len() as f64 / paginate_by as f64).ceil() as isize;
for i in 1..=number_pagers {
let permalink = format!("{}{}/{}/", s.permalink, s.meta.paginate_path, i);
entries.insert(SitemapEntry::new(Cow::Owned(permalink), &None));
if !config.should_exclude_paginated_pages_in_sitemap() {
let number_pagers = (s.pages.len() as f64 / paginate_by as f64).ceil() as isize;
for i in 1..=number_pagers {
let permalink = format!("{}{}/{}/", s.permalink, s.meta.paginate_path, i);
entries.insert(SitemapEntry::new(Cow::Owned(permalink), &None));
}
}
}
}
Expand All @@ -100,7 +102,7 @@ pub fn find_entries<'a>(
for item in &taxonomy.items {
entries.insert(SitemapEntry::new(Cow::Borrowed(&item.permalink), &None));

if taxonomy.kind.is_paginated() {
if taxonomy.kind.is_paginated() && !config.should_exclude_paginated_pages_in_sitemap() {
let number_pagers = (item.pages.len() as f64
/ taxonomy.kind.paginate_by.unwrap() as f64)
.ceil() as isize;
Expand Down
3 changes: 3 additions & 0 deletions docs/content/documentation/getting-started/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,9 @@ ignored_static = []
# When set to "true", a feed is automatically generated.
generate_feeds = false

# When set to "all", paginated pages are excluded from the sitemap. Defaults to "none".
exclude_paginated_pages_in_sitemap = "none"

# The filenames to use for the feeds. Used as the template filenames, too.
# Defaults to ["atom.xml"], which has a built-in template that renders an Atom 1.0 feed.
# There is also a built-in template "rss.xml" that renders an RSS 2.0 feed.
Expand Down
6 changes: 6 additions & 0 deletions docs/content/documentation/templates/pagination.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,12 @@ A paginated taxonomy gets two variables aside from the `paginator` variable:

See the [taxonomies page](@/documentation/templates/taxonomies.md) for a detailed version of the types.

## SEO

It is preferable not to include paginated pages in the sitemap, since they are non-canonical pages.
To exclude paginated pages from the sitemap, set
`exclude_paginated_pages_in_sitemap` to `"all"` in `config.toml`.

## Example

Here is an example from a theme on how to use pagination on a page (`index.html` in this case):
Expand Down

0 comments on commit 0098119

Please sign in to comment.