From cf855f800737d1b5c9e3befade125ea02824b8bf Mon Sep 17 00:00:00 2001 From: Jed Date: Tue, 15 Feb 2022 14:46:06 +0100 Subject: [PATCH] Sup 183 create method providing pages tree (#48) * adds self.getPageTree method, some light refactoring * adds urls rewriting * adds options to exclude types from page tree --- CHANGELOG.md | 4 + README.md | 21 ++++++ index.js | 206 +++++++++++++++++++++++++++++++++++++++++++-------- package.json | 2 +- 4 files changed, 200 insertions(+), 33 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 56c6e225..c7c14a35 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Changelog +## Unreleased + +Adds a new `getPageTree` method that returns the nested pages in the right order, including pieces that are also pages, so that a sitemap page can be built in any project. + ## 2.6.0 (2021-10-13) Introduced a `rewriteUrl` method, which project developers can override to customize the URLs being output in the sitemap. diff --git a/README.md b/README.md index 2e936dd8..7671c8ad 100644 --- a/README.md +++ b/README.md @@ -257,3 +257,24 @@ module.exports = { }; } }; +``` + +## Getting the page tree programmatically + +This module's primary purpose is creating a sitemap for Google and other search engines, but it is also useful in creating a sitemap for end users. + +In order to build a sitemap page, you can use the method `self.getPageTree` from this module. It returns the nested pages, together with the pieces that have a URL, in the right order. For each page, you can walk its `_children` array recursively to render the page links at the right level. + +This method has a large performance impact each time it is called on a site with a large page tree, or many pieces reachable via pieces-pages. Strongly consider caching the response for a period of time. + +It is possible to exclude some page or piece types from the page tree only, without impacting the normal `sitemap.xml` generation. 
+The `excludeTypes` option will exclude types from the sitemap file and from the `getPageTree` method. +The `excludeTypesFromPageTree` option will exclude types only from the `getPageTree` method. + +```javascript + { + 'apostrophe-site-map': { + excludeTypesFromPageTree: [ 'article' ] + } + } +``` \ No newline at end of file diff --git a/index.js b/index.js index 7b2b247f..e33d172c 100644 --- a/index.js +++ b/index.js @@ -221,15 +221,13 @@ module.exports = { }; self.getPieces = function(req, locale, callback) { - var modules = _.filter(self.apos.modules, function(module, name) { - return _.find(module.__meta.chain, function(entry) { - return entry.name === 'apostrophe-pieces'; - }); - }); + const modules = self.getPiecesModules(); + return async.eachSeries(modules, function(module, callback) { if (_.includes(self.excludeTypes, module.name)) { return setImmediate(callback); } + // Paginate through 100 (by default) at a time to // avoid slamming memory var done = false; @@ -237,37 +235,48 @@ module.exports = { return async.whilst( function() { return !done; }, function(callback) { - return self.findPieces(req, module).skip(skip).limit(self.piecesPerBatch).toArray(function(err, pieces) { - _.each(pieces, function(piece) { - if (!piece._url) { - // This one has no page to be viewed on - return; - } - // Results in a reasonable priority relative - // to regular pages - piece.level = 3; - // Future events are interesting, - // past events are boring - if (piece.startDate) { - if (piece.startDate > self.today) { - piece.level--; - } else { - piece.level++; + + return self.findPieces(req, module) + .skip(skip) + .limit(self.piecesPerBatch) + .toArray(function(err, pieces) { + _.each(pieces, function(piece) { + if (!piece._url) { + // This one has no page to be viewed on + return; } + + // Results in a reasonable priority relative + // to regular pages + piece.level = 3; + // Future events are interesting, + // past events are boring + if (piece.startDate) { + if 
(piece.startDate > self.today) { + piece.level--; + } else { + piece.level++; + } + } + self.output(piece); + }); + if (!pieces.length) { + done = true; + } else { + skip += pieces.length; } - self.output(piece); - }); - if (!pieces.length) { - done = true; - } else { - skip += pieces.length; - } - return callback(null); + return callback(null); }); }, callback); }, callback); }; + self.getPiecesModules = () => { + return Object.values(self.apos.modules).filter((mod) => { + return mod.__meta.chain.some((meta) => meta.name === 'apostrophe-pieces') + }) + } + self.writeSitemap = function(callback) { if (!self.perLocale) { // Simple single-file sitemap @@ -444,8 +453,8 @@ module.exports = { return callback(null); }; - self.findPieces = function(req, module) { - return module.find(req).published(true).joins(false).areas(false); + self.findPieces = function(req, module, projection = {}) { + return module.find(req, {}, projection).published(true).joins(false).areas(false); }; // Output the sitemap entry for the given doc, including its children if any. 
@@ -580,6 +589,139 @@ module.exports = { self.rewriteUrl = url => { return url; - } + }; + + self.getPageTree = async (req) => { + const excludedTypes = [ + 'workflow-document', + ...self.options.excludeTypes || [], + ...self.options.excludeTypesFromPageTree || [], + ] + + const pages = await getPages(); + const pagesWithPieces = await getPieces(pages); + + return rewriteUrls(pagesWithPieces); + + async function getPages () { + try { + const projection = { + _id: 1, + title: 1, + type: 1, + slug: 1, + path: 1, + rank: 1, + level: 1 + } + + const criteria = { + type: { + $nin: excludedTypes + } + } + + const homePage = await self.apos.pages + .find(req, { level: 0 }, projection) + .children({ depth: 1000, orphan: null, projection, and: criteria }) + .toObject(); + + return [ + { + ...homePage, + _children: [] + }, + ...homePage._children + ] + } catch (err) { + self.apos.utils.error(err); + } + } + + async function getPieces (pages) { + const piecesModules = self.getPiecesModules(); + const pieces = []; + + for (const mod of piecesModules) { + if (excludedTypes.includes(mod.name)) { + continue; + } + + await fetchPieces(req, { + mod, + skip: 0, + pieces + }); + } + + return insertPieces(pages, pieces) + + async function fetchPieces (req, { + mod, skip, pieces + }) { + try { + const fetchedPieces = await self.findPieces(req, mod, { _id: 1, title: 1, _url: 1 }) + .skip(skip) + .limit(self.piecesPerBatch) + .toArray(); + + if (!Array.isArray(fetchedPieces)) { + return; + } + + fetchedPieces.forEach(piece => { + if (piece._url && !excludedTypes.includes(piece.type)) { + pieces.push(piece); + } + }); + + if (fetchedPieces.length) { + await fetchPieces(req, { + mod, + skip: skip + fetchedPieces.length, + pieces + }); + } + } catch (err) { + self.apos.utils.error(err); + } + } + + function insertPieces (pages, pieces) { + return pages.reduce((acc, page) => { + const filledChildren = page._children.length + ? 
insertPieces(page._children, pieces) + : page._children + + const childrenPieces = pieces + .filter((piece) => piece._parentUrl === page._url) + + return [ + ...acc, + { + ...page, + _children: [ + ...filledChildren, + ...childrenPieces + ] + } + ] + }, []) + } + } + + function rewriteUrls (pages = []) { + return pages.reduce((acc, page) => { + return [ + ...acc, + { + ...page, + _url: self.rewriteUrl(page._url), + _children: rewriteUrls(page._children) + } + ] + }, []) + } + }; } }; diff --git a/package.json b/package.json index 72eaaf28..c68d97c9 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "apostrophe-site-map", - "version": "2.6.0", + "version": "2.7.0", "description": "Generate site maps for sites powered by the Apostrophe CMS.", "main": "index.js", "dependencies": {