Skip to content

Commit

Permalink
Sup 183 create method providing pages tree (#48)
Browse files Browse the repository at this point in the history
* adds self.getPagesTree method, some light refacto

* adds urls rewriting

* adds options to exclude types from page tree
  • Loading branch information
ValJed authored Feb 15, 2022
1 parent 14dc784 commit cf855f8
Show file tree
Hide file tree
Showing 4 changed files with 200 additions and 33 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Changelog

## Unreleased

Adds a new `getPageTree` method that returns the nested pages in the right order, together with the pieces that have a page of their own, so that a sitemap page can be built in any project.

## 2.6.0 (2021-10-13)

Introduced a `rewriteUrl` method, which project developers can override to customize the URLs being output in the sitemap.
Expand Down
21 changes: 21 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -257,3 +257,24 @@ module.exports = {
};
}
};
```

## Getting the page tree programmatically

This module's primary purpose is creating a sitemap for Google and other search engines, but it is also useful in creating a sitemap for end users.

To build a sitemap page, you can use the `self.getPageTree` method from this module. It returns the nested pages, along with the pieces that have their own pages, in the right order. For each page you can walk the `_children` array recursively to render the page links at the right level.

This method has a large performance impact each time it is called on a site with a large page tree, or many pieces reachable via pieces-pages. Strongly consider caching the response for a period of time.

It is possible to exclude some page or piece types from the page tree only, without impacting the normal `sitemap.xml` generation.
The `excludeTypes` option will exclude types from the sitemap file and from the `getPageTree` method.
The `excludeTypesFromPageTree` option will exclude types only from the `getPageTree` method.

```javascript
{
'apostrophe-site-map': {
excludeTypesFromPageTree: [ 'article' ]
}
}
```
206 changes: 174 additions & 32 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -221,53 +221,62 @@ module.exports = {
};

// Outputs a sitemap entry (via self.output) for every piece that has a `_url`,
// walking each pieces module in series and paging through its documents
// `self.piecesPerBatch` at a time.
//
// Each emitted piece is given `level = 3`; when it has a `startDate` the level
// is decremented if the date is after `self.today` and incremented otherwise.
// Modules whose name is listed in `self.excludeTypes` are skipped.
//
// req      - request object forwarded to self.findPieces
// locale   - not referenced in the lines shown here
// callback - passed to async.eachSeries as its completion callback
//
// NOTE(review): this span is a diff rendered without +/- markers, so removed
// and added lines are interleaved (two `modules` declarations, two copies of
// the batch callback). Read it as "before" and "after" of the same function.
// NOTE(review): the `err` argument of the toArray callback is never inspected
// in the lines shown — confirm whether a failed query should abort the loop.
self.getPieces = function(req, locale, callback) {
var modules = _.filter(self.apos.modules, function(module, name) {
return _.find(module.__meta.chain, function(entry) {
return entry.name === 'apostrophe-pieces';
});
});
const modules = self.getPiecesModules();

return async.eachSeries(modules, function(module, callback) {
if (_.includes(self.excludeTypes, module.name)) {
return setImmediate(callback);
}

// Paginate through 100 (by default) at a time to
// avoid slamming memory
var done = false;
var skip = 0;
return async.whilst(
function() { return !done; },
function(callback) {
return self.findPieces(req, module).skip(skip).limit(self.piecesPerBatch).toArray(function(err, pieces) {
_.each(pieces, function(piece) {
if (!piece._url) {
// This one has no page to be viewed on
return;
}
// Results in a reasonable priority relative
// to regular pages
piece.level = 3;
// Future events are interesting,
// past events are boring
if (piece.startDate) {
if (piece.startDate > self.today) {
piece.level--;
} else {
piece.level++;

return self.findPieces(req, module)
.skip(skip)
.limit(self.piecesPerBatch)
.toArray(function(err, pieces) {
_.each(pieces, function(piece) {
if (!piece._url) {
// This one has no page to be viewed on
return;
}

// Results in a reasonable priority relative
// to regular pages
piece.level = 3;
// Future events are interesting,
// past events are boring
if (piece.startDate) {
if (piece.startDate > self.today) {
piece.level--;
} else {
piece.level++;
}
}
self.output(piece);
});
if (!pieces.length) {
done = true;
} else {
skip += pieces.length;
}
self.output(piece);
});
if (!pieces.length) {
done = true;
} else {
skip += pieces.length;
}
return callback(null);
return callback(null);
});
}, callback);
}, callback);
};

// Returns every registered module whose `__meta.chain` contains an entry
// named 'apostrophe-pieces', in the iteration order of `self.apos.modules`.
self.getPiecesModules = () => {
  const piecesModules = [];

  for (const candidate of Object.values(self.apos.modules)) {
    const extendsPieces = candidate.__meta.chain.some((link) => {
      return link.name === 'apostrophe-pieces';
    });

    if (extendsPieces) {
      piecesModules.push(candidate);
    }
  }

  return piecesModules;
};

self.writeSitemap = function(callback) {
if (!self.perLocale) {
// Simple single-file sitemap
Expand Down Expand Up @@ -444,8 +453,8 @@ module.exports = {
return callback(null);
};

// Builds the cursor used to fetch pieces from a pieces module: calls
// `module.find(req, ...)` and chains `.published(true).joins(false).areas(false)`.
// Returns that cursor so callers can add skip/limit and execute it.
//
// In the newer signature, `projection` (default `{}`) is forwarded as the
// third argument to `module.find`; the second argument is an empty criteria
// object.
//
// NOTE(review): both the previous and the new definition lines appear here
// because this span is a diff rendered without +/- markers.
self.findPieces = function(req, module) {
return module.find(req).published(true).joins(false).areas(false);
self.findPieces = function(req, module, projection = {}) {
return module.find(req, {}, projection).published(true).joins(false).areas(false);
};

// Output the sitemap entry for the given doc, including its children if any.
Expand Down Expand Up @@ -580,6 +589,139 @@ module.exports = {

// Returns the given URL unchanged. This is the default implementation of the
// hook that project developers can override to customize the URLs being
// output in the sitemap.
//
// NOTE(review): the two closing lines below are the old and new terminators
// of this definition, shown together because this span is a diff rendered
// without +/- markers.
self.rewriteUrl = url => {
return url;
}
};

/**
 * Builds the nested page tree, with pieces attached under the page whose
 * `_url` equals the piece's `_parentUrl`, and every `_url` passed through
 * `self.rewriteUrl`.
 *
 * The result is a flat array whose first entry is the home page (with its own
 * page children removed) followed by the home page's children; each entry
 * carries a `_children` array that can be walked recursively.
 *
 * Types listed in `self.options.excludeTypes` or
 * `self.options.excludeTypesFromPageTree`, plus 'workflow-document', are left
 * out of both the pages and the pieces.
 *
 * Failures while fetching are logged through `self.apos.utils.error` and
 * treated as "nothing found": a failed or empty home page lookup yields an
 * empty tree instead of an unrelated TypeError further down.
 *
 * @param {Object} req - request object forwarded to the page and piece queries
 * @returns {Promise<Array<Object>>} the page tree, possibly empty
 */
self.getPageTree = async (req) => {
  const excludedTypes = [
    'workflow-document',
    ...(self.options.excludeTypes || []),
    ...(self.options.excludeTypesFromPageTree || []),
  ];

  const pages = await getPages();
  const pagesWithPieces = await getPieces(pages);

  return rewriteUrls(pagesWithPieces);

  // Fetches the home page with all of its descendants and flattens the first
  // level so the home page sits next to its children.
  async function getPages() {
    const projection = {
      _id: 1,
      title: 1,
      type: 1,
      slug: 1,
      path: 1,
      rank: 1,
      level: 1,
    };

    const criteria = {
      type: {
        $nin: excludedTypes,
      },
    };

    try {
      const homePage = await self.apos.pages
        .find(req, { level: 0 }, projection)
        .children({ depth: 1000, orphan: null, projection, and: criteria })
        .toObject();

      if (!homePage) {
        // No home page: there is no tree to build.
        return [];
      }

      return [
        {
          ...homePage,
          _children: [],
        },
        ...(homePage._children || []),
      ];
    } catch (err) {
      self.apos.utils.error(err);
      // Always hand back an array so the later steps can run safely.
      return [];
    }
  }

  // Collects the pieces of every non-excluded pieces module and attaches them
  // to the given pages.
  async function getPieces(pageList) {
    const pieces = [];

    for (const mod of self.getPiecesModules()) {
      if (excludedTypes.includes(mod.name)) {
        continue;
      }

      await fetchPieces(mod, pieces);
    }

    return insertPieces(pageList, groupByParentUrl(pieces));
  }

  // Pages through one module's pieces `self.piecesPerBatch` at a time and
  // appends those that have a `_url` and a non-excluded type to `collected`.
  // An error is logged and ends the paging for this module only.
  async function fetchPieces(mod, collected) {
    let skip = 0;

    try {
      for (;;) {
        const batch = await self
          .findPieces(req, mod, { _id: 1, title: 1, _url: 1 })
          .skip(skip)
          .limit(self.piecesPerBatch)
          .toArray();

        if (!Array.isArray(batch) || batch.length === 0) {
          return;
        }

        for (const piece of batch) {
          if (piece._url && !excludedTypes.includes(piece.type)) {
            collected.push(piece);
          }
        }

        skip += batch.length;
      }
    } catch (err) {
      self.apos.utils.error(err);
    }
  }

  // Indexes pieces by `_parentUrl` once, so attaching them does not rescan the
  // whole piece list for every page. Insertion order is kept per parent.
  function groupByParentUrl(pieces) {
    const byParentUrl = new Map();

    for (const piece of pieces) {
      const siblings = byParentUrl.get(piece._parentUrl);

      if (siblings) {
        siblings.push(piece);
      } else {
        byParentUrl.set(piece._parentUrl, [ piece ]);
      }
    }

    return byParentUrl;
  }

  // Returns copies of the pages where each page's `_children` is its own
  // (recursively processed) page children followed by its pieces.
  function insertPieces(pageList, piecesByParentUrl) {
    return pageList.map((page) => ({
      ...page,
      _children: [
        ...insertPieces(page._children || [], piecesByParentUrl),
        ...(piecesByParentUrl.get(page._url) || []),
      ],
    }));
  }

  // Returns copies of the entries with `_url` rewritten at every level.
  // Entries without `_children` (pieces) get an empty array.
  function rewriteUrls(pageList = []) {
    return pageList.map((page) => ({
      ...page,
      _url: self.rewriteUrl(page._url),
      _children: rewriteUrls(page._children),
    }));
  }
};
}
};
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "apostrophe-site-map",
"version": "2.6.0",
"version": "2.7.0",
"description": "Generate site maps for sites powered by the Apostrophe CMS.",
"main": "index.js",
"dependencies": {
Expand Down

0 comments on commit cf855f8

Please sign in to comment.