From 5bb1c8e0e5e63b2e60bd231380330ca33b8c1e48 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20M=C3=BCller?= Date: Mon, 8 Apr 2024 11:29:48 +0200 Subject: [PATCH 1/7] Removed tailing whitespaces --- lib/json_generator.js | 84 +++++++++++++++++++++---------------------- 1 file changed, 42 insertions(+), 42 deletions(-) diff --git a/lib/json_generator.js b/lib/json_generator.js index 171c812..8dc091c 100644 --- a/lib/json_generator.js +++ b/lib/json_generator.js @@ -23,67 +23,67 @@ module.exports = function(locals){ posts = locals.posts.sort('-date'); } - var res = new Array() + var res = new Array() var index = 0 - - if(posts){ + + if(posts){ posts.each(function(post) { if (post.indexing != undefined && !post.indexing) return; - var temp_post = new Object() + var temp_post = new Object() temp_post.title = post.title || 'No Title' - if (post.path) { - temp_post.url = config.root + post.path - } - if (content != false && post._content) { - temp_post.content = post._content - } - if (post.tags && post.tags.length > 0) { + if (post.path) { + temp_post.url = config.root + post.path + } + if (content != false && post._content) { + temp_post.content = post._content + } + if (post.tags && post.tags.length > 0) { var tags = []; post.tags.forEach(function (tag) { tags.push(tag.name); - }); - temp_post.tags = tags - } - if (post.categories && post.categories.length > 0) { + }); + temp_post.tags = tags + } + if (post.categories && post.categories.length > 0) { var categories = []; post.categories.forEach(function (cate) { categories.push(cate.name); - }); - temp_post.categories = categories - } - res[index] = temp_post; - index += 1; - }); - } - if(pages){ + }); + temp_post.categories = categories + } + res[index] = temp_post; + index += 1; + }); + } + if(pages){ pages.each(function(page){ if (page.indexing != undefined && !page.indexing) return; - var temp_page = new Object() + var temp_page = new Object() temp_post.title = post.title || 'No Title' - if (page.path) { - 
temp_page.url = config.root + page.path - } - if (content != false && page._content) { - temp_page.content = page._content - } - if (page.tags && page.tags.length > 0) { - var tags = new Array() - var tag_index = 0 + if (page.path) { + temp_page.url = config.root + page.path + } + if (content != false && page._content) { + temp_page.content = page._content + } + if (page.tags && page.tags.length > 0) { + var tags = new Array() + var tag_index = 0 page.tags.each(function (tag) { - tags[tag_index] = tag.name; - }); - temp_page.tags = tags - } + tags[tag_index] = tag.name; + }); + temp_page.tags = tags + } if (page.categories && page.categories.length > 0) { temp_page.categories = [] (page.categories.each || page.categories.forEach)(function (item) { temp_page.categories.push(item); }); - } - res[index] = temp_page; - index += 1; - }); - } + } + res[index] = temp_page; + index += 1; + }); + } var json = JSON.stringify(res); From b321a133a870504033cd24ebb523ac2b0a13e255 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20M=C3=BCller?= Date: Mon, 8 Apr 2024 12:48:25 +0200 Subject: [PATCH 2/7] Added content options rendered and raw for json output Added strip_html option to strip out all html tags (json+xml) Also fixed title issue for posts (json) --- README.md | 13 +++++++++---- lib/json_generator.js | 31 ++++++++++++++++++++++++++----- lib/xml_generator.js | 13 ++++++++++++- 3 files changed, 47 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index eb3fd1c..c512f51 100644 --- a/README.md +++ b/README.md @@ -29,11 +29,16 @@ search: - **path** - file path. By default is `search.xml` . If the file extension is `.json`, the output format will be JSON. Otherwise XML format file will be exported. - **field** - the search scope you want to search, you can chose: - * **post** (Default) - will only covers all the posts of your blog. - * **page** - will only covers all the pages of your blog. - * **all** - will covers all the posts and pages of your blog. 
-- **content** - whether contains the whole content of each article. If `false`, the generated results only cover title and other meta info without mainbody. By default is `true`. + * `post` (Default) - will only covers all the posts of your blog. + * `page` - will only covers all the pages of your blog. + * `all` - will covers all the posts and pages of your blog. +- **content** - whether contains the whole content of each article. + * `true` (Default) - generated results use the mainbody. + * `rendered` - generated results use the rendered mainbody if available. (json only) + * `raw` - generated results use the raw mainbody if available. (json only, also contains the front-matter) + * `false` - generated results only cover title and other meta info without mainbody. - **template** (Optional) - path to a custom XML template +- **strip_html** (Optional) - when `true` all HTML tags will be removed from the content. ## Exclude indexing diff --git a/lib/json_generator.js b/lib/json_generator.js index 8dc091c..33a9b7e 100644 --- a/lib/json_generator.js +++ b/lib/json_generator.js @@ -6,6 +6,9 @@ module.exports = function(locals){ var searchConfig = config.search; var searchfield = searchConfig.field; var content = searchConfig.content; + if (content == undefined) content = true; + var stripHtml = searchConfig.strip_html; + if (stripHtml == undefined) stripHtml = false; var posts, pages; @@ -34,8 +37,17 @@ module.exports = function(locals){ if (post.path) { temp_post.url = config.root + post.path } - if (content != false && post._content) { - temp_post.content = post._content + if (content != false) { + if (content == 'rendered' && post.content) { + temp_post.content = post.content + } else if (content == 'raw' && post.raw) { + temp_post.content = post.raw + } else if (post._content) { + temp_post.content = post._content + } + if (stripHtml) { + temp_post.content = temp_post.content.replace(/<[^>]+>/g, '') + } } if (post.tags && post.tags.length > 0) { var tags = []; @@ 
-59,12 +71,21 @@ module.exports = function(locals){ pages.each(function(page){ if (page.indexing != undefined && !page.indexing) return; var temp_page = new Object() - temp_post.title = post.title || 'No Title' + temp_page.title = page.title || 'No Title' if (page.path) { temp_page.url = config.root + page.path } - if (content != false && page._content) { - temp_page.content = page._content + if (content != false) { + if (content == 'rendered' && page.content) { + temp_page.content = page.content + } else if (content == 'raw' && page.raw) { + temp_page.content = page.raw + } else if (page._content) { + temp_page.content = page._content + } + if (stripHtml) { + temp_post.content = temp_post.content.replace(/<[^>]+>/g, '') + } } if (page.tags && page.tags.length > 0) { var tags = new Array() diff --git a/lib/xml_generator.js b/lib/xml_generator.js index de47e28..3ca1aba 100644 --- a/lib/xml_generator.js +++ b/lib/xml_generator.js @@ -22,7 +22,9 @@ module.exports = function(locals){ var template = searchTmpl; var searchfield = searchConfig.field; var content = searchConfig.content; - if (content == undefined) content=true; + if (content == undefined) content = true; + var stripHtml = searchConfig.strip_html; + if (stripHtml == undefined) stripHtml = false; var posts, pages; @@ -40,6 +42,15 @@ module.exports = function(locals){ posts = locals.posts.sort('-date'); } + if (stripHtml) { + posts.data.forEach(function (p) { + p.content = p.content.replace(/<[^>]+>/g, ''); + }); + pages.data.forEach(function (p) { + p.content = p.content.replace(/<[^>]+>/g, ''); + }); + } + var rootURL; if (config.root == null){ rootURL = "/"; From a640fbbbab2f8d9ed743dc3f5c92d9cb13114ef9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20M=C3=BCller?= Date: Mon, 8 Apr 2024 13:11:18 +0200 Subject: [PATCH 3/7] Added permalinks option to include tags/categories permalinks in json --- README.md | 1 + lib/json_generator.js | 37 ++++++++++++++++++++++++++----------- 2 files changed, 27 
insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index c512f51..d788bf4 100644 --- a/README.md +++ b/README.md @@ -39,6 +39,7 @@ search: * `false` - generated results only cover title and other meta info without mainbody. - **template** (Optional) - path to a custom XML template - **strip_html** (Optional) - when `true` all HTML tags will be removed from the content. +- **permalinks** (Optional) - when `true` the tags and categories in json output will also contain the permalinks. ## Exclude indexing diff --git a/lib/json_generator.js b/lib/json_generator.js index 33a9b7e..86207b4 100644 --- a/lib/json_generator.js +++ b/lib/json_generator.js @@ -9,6 +9,8 @@ module.exports = function(locals){ if (content == undefined) content = true; var stripHtml = searchConfig.strip_html; if (stripHtml == undefined) stripHtml = false; + var permalinks = searchConfig.permalinks; + if (permalinks == undefined) permalinks = false; var posts, pages; @@ -50,18 +52,24 @@ module.exports = function(locals){ } } if (post.tags && post.tags.length > 0) { - var tags = []; + temp_post.tags = []; post.tags.forEach(function (tag) { - tags.push(tag.name); + if (permalinks) { + temp_post.tags.push([ tag.name, tag.permalink ]); + } else { + temp_post.tags.push(tag.name); + } }); - temp_post.tags = tags } if (post.categories && post.categories.length > 0) { - var categories = []; + temp_post.categories = []; post.categories.forEach(function (cate) { - categories.push(cate.name); + if (permalinks) { + temp_post.categories.push([ cate.name, cate.permalink ]); + } else { + temp_post.categories.push(cate.name); + } }); - temp_post.categories = categories } res[index] = temp_post; index += 1; @@ -88,17 +96,24 @@ module.exports = function(locals){ } } if (page.tags && page.tags.length > 0) { - var tags = new Array() - var tag_index = 0 + temp_page.tags = []; page.tags.each(function (tag) { - tags[tag_index] = tag.name; + if (permalinks) { + temp_page.tags.push([ tag.name, tag.permalink 
]); + } else { + temp_page.tags.push(tag.name); + } }); - temp_page.tags = tags } if (page.categories && page.categories.length > 0) { - temp_page.categories = [] + temp_page.categories = []; (page.categories.each || page.categories.forEach)(function (item) { temp_page.categories.push(item); + if (permalinks) { + temp_page.categories.push([item.name, item.permalink]); + } else { + temp_page.categories.push(item.name); + } }); } res[index] = temp_page; From 1b9c7e42ee612f6d358151b360cbd3d804f18d79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20M=C3=BCller?= Date: Mon, 8 Apr 2024 13:24:01 +0200 Subject: [PATCH 4/7] Added content:excerpt option Will close #68 --- README.md | 1 + lib/json_generator.js | 4 ++++ lib/xml_generator.js | 13 +++++++++++++ 3 files changed, 18 insertions(+) diff --git a/README.md b/README.md index d788bf4..0dfa6cc 100644 --- a/README.md +++ b/README.md @@ -35,6 +35,7 @@ search: - **content** - whether contains the whole content of each article. * `true` (Default) - generated results use the mainbody. * `rendered` - generated results use the rendered mainbody if available. (json only) + * `excerpt` - generated results use the excerpts as content if available. * `raw` - generated results use the raw mainbody if available. (json only, also contains the front-matter) * `false` - generated results only cover title and other meta info without mainbody. 
- **template** (Optional) - path to a custom XML template diff --git a/lib/json_generator.js b/lib/json_generator.js index 86207b4..297576a 100644 --- a/lib/json_generator.js +++ b/lib/json_generator.js @@ -42,6 +42,8 @@ module.exports = function(locals){ if (content != false) { if (content == 'rendered' && post.content) { temp_post.content = post.content + } else if (content == 'excerpt' && post.excerpt) { + temp_post.content = post.excerpt } else if (content == 'raw' && post.raw) { temp_post.content = post.raw } else if (post._content) { @@ -86,6 +88,8 @@ module.exports = function(locals){ if (content != false) { if (content == 'rendered' && page.content) { temp_page.content = page.content + } else if (content == 'excerpt' && page.excerpt) { + temp_page.content = page.excerpt } else if (content == 'raw' && page.raw) { temp_page.content = page.raw } else if (page._content) { diff --git a/lib/xml_generator.js b/lib/xml_generator.js index 3ca1aba..9846e86 100644 --- a/lib/xml_generator.js +++ b/lib/xml_generator.js @@ -42,6 +42,19 @@ module.exports = function(locals){ posts = locals.posts.sort('-date'); } + if (content == 'excerpt') { + posts.data.forEach(function (p) { + if (p.excerpt) { + p.content = p.excerpt; + } + }); + pages.data.forEach(function (p) { + if (p.excerpt) { + p.content = p.excerpt; + } + }); + } + if (stripHtml) { posts.data.forEach(function (p) { p.content = p.content.replace(/<[^>]+>/g, ''); From 3f615aa5c5822237e747c4d418a0aa557bb9d621 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20M=C3=BCller?= Date: Mon, 8 Apr 2024 13:29:50 +0200 Subject: [PATCH 5/7] Removed page tags and categories from json output Pages don't support tags and categories. 
Closes #54 --- lib/json_generator.js | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/lib/json_generator.js b/lib/json_generator.js index 297576a..f3d0671 100644 --- a/lib/json_generator.js +++ b/lib/json_generator.js @@ -99,27 +99,6 @@ module.exports = function(locals){ temp_post.content = temp_post.content.replace(/<[^>]+>/g, '') } } - if (page.tags && page.tags.length > 0) { - temp_page.tags = []; - page.tags.each(function (tag) { - if (permalinks) { - temp_page.tags.push([ tag.name, tag.permalink ]); - } else { - temp_page.tags.push(tag.name); - } - }); - } - if (page.categories && page.categories.length > 0) { - temp_page.categories = []; - (page.categories.each || page.categories.forEach)(function (item) { - temp_page.categories.push(item); - if (permalinks) { - temp_page.categories.push([item.name, item.permalink]); - } else { - temp_page.categories.push(item.name); - } - }); - } res[index] = temp_page; index += 1; }); From 38112c779c4f77ed94dc64291d781822aa2c3a85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20M=C3=BCller?= Date: Mon, 8 Apr 2024 13:33:06 +0200 Subject: [PATCH 6/7] Always include tags/categories array in json output If posts have no tags/categories the keys were omitted in the json output. If we always include them (even if empty) we get a more reliable json structure which is easier to parse. 
--- lib/json_generator.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/json_generator.js b/lib/json_generator.js index f3d0671..9e98f83 100644 --- a/lib/json_generator.js +++ b/lib/json_generator.js @@ -53,8 +53,8 @@ module.exports = function(locals){ temp_post.content = temp_post.content.replace(/<[^>]+>/g, '') } } + temp_post.tags = []; if (post.tags && post.tags.length > 0) { - temp_post.tags = []; post.tags.forEach(function (tag) { if (permalinks) { temp_post.tags.push([ tag.name, tag.permalink ]); @@ -63,8 +63,8 @@ module.exports = function(locals){ } }); } + temp_post.categories = []; if (post.categories && post.categories.length > 0) { - temp_post.categories = []; post.categories.forEach(function (cate) { if (permalinks) { temp_post.categories.push([ cate.name, cate.permalink ]); From f2dfad44704cec799df8afbad19819ad96def826 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20M=C3=BCller?= Date: Thu, 11 Apr 2024 09:58:38 +0200 Subject: [PATCH 7/7] Fixed strip html issue if content is empty --- lib/json_generator.js | 6 +++--- lib/xml_generator.js | 8 ++++++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/lib/json_generator.js b/lib/json_generator.js index 9e98f83..870ac6c 100644 --- a/lib/json_generator.js +++ b/lib/json_generator.js @@ -49,7 +49,7 @@ module.exports = function(locals){ } else if (post._content) { temp_post.content = post._content } - if (stripHtml) { + if (stripHtml && temp_post.content) { temp_post.content = temp_post.content.replace(/<[^>]+>/g, '') } } @@ -95,8 +95,8 @@ module.exports = function(locals){ } else if (page._content) { temp_page.content = page._content } - if (stripHtml) { - temp_post.content = temp_post.content.replace(/<[^>]+>/g, '') + if (stripHtml && temp_page.content) { + temp_page.content = temp_page.content.replace(/<[^>]+>/g, '') } } res[index] = temp_page; diff --git a/lib/xml_generator.js b/lib/xml_generator.js index 9846e86..2a98a28 100644 --- a/lib/xml_generator.js +++ 
b/lib/xml_generator.js @@ -57,10 +57,14 @@ module.exports = function(locals){ if (stripHtml) { posts.data.forEach(function (p) { - p.content = p.content.replace(/<[^>]+>/g, ''); + if (p.content) { + p.content = p.content.replace(/<[^>]+>/g, ''); + } }); pages.data.forEach(function (p) { - p.content = p.content.replace(/<[^>]+>/g, ''); + if (p.content) { + p.content = p.content.replace(/<[^>]+>/g, ''); + } }); }