From f25806464b88e7c004f96c12215fa0b21789bd67 Mon Sep 17 00:00:00 2001
From: Eakampreet
Date: Thu, 17 Nov 2022 12:35:48 -0500
Subject: [PATCH] Add function to check if given url is a feed url

Return url if a feed url is passed
Add tests for isFeedUrl function
---
 src/api/feed-discovery/src/middleware.js | 12 +++++++-
 src/api/feed-discovery/src/util.js       | 27 +++++++++++++++++
 src/api/feed-discovery/test/util.test.js | 38 ++++++++++++++++++++++++
 3 files changed, 76 insertions(+), 1 deletion(-)

diff --git a/src/api/feed-discovery/src/middleware.js b/src/api/feed-discovery/src/middleware.js
index 29188c23ae..e97865c9e9 100644
--- a/src/api/feed-discovery/src/middleware.js
+++ b/src/api/feed-discovery/src/middleware.js
@@ -1,6 +1,6 @@
 const { logger, createError } = require('@senecacdot/satellite');
 
-const { isTwitchUrl, toTwitchFeedUrl, getBlogBody, getFeedUrls } = require('./util');
+const { isTwitchUrl, toTwitchFeedUrl, isFeedUrl, getBlogBody, getFeedUrls } = require('./util');
 
 // A middleware to ensure we get an array
 module.exports.checkForArray = () => {
@@ -43,6 +43,16 @@ module.exports.discoverFeedUrls = () => {
             };
           }
 
+          // If the URL is itself a feed URL, use it directly as a blog feed
+          if (await isFeedUrl(url)) {
+            return [
+              {
+                feedUrl: url,
+                type: 'blog',
+              },
+            ];
+          }
+
           // Otherwise, try to parse out the feed URL from the body of the page
           const body = await getBlogBody(url);
           if (!body) {
diff --git a/src/api/feed-discovery/src/util.js b/src/api/feed-discovery/src/util.js
index 55dd7b32b0..8c49b85b4f 100644
--- a/src/api/feed-discovery/src/util.js
+++ b/src/api/feed-discovery/src/util.js
@@ -20,6 +20,32 @@ const toTwitchFeedUrl = (twitchChannelUrl) => {
   throw new Error('not a Twitch URL');
 };
 
+// Returns true if requesting `url` succeeds with a 200 status and a
+// Content-Type that matches one of the known feed types; false otherwise.
+// Request failures (e.g., an invalid URL) are treated as "not a feed".
+const isFeedUrl = async (url) => {
+  try {
+    const { statusCode, headers } = await got(url);
+    // The header may be missing, so fall back to an empty string
+    const contentType = headers['content-type'] || '';
+    const validContentTypes = [
+      'application/xml',
+      'application/rss+xml',
+      'application/atom+xml',
+      'application/x.atom+xml',
+      'application/x-atom+xml',
+      'application/json',
+      'application/json+oembed',
+      'application/xml+oembed',
+    ];
+
+    return statusCode === 200 && validContentTypes.some((ct) => contentType.includes(ct));
+  } catch (err) {
+    logger.debug({ err, url }, 'Unable to determine if URL is a feed URL');
+    return false;
+  }
+};
+
 const getBlogBody = async (blogUrl) => {
   try {
     logger.debug({ blogUrl }, 'Getting blog body');
@@ -101,3 +127,4 @@ module.exports.getBlogBody = getBlogBody;
 module.exports.getFeedUrls = getFeedUrls;
 module.exports.isTwitchUrl = isTwitchUrl;
 module.exports.toTwitchFeedUrl = toTwitchFeedUrl;
+module.exports.isFeedUrl = isFeedUrl;
diff --git a/src/api/feed-discovery/test/util.test.js b/src/api/feed-discovery/test/util.test.js
index 321f5fb9ec..e0a72ed442 100644
--- a/src/api/feed-discovery/test/util.test.js
+++ b/src/api/feed-discovery/test/util.test.js
@@ -3,6 +3,7 @@ const nock = require('nock');
 const {
   isTwitchUrl,
   toTwitchFeedUrl,
+  isFeedUrl,
   getBlogBody,
   getFeedUrlType,
   getFeedUrls,
@@ -31,6 +32,43 @@ describe('util.js', () => {
     );
   });
 
+  // Use one test case per content type so that every request is awaited
+  test.each([
+    'application/xml',
+    'application/rss+xml',
+    'application/atom+xml',
+    'application/x.atom+xml',
+    'application/x-atom+xml',
+    'application/json',
+    'application/json+oembed',
+    'application/xml+oembed',
+  ])('isFeedUrl returns true for a feed url served as %s', async (type) => {
+    const feedUrl = 'https://blog.com/feed/user/';
+    nock(feedUrl).get('/').reply(200, undefined, { 'Content-Type': type });
+
+    expect(await isFeedUrl(feedUrl)).toBe(true);
+  });
+
+  test('isFeedUrl returns false when given URL returns a non 200 status', async () => {
+    const feedUrl = 'https://blog.com/feed/user/';
+    nock(feedUrl).get('/').reply(404, 'Not Found');
+
+    expect(await isFeedUrl(feedUrl)).toBe(false);
+  });
+
+  test('isFeedUrl returns false if given URL returns a non feed content type', async () => {
+    const feedUrl = 'https://blog.com/user/';
+    nock(feedUrl).get('/').reply(200, '', { 'Content-Type': 'text/html' });
+
+    expect(await isFeedUrl(feedUrl)).toBe(false);
+  });
+
+  test('isFeedUrl returns false if an invalid URL is given', async () => {
+    const feedUrl = 'Not a URL';
+
+    expect(await isFeedUrl(feedUrl)).toBe(false);
+  });
+
   test('getBlogBody returns the expected body for a given URL', async () => {
     const blogUrl = 'https://test321.blogspot.com/';
     const mockBlogUrlResponseBody = `