From c3a903c0cd4e33df558040b31a86a282920bde34 Mon Sep 17 00:00:00 2001
From: Eakampreet
Date: Thu, 17 Nov 2022 12:35:48 -0500
Subject: [PATCH] Add function to check if given url is a feed url

Return url if a feed url is passed

Add tests for isFeedUrl function

Fix failing tests
---
Review note: isFeedUrl and its first test were revised after the blob ids on
the util.js and util.test.js "index" lines were generated.

 src/api/feed-discovery/src/middleware.js   | 11 +++++-
 src/api/feed-discovery/src/util.js         | 33 ++++++++++++++++++
 src/api/feed-discovery/test/router.test.js | 18 +++++-----
 src/api/feed-discovery/test/util.test.js   | 39 ++++++++++++++++++++++
 4 files changed, 91 insertions(+), 10 deletions(-)

diff --git a/src/api/feed-discovery/src/middleware.js b/src/api/feed-discovery/src/middleware.js
index 29188c23ae..e97865c9e9 100644
--- a/src/api/feed-discovery/src/middleware.js
+++ b/src/api/feed-discovery/src/middleware.js
@@ -1,6 +1,6 @@
 const { logger, createError } = require('@senecacdot/satellite');
 
-const { isTwitchUrl, toTwitchFeedUrl, getBlogBody, getFeedUrls } = require('./util');
+const { isTwitchUrl, toTwitchFeedUrl, isFeedUrl, getBlogBody, getFeedUrls } = require('./util');
 
 // A middleware to ensure we get an array
 module.exports.checkForArray = () => {
@@ -43,6 +43,15 @@ module.exports.discoverFeedUrls = () => {
             };
           }
 
+          if (await isFeedUrl(url)) {
+            return [
+              {
+                feedUrl: url,
+                type: 'blog',
+              },
+            ];
+          }
+
           // Otherwise, try to parse out the feed URL from the body of the page
           const body = await getBlogBody(url);
           if (!body) {
diff --git a/src/api/feed-discovery/src/util.js b/src/api/feed-discovery/src/util.js
index 55dd7b32b0..8c49b85b4f 100644
--- a/src/api/feed-discovery/src/util.js
+++ b/src/api/feed-discovery/src/util.js
@@ -20,6 +20,38 @@ const toTwitchFeedUrl = (twitchChannelUrl) => {
   throw new Error('not a Twitch URL');
 };
 
+// Content types that mark a response as being a feed itself
+const FEED_CONTENT_TYPES = [
+  'application/xml',
+  'application/rss+xml',
+  'application/atom+xml',
+  'application/x.atom+xml',
+  'application/x-atom+xml',
+  'application/json',
+  'application/json+oembed',
+  'application/xml+oembed',
+];
+
+/**
+ * Check whether the given URL is itself a feed URL, based on the
+ * Content-Type header of the response to a GET request for it.
+ * @param {string} url - the URL to check
+ * @returns {Promise<boolean>} true for a 200 response with a feed content
+ * type; false otherwise, including when the request fails
+ */
+const isFeedUrl = async (url) => {
+  try {
+    const { statusCode, headers } = await got(url);
+    // Media types are case-insensitive, and the header may be missing entirely
+    const contentType = (headers['content-type'] || '').toLowerCase();
+
+    return statusCode === 200 && FEED_CONTENT_TYPES.some((ct) => contentType.includes(ct));
+  } catch (err) {
+    logger.debug({ err, url }, 'Unable to check if URL is a feed URL');
+    return false;
+  }
+};
+
 const getBlogBody = async (blogUrl) => {
   try {
     logger.debug({ blogUrl }, 'Getting blog body');
@@ -101,3 +133,4 @@ module.exports.getBlogBody = getBlogBody;
 module.exports.getFeedUrls = getFeedUrls;
 module.exports.isTwitchUrl = isTwitchUrl;
 module.exports.toTwitchFeedUrl = toTwitchFeedUrl;
+module.exports.isFeedUrl = isFeedUrl;
diff --git a/src/api/feed-discovery/test/router.test.js b/src/api/feed-discovery/test/router.test.js
index 3b17108270..8d6fe8a626 100644
--- a/src/api/feed-discovery/test/router.test.js
+++ b/src/api/feed-discovery/test/router.test.js
@@ -31,7 +31,7 @@ describe('POST /', () => {
     };
 
     // Mocking the response body html when call GET request to blog url
-    nock(blogUrl).get('/').reply(200, mockBlogUrlResponseBody, {
+    nock(blogUrl).get('/').twice().reply(200, mockBlogUrlResponseBody, {
       'Content-Type': 'text/html',
     });
 
@@ -56,7 +56,7 @@ describe('POST /', () => {
     `;
 
     // Mocking the response body html when call GET request to blog url
-    nock(blogUrl1).get('/').reply(200, mockBlogUrl1ResponseBody, {
+    nock(blogUrl1).get('/').twice().reply(200, mockBlogUrl1ResponseBody, {
       'Content-Type': 'text/html',
     });
 
@@ -72,7 +72,7 @@ describe('POST /', () => {
     `;
 
     // Mocking the response body html when call GET request to blog url
-    nock(blogUrl2).get('/').reply(200, mockBlogUrl2ResponseBody, {
+    nock(blogUrl2).get('/').twice().reply(200, mockBlogUrl2ResponseBody, {
       'Content-Type': 'text/html',
     });
 
@@ -131,7 +131,7 @@ describe('POST /', () => {
     `;
 
     // Mocking the response body html (send back nothing) when call GET request to blog url
-    nock(blogUrl).get('/').reply(200, mockBlogBody, {
+    nock(blogUrl).get('/').twice().reply(200, mockBlogBody, {
       'Content-Type': 'text/html',
     });
 
@@ -199,7 +199,7 @@ describe('POST /', () => {
     };
 
     // Mocking the response body html when call GET request to blog url
-    nock(blogUrl).get('/').reply(200, mockBlogUrlResponseBody, {
+    nock(blogUrl).get('/').twice().reply(200, mockBlogUrlResponseBody, {
       'Content-Type': 'text/html',
     });
 
@@ -227,7 +227,7 @@ describe('POST /', () => {
     };
 
     // Mocking the response body html when call GET request to blog url
-    nock(blogUrl).get('/').reply(200, mockBlogUrlResponseBody, {
+    nock(blogUrl).get('/').twice().reply(200, mockBlogUrlResponseBody, {
       'Content-Type': 'text/html',
     });
 
@@ -255,7 +255,7 @@ describe('POST /', () => {
     };
 
     // Mocking the response body html when call GET request to blog url
-    nock(blogUrl).get('/').reply(200, mockBlogUrlResponseBody, {
+    nock(blogUrl).get('/').twice().reply(200, mockBlogUrlResponseBody, {
       'Content-Type': 'text/html',
     });
 
@@ -294,7 +294,7 @@ describe('POST /', () => {
     };
 
     // Mocking the response body html when call GET request to blog url
-    nock(blogUrl).get('/').reply(200, mockBlogUrlResponseBody, {
+    nock(blogUrl).get('/').twice().reply(200, mockBlogUrlResponseBody, {
       'Content-Type': 'text/html',
     });
 
@@ -329,7 +329,7 @@ describe('POST /', () => {
     };
 
     // Mocking the response body html when call GET request to blog url
-    nock(youTubeDomain).get(channelUri).reply(200, mockYouTubeChannelUrlResponseBody, {
+    nock(youTubeDomain).get(channelUri).twice().reply(200, mockYouTubeChannelUrlResponseBody, {
       'Content-Type': 'text/html',
     });
 
diff --git a/src/api/feed-discovery/test/util.test.js b/src/api/feed-discovery/test/util.test.js
index 321f5fb9ec..e0a72ed442 100644
--- a/src/api/feed-discovery/test/util.test.js
+++ b/src/api/feed-discovery/test/util.test.js
@@ -3,6 +3,7 @@ const nock = require('nock');
 const {
   isTwitchUrl,
   toTwitchFeedUrl,
+  isFeedUrl,
   getBlogBody,
   getFeedUrlType,
   getFeedUrls,
@@ -31,6 +32,44 @@ describe('util.js', () => {
     );
   });
 
+  // One Jest case per content type. An async forEach() callback is never
+  // awaited, so its assertions would only run after the test had finished.
+  test.each([
+    'application/xml',
+    'application/rss+xml',
+    'application/atom+xml',
+    'application/x.atom+xml',
+    'application/x-atom+xml',
+    'application/json',
+    'application/json+oembed',
+    'application/xml+oembed',
+  ])('isFeedUrl returns true for a feed url served as %s', async (type) => {
+    const feedUrl = 'https://blog.com/feed/user/';
+    nock(feedUrl).get('/').reply(200, undefined, { 'Content-Type': type });
+
+    expect(await isFeedUrl(feedUrl)).toBe(true);
+  });
+
+  test('isFeedUrl returns false when given URL returns a non 200 status', async () => {
+    const feedUrl = 'https://blog.com/feed/user/';
+    nock(feedUrl).get('/').reply(404, 'Not Found');
+
+    expect(await isFeedUrl(feedUrl)).toBe(false);
+  });
+
+  test('isFeedUrl returns false if given URL returns a non feed content type', async () => {
+    const feedUrl = 'https://blog.com/user/';
+    nock(feedUrl).get('/').reply(200, '', { 'Content-Type': 'text/html' });
+
+    expect(await isFeedUrl(feedUrl)).toBe(false);
+  });
+
+  test('isFeedUrl returns false if given an invalid URL is given', async () => {
+    const feedUrl = 'Not a URL';
+
+    expect(await isFeedUrl(feedUrl)).toBe(false);
+  });
+
   test('getBlogBody returns the expected body for a given URL', async () => {
     const blogUrl = 'https://test321.blogspot.com/';
     const mockBlogUrlResponseBody = `