From fc98d5b28a9ed2d35885117ba856351dcc82333a Mon Sep 17 00:00:00 2001 From: Wilfredo Colon Date: Fri, 8 Mar 2024 09:39:19 -0500 Subject: [PATCH] Wcolon/devo 475b (#204) * Using simplecast api to pull episode info to send to Algolia * Update CircleCI configuration and Simplecast API integration * Update Algolia search index in podcast page * Fix Algolia search index name * Refactor getBatch function and updateRecent function * Fix null episode handling * Update Readme * Deepsource Fixes * More Deepsource fixes --- .circleci/config.yml | 5 +- Dockerfile | 1 + README.md | 6 +- nuxt.config.ts | 1 + package-lock.json | 39 ++++++++ package.json | 1 + pages/podcast/index.vue | 2 +- server/api/[indexType]_index.ts | 158 +++++++++++++++++++++++++------- 8 files changed, 171 insertions(+), 42 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index fd7361b..6377861 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -74,7 +74,7 @@ jobs: deploy: docker: - - image: circleci/python:3.6 + - image: cimg/python:3.7 parameters: env: type: enum @@ -86,8 +86,7 @@ jobs: steps: - checkout - setup_npmrc - - setup_remote_docker: - version: 20.10.12 + - setup_remote_docker - run: name: Deploy environment: diff --git a/Dockerfile b/Dockerfile index 31e877a..e2e383b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -12,6 +12,7 @@ ARG HTL_JS ARG SENTRY_DSN ARG ADMIN_CMS_ROOT ARG VALID_TOKEN +ARG SIMPLECAST_URL WORKDIR /code diff --git a/README.md b/README.md index e406828..668b58f 100644 --- a/README.md +++ b/README.md @@ -51,8 +51,4 @@ docker run -p 3000:3000 -e HOST=0.0.0.0 radiolab-vue -e API_URL=https://api.demo ## Search Index -Keyword search and archive filtering is driven by the [Algolia](https://algolia.com/dashboard) search engine. Credentials for Algolia are in 1Password. Log in to the dashboard to view the indices (one for demo and one prod) as well as to retrieve the API keys and application ID that will need to be set in the `.env` file. 
There is only one set of API credentials for both demo and prod, so make sure the index name is set appropriately. The indexer fetches episode data from the Publisher API and sends it to Algolia. There are two options, one to do an update of the 10 most recent episodes another to rebuild the index from scratch. The refresh of recent episode is available via URL at `/update-index` and is invoked nightly via Zapier. The update and full rebuild are also available as command-line options: - -`npm run updateIndex` - -`npm run rebuildIndex` \ No newline at end of file +Keyword search and archive filtering are driven by the [Algolia](https://algolia.com/dashboard) search engine. Credentials for Algolia are in 1Password. Log in to the dashboard to view the indices (one for demo and one for prod) as well as to retrieve the API keys, application ID and index name that will need to be set in the `.env` file. There is only one set of API credentials for both demo and prod, so make sure the index name is set appropriately (radiolab and Radiolab Demo). The indexer fetches episode data from the Simplecast API and sends it to Algolia. There are two options: one to update the 10 most recent episodes and another to rebuild the index from scratch. The refresh of recent episodes is available via URL at `/api/update_index` and the full rebuild is available at `/api/all_index`. The full rebuild is only necessary if the index is corrupted or if the index is being moved to a new Algolia account. The index update is run from Zapier and is triggered by a change in the Simplecast RSS feed for Radiolab. A token is needed to trigger both the update and the full rebuild; it is also in 1Password. 
\ No newline at end of file diff --git a/nuxt.config.ts b/nuxt.config.ts index a7c2694..483ec5a 100644 --- a/nuxt.config.ts +++ b/nuxt.config.ts @@ -85,6 +85,7 @@ export default { ADMIN_CMS_ROOT: process.env['ADMIN_CMS_ROOT'], GA_MEASUREMENT_ID: process.env['GA_MEASUREMENT_ID'] || 'G-T0Q62GL5TY', GTM_ID: process.env['GTM_ID'] || 'GTM-312335707', + ALGOLIA_RADIOLAB_INDEX: process.env['ALGOLIA_RADIOLAB_INDEX'], } }, } \ No newline at end of file diff --git a/package-lock.json b/package-lock.json index 0f18fe0..d3d4865 100644 --- a/package-lock.json +++ b/package-lock.json @@ -16,6 +16,7 @@ "axios": "^1.2.2", "debug": "4.3.3", "howler": "^2.2.3", + "image-size": "^1.1.1", "mosha-vue-toastify": "^1.0.23", "primeflex": "^3.3.0", "primeicons": "^5.0.0", @@ -7161,6 +7162,20 @@ "node": ">= 4" } }, + "node_modules/image-size": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/image-size/-/image-size-1.1.1.tgz", + "integrity": "sha512-541xKlUw6jr/6gGuk92F+mYM5zaFAc5ahphvkqvNe2bQ6gVBkd6bfrmVJ2t4KDAfikAYZyIqTnktX3i6/aQDrQ==", + "dependencies": { + "queue": "6.0.2" + }, + "bin": { + "image-size": "bin/image-size.js" + }, + "engines": { + "node": ">=16.x" + } + }, "node_modules/immutable": { "version": "4.3.0", "resolved": "https://registry.npmjs.org/immutable/-/immutable-4.3.0.tgz", @@ -9968,6 +9983,14 @@ "integrity": "sha512-FIqgj2EUvTa7R50u0rGsyTftzjYmv/a3hO345bZNrqabNqjtgiDMgmo4mkUjd+nzU5oF3dClKqFIPUKybUyqoQ==", "devOptional": true }, + "node_modules/queue": { + "version": "6.0.2", + "resolved": "https://registry.npmjs.org/queue/-/queue-6.0.2.tgz", + "integrity": "sha512-iHZWu+q3IdFZFX36ro/lKBkSvfkztY5Y7HMiPlOUjhupPcG2JMfst2KKEpu5XndviX/3UhFbRngUPNKtgvtZiA==", + "dependencies": { + "inherits": "~2.0.3" + } + }, "node_modules/queue-microtask": { "version": "1.2.3", "resolved": "https://registry.npmjs.org/queue-microtask/-/queue-microtask-1.2.3.tgz", @@ -17217,6 +17240,14 @@ "resolved": "https://registry.npmjs.org/ignore/-/ignore-5.2.4.tgz", "integrity": 
"sha512-MAb38BcSbH0eHNBxn7ql2NH/kX33OkB3lZ1BNdh7ENeRChHTYsTvWrMubiIAMNS2llXEEgZ1MUOBtXChP3kaFQ==" }, + "image-size": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/image-size/-/image-size-1.1.1.tgz", + "integrity": "sha512-541xKlUw6jr/6gGuk92F+mYM5zaFAc5ahphvkqvNe2bQ6gVBkd6bfrmVJ2t4KDAfikAYZyIqTnktX3i6/aQDrQ==", + "requires": { + "queue": "6.0.2" + } + }, "immutable": { "version": "4.3.0", "resolved": "https://registry.npmjs.org/immutable/-/immutable-4.3.0.tgz", @@ -19248,6 +19279,14 @@ "integrity": "sha512-FIqgj2EUvTa7R50u0rGsyTftzjYmv/a3hO345bZNrqabNqjtgiDMgmo4mkUjd+nzU5oF3dClKqFIPUKybUyqoQ==", "devOptional": true }, + "queue": { + "version": "6.0.2", + "resolved": "https://registry.npmjs.org/queue/-/queue-6.0.2.tgz", + "integrity": "sha512-iHZWu+q3IdFZFX36ro/lKBkSvfkztY5Y7HMiPlOUjhupPcG2JMfst2KKEpu5XndviX/3UhFbRngUPNKtgvtZiA==", + "requires": { + "inherits": "~2.0.3" + } + }, "queue-microtask": { "version": "1.2.3", "resolved": "https://registry.npmjs.org/queue-microtask/-/queue-microtask-1.2.3.tgz", diff --git a/package.json b/package.json index b2a8fb1..3d69012 100644 --- a/package.json +++ b/package.json @@ -27,6 +27,7 @@ "axios": "^1.2.2", "debug": "4.3.3", "howler": "^2.2.3", + "image-size": "^1.1.1", "mosha-vue-toastify": "^1.0.23", "primeflex": "^3.3.0", "primeicons": "^5.0.0", diff --git a/pages/podcast/index.vue b/pages/podcast/index.vue index 071890b..a642771 100644 --- a/pages/podcast/index.vue +++ b/pages/podcast/index.vue @@ -5,7 +5,7 @@ import { useRuntimeConfig } from '#app' const config = useRuntimeConfig() const apiUrl = `${config.API_URL}/api/v3/channel/shows/radiolab/recent_stories/` /*Algolia Search START*/ -const { result, search } = useAlgoliaSearch('radiolab') // pass your index name as param +const { result, search } = useAlgoliaSearch(config.ALGOLIA_RADIOLAB_INDEX) // pass your index name as param const searchTerm = ref('') const searchYear = ref('') const searchPage = ref(0) diff --git 
a/server/api/[indexType]_index.ts b/server/api/[indexType]_index.ts index 6a24654..3604f53 100644 --- a/server/api/[indexType]_index.ts +++ b/server/api/[indexType]_index.ts @@ -1,52 +1,140 @@ import axios from 'axios'; import algoliasearch from 'algoliasearch'; +import sizeOf from 'image-size'; +import https from 'https'; +import { URL } from 'url'; +interface Season { + href: string; + number: number; +} + +interface Episode { + updated_at: string; + type: string; + token: string; + title: string; + status: string; + slug: string; + season: Season; + scheduled_for: null | string; + published_at: string; + number: number; + is_hidden: boolean; + image_url: string; + image_path: string; + id: string; + href: string; + guid: string; + feeds: null | string; + enclosure_url: string; + duration: number; + description: string; + days_since_release: number; + audio_status: string; + analytics: null | string; +} + +interface Dimensions { + width: number; + height: number; +} + +/** + * Get the image dimensions from a URL + * @param url The URL to get the image dimensions from + * @returns {Promise<{width: number, height: number}>} The width and height of the image + */ +const getImageDimensions = (url: string) => { + const options = new URL(url); + return new Promise((resolve) => { + https.get(options, (response) => { + const chunks = []; + response + .on('data', (chunk) => { + chunks.push(chunk); + }) + .on('end', () => { + const buffer = Buffer.concat(chunks); + resolve(sizeOf(buffer)); + }); + }); + }); +} -const config = useRuntimeConfig(); /** * Instantiates an Algolia client and returns the Radiolab index. * @returns {Promise} */ const getIndex = async () => { const client = await algoliasearch(process.env.ALGOLIA_APP_ID, process.env.ALGOLIA_ADMIN_API_KEY); - return client.initIndex('radiolab'); + return client.initIndex(process.env.ALGOLIA_RADIOLAB_INDEX); }; /** - * Fetches episodes from publisher's API and returns them in a format suitable for indexing. 
- * @param page Page number to fetch - * @returns episode data to be indexed + * Creates the main image for an episode. + * @param episode - The episode object. + * @returns The main image object with URL, width, and height. */ -const getBatch = async ( page: number ) => { - const recent = await axios.get(`${config.public.API_URL}/api/v3/channel/shows/radiolab/recent_stories/${page}`); - if (recent.status === 200) { - const episodes = recent.data.included - .filter(episode => episode.attributes["audio-may-download"]) // filter out episodes that don't have audio - .map(episode => { - const publishTime = new Date(episode.attributes["publish-at"]).getTime(); - return { - objectID: episode.attributes["cms-pk"], // Algolia's unique identifier - slug: episode.attributes.slug, - title: episode.attributes.title, - audio: episode.attributes.audio, - description: episode.attributes.body, - tease: episode.attributes.tease, +const createImageMain = async (episode: Episode) => { + if (!episode?.image_url){ + return null; + } else { + const dimensions: Dimensions = await getImageDimensions(episode?.image_url) as Dimensions; + episode['image-main'] = { + url: episode.image_url, + width: dimensions.width, + height: dimensions.height + }; + return episode['image-main']; +} +}; + + +/** + * Retrieves a batch of episodes from Simplecast. + * @returns {Promise} A promise that resolves to an array of episodes. + * @throws {Error} If the request to Simplecast fails. 
+ */ +const getBatch = async (url: string) => { + const response = await axios.get(url); + + if (response.status === 200) { + const episodes: Array> = []; + const pages = response.data.pages; + const collection = response.data.collection; + for (const episode of collection) { + // Skip if episode is null + if (!episode) { + continue; + } + const publishTime = (new Date(episode.published_at).getTime())/1000; + const imageMain = await createImageMain(episode); + episodes.push({ + objectID: episode.id, + slug: episode.slug, + title: episode.title, + audio: episode.enclosure_url, + description: episode.description, + tease: episode.description, publishTime, - "publish-at": episode.attributes["publish-at"], - "date-line-ts": episode.attributes["date-line-ts"], - "estimated-duration": episode.attributes["estimated-duration"], - "image-main": episode.attributes["image-main"] - }; - }) + "publish-at": episode.published_at, + "date-line-ts": (new Date(episode.updated_at).getTime())/1000, + "estimated-duration": episode.duration, + "image-main": imageMain + }); + } + episodes.pages = pages; return episodes; } - throw new Error(`Failed to retrieve recent page ${page}`); + throw new Error(`Failed to retrieve episodes from Simplecast. Status: ${response.status}`); }; /** * Updates the index with the most recent episodes. */ const updateRecent = async () => { - const episodes = await getBatch(1); + const url = `${process.env.SIMPLECAST_URL}&limit=10`; + const episodes = await getBatch(url); (await getIndex()).saveObjects(episodes).then(() => { //Not doing anything with the response }).catch((e) => { @@ -58,12 +146,15 @@ const updateRecent = async () => { * Indexes all RadioLab episodes. 
*/ const indexAll = async () => { - let pageNum = 1; - const episodes = []; - let page = await getBatch(pageNum); - while (page.length > 0) { - episodes.push(...page); - page = await getBatch(pageNum++); + // On the first run it will set the next url to null but on subsequent runs it will be set to the next url + //if the next url is null, we are done + let url = process.env.SIMPLECAST_URL; + let episodes = []; + + while (url) { + const batch = await getBatch(url); + episodes = episodes.concat(batch); + url = batch.pages?.next?.href; } (await getIndex()).replaceAllObjects(episodes).then(() => { //Not doing anything with the response @@ -109,6 +200,7 @@ export default defineEventHandler(async (event) => { return {status: 404, body: 'Not Found'}; } } catch (e) { + console.log(e); return {status: 500, body: e.message}; } }); \ No newline at end of file