Skip to content

Commit

Permalink
✨ Adds consistent redirects handling for cf workers. (#3895)
Browse files Browse the repository at this point in the history
Cloudflare has support for redirects but only up to a limited number (I think 2000?). We have a lot of chart redirects in the chart_redirects table (about 1600 or so). Because everything under the /grapher/ route on our website has to run through a CF function anyways (to update the thumbnail image to include the correct query string), we decided to bake these redirects into a json file and do custom redirects handling in the CF function. 

We did this for html pages already but not for thumbnails, but now that we also have `.config.json` requests to worry about and would like to have consistent redirects handling that doesn't overly complicate the logic of our handlers.

This PR makes it so that both requests for the html pages at `/grapher/[slug]` and requests for `/grapher/[slug].config.json` will be redirected according to the _grapherRedirects.json file using a catch error handler. This is done as a catch handler that checks for 404 pages so that the common, happy path does not have to fetch the redirects file.
  • Loading branch information
danyx23 authored Sep 10, 2024
2 parents d6b9646 + d25b1ef commit 3a153b9
Show file tree
Hide file tree
Showing 2 changed files with 84 additions and 87 deletions.
33 changes: 22 additions & 11 deletions functions/_common/grapherRenderer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import {
} from "@ourworldindata/utils"
import { svg2png, initialize as initializeSvg2Png } from "svg2png-wasm"
import { TimeLogger } from "./timeLogger"
import { png } from "itty-router"
import { png, StatusError } from "itty-router"

import svg2png_wasm from "../../node_modules/svg2png-wasm/svg2png_wasm_bg.wasm"

Expand Down Expand Up @@ -152,6 +152,8 @@ async function fetchFromR2(
headers,
}
const primaryResponse = await fetch(url.toString(), init)
// The fallback URL here is used so that on staging or dev we can fallback
// to the production bucket if the file is not found in the branch bucket
if (primaryResponse.status === 404 && fallbackUrl) {
return fetch(fallbackUrl.toString(), init)
}
Expand Down Expand Up @@ -215,6 +217,13 @@ export async function fetchGrapherConfig(
): Promise<FetchGrapherConfigResult> {
const fetchResponse = await fetchUnparsedGrapherConfig(slug, env, etag)

if (fetchResponse.status === 404) {
// we throw 404 errors instead of returning a 404 response so that the router
// catch handler can do a lookup in the redirects file and maybe send
// a 302 redirect response
throw new StatusError(404)
}

if (fetchResponse.status !== 200) {
console.log(
"Status code is not 200, returning empty response with status code",
Expand All @@ -241,13 +250,16 @@ async function fetchAndRenderGrapherToSvg(
options: ImageOptions,
searchParams: URLSearchParams,
env: Env
) {
): Promise<string> {
const grapherLogger = new TimeLogger("grapher")

const grapherConfigResponse = await fetchGrapherConfig(slug, env)

if (grapherConfigResponse.status !== 200) {
return null
if (grapherConfigResponse.status === 404) {
// we throw 404 errors instad of returning a 404 response so that the router
// catch handler can do a lookup in the redirects file and maybe send
// a 302 redirect response
throw new StatusError(grapherConfigResponse.status)
}

const bounds = new Bounds(0, 0, options.svgWidth, options.svgHeight)
Expand Down Expand Up @@ -300,10 +312,6 @@ export const fetchAndRenderGrapher = async (
)
console.log("fetched svg")

if (!svg) {
return new Response("Not found", { status: 404 })
}

switch (outType) {
case "png":
return png(await renderSvgToPng(svg, options))
Expand Down Expand Up @@ -341,7 +349,7 @@ export async function getOptionalRedirectForSlug(
slug: string,
baseUrl: URL,
env: Env
) {
): Promise<string | undefined> {
const redirects: Record<string, string> = await env.ASSETS.fetch(
new URL("/grapher/_grapherRedirects.json", baseUrl),
{ cf: { cacheTtl: 2 * 60 } }
Expand All @@ -354,8 +362,11 @@ export async function getOptionalRedirectForSlug(
return redirects[slug]
}

export function createRedirectResponse(redirSlug: string, currentUrl: URL) {
new Response(null, {
export function createRedirectResponse(
redirSlug: string,
currentUrl: URL
): Response {
return new Response(null, {
status: 302,
headers: { Location: `/grapher/${redirSlug}${currentUrl.search}` },
})
Expand Down
138 changes: 62 additions & 76 deletions functions/grapher/[slug].ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,22 @@ import {
createRedirectResponse,
fetchUnparsedGrapherConfig,
} from "../_common/grapherRenderer.js"
import { IRequestStrict, Router, error, cors } from "itty-router"
import { IRequestStrict, Router, StatusError, error, cors } from "itty-router"

const { preflight, corsify } = cors({
allowMethods: ["GET", "OPTIONS", "HEAD"],
})
const extensions = {
configJson: ".config.json",
}

const router = Router<IRequestStrict, [URL, Env, string]>({
before: [preflight],
finally: [corsify],
})
router
.get(
"/grapher/:slug.config.json",
`/grapher/:slug${extensions.configJson}`,
async ({ params: { slug } }, { searchParams }, env, etag) =>
handleConfigRequest(slug, searchParams, env, etag)
)
Expand All @@ -41,39 +44,64 @@ export const onRequest: PagesFunction = async (context) => {
{ ...env, url },
request.headers.get("if-none-match")
)
.catch((e) => error(500, e))
.catch(async (e) => {
// Here we do a unified after the fact handling of 404s to check
// if we have a redirect in the _grapherRedirects.json file.
// This is done as a catch handler that checks for 404 pages
// so that the common, happy path does not have to fetch the redirects file.
if (e instanceof StatusError && e.status === 404) {
console.log("Handling 404 for ", url.pathname)

const fullslug = url.pathname.split("/").pop() as string

const allExtensions = Object.values(extensions)
.map((ext) => ext.replace(".", "\\.")) // for the regex make sure we match only a single dot, not any character
.join("|")
const regexForKnownExtensions = new RegExp(
`^(?<slug>.*?)(?<extension>${allExtensions})?$`
)

const matchResult = fullslug.match(regexForKnownExtensions)
const slug = matchResult?.groups?.slug ?? fullslug
const extension = matchResult?.groups?.extension ?? ""

if (slug.toLowerCase() !== slug)
return createRedirectResponse(
`${slug.toLowerCase()}${extension}`,
url
)

console.log("Looking up slug and extension", {
slug,
extension,
})

const redirectSlug = await getOptionalRedirectForSlug(
slug,
url,
{
...env,
url,
} as Env
)
console.log("Redirect slug", redirectSlug)
if (redirectSlug && redirectSlug !== slug) {
return createRedirectResponse(
`${redirectSlug}${extension}`,
url
)
} else return error(404, "Not found")
}
return error(500, e)
})
}

async function handleHtmlPageRequest(
slug: string,
searchParams: URLSearchParams,
_searchParams: URLSearchParams,
env: Env
) {
const url = env.url
// Redirects handling is performed by the worker, and is done by fetching the (baked) _grapherRedirects.json file.
// That file is a mapping from old slug to new slug.

/**
* REDIRECTS HANDLING:
* We want to optimize for the case where the user visits a page using the correct slug, i.e. there's no redirect.
* That's why:
* 1. We first check if the slug is lowercase. If it's not, we convert it to lowercase _and check for any redirects already_, and send a redirect already.
* 2. If the slug is lowercase, we check if we can find the page at the requested slug. If we can find it, we return it already.
* 3. If we can't find it, we _then_ check if there's a redirect for it. If there is, we redirect to the new page.
*/

// All our grapher slugs are lowercase by convention.
// To allow incoming links that may contain uppercase characters to work, we redirect to the lowercase version.
const lowerCaseSlug = slug.toLowerCase()
if (lowerCaseSlug !== slug) {
const redirectSlug = await getOptionalRedirectForSlug(
lowerCaseSlug,
url,
env
)

return createRedirectResponse(redirectSlug ?? lowerCaseSlug, url)
}

// For local testing
// const grapherPageResp = await fetch(
Expand All @@ -84,25 +112,17 @@ async function handleHtmlPageRequest(
const grapherPageResp = await env.ASSETS.fetch(url, { redirect: "manual" })

if (grapherPageResp.status === 404) {
// If the request is a 404, we check if there's a redirect for it.
// If there is, we redirect to the new page.
const redirectSlug = await getOptionalRedirectForSlug(slug, url, env)
if (redirectSlug && redirectSlug !== slug) {
return createRedirectResponse(redirectSlug, url)
} else {
// Otherwise we just return the 404 page.
return grapherPageResp
}
throw new StatusError(404)
}

// A non-200 status code is most likely a redirect (301 or 302) or a 404, all of which we want to pass through as-is.
// A non-200 status code is most likely a redirect (301 or 302), all of which we want to pass through as-is.
// In the case of the redirect, the browser will then request the new URL which will again be handled by this worker.
if (grapherPageResp.status !== 200) return grapherPageResp

const openGraphThumbnailUrl = `/grapher/thumbnail/${lowerCaseSlug}.png?imType=og${
const openGraphThumbnailUrl = `/grapher/thumbnail/${slug}.png?imType=og${
url.search ? "&" + url.search.slice(1) : ""
}`
const twitterThumbnailUrl = `/grapher/thumbnail/${lowerCaseSlug}.png?imType=twitter${
const twitterThumbnailUrl = `/grapher/thumbnail/${slug}.png?imType=twitter${
url.search ? "&" + url.search.slice(1) : ""
}`

Expand Down Expand Up @@ -146,21 +166,6 @@ async function handleConfigRequest(
) {
const shouldCache = searchParams.get("nocache") === null
console.log("Preparing json response for ", slug)
// All our grapher slugs are lowercase by convention.
// To allow incoming links that may contain uppercase characters to work, we redirect to the lowercase version.
const lowerCaseSlug = slug.toLowerCase()
if (lowerCaseSlug !== slug) {
const redirectSlug = await getOptionalRedirectForSlug(
lowerCaseSlug,
env.url,
env
)

return createRedirectResponse(
`${redirectSlug ?? lowerCaseSlug}.config.json`,
env.url
)
}

const grapherPageResp = await fetchUnparsedGrapherConfig(slug, env, etag)

Expand All @@ -169,29 +174,10 @@ async function handleConfigRequest(
return new Response(null, { status: 304 })
}

if (grapherPageResp.status !== 200) {
// If the request is a 404, we check if there's a redirect for it.
// If there is, we redirect to the new page.
const redirectSlug = await getOptionalRedirectForSlug(
slug,
env.url,
env
)
if (redirectSlug && redirectSlug !== slug) {
console.log("Redirecting to ", redirectSlug)
return createRedirectResponse(
`${redirectSlug}.config.json`,
env.url
)
} else {
console.log("Returning 404 for ", slug)
// Otherwise we just return the status code.
return new Response(null, { status: grapherPageResp.status })
}
if (grapherPageResp.status === 404) {
throw new StatusError(404)
}

console.log("Returning 200 for ", slug)

const cacheControl = shouldCache
? "public, s-maxage=3600, max-age=0, must-revalidate"
: "public, s-maxage=0, max-age=0, must-revalidate"
Expand Down

0 comments on commit 3a153b9

Please sign in to comment.