Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

✨ Adds consistent redirects handling for cf workers. #3895

Merged
merged 1 commit into from
Sep 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 22 additions & 11 deletions functions/_common/grapherRenderer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import {
} from "@ourworldindata/utils"
import { svg2png, initialize as initializeSvg2Png } from "svg2png-wasm"
import { TimeLogger } from "./timeLogger"
import { png } from "itty-router"
import { png, StatusError } from "itty-router"

import svg2png_wasm from "../../node_modules/svg2png-wasm/svg2png_wasm_bg.wasm"

Expand Down Expand Up @@ -152,6 +152,8 @@ async function fetchFromR2(
headers,
}
const primaryResponse = await fetch(url.toString(), init)
// The fallback URL here is used so that on staging or dev we can fallback
// to the production bucket if the file is not found in the branch bucket
if (primaryResponse.status === 404 && fallbackUrl) {
return fetch(fallbackUrl.toString(), init)
}
Expand Down Expand Up @@ -215,6 +217,13 @@ export async function fetchGrapherConfig(
): Promise<FetchGrapherConfigResult> {
const fetchResponse = await fetchUnparsedGrapherConfig(slug, env, etag)

if (fetchResponse.status === 404) {
// we throw 404 errors instead of returning a 404 response so that the router
// catch handler can do a lookup in the redirects file and maybe send
// a 302 redirect response
throw new StatusError(404)
}

if (fetchResponse.status !== 200) {
console.log(
"Status code is not 200, returning empty response with status code",
Expand All @@ -241,13 +250,16 @@ async function fetchAndRenderGrapherToSvg(
options: ImageOptions,
searchParams: URLSearchParams,
env: Env
) {
): Promise<string> {
const grapherLogger = new TimeLogger("grapher")

const grapherConfigResponse = await fetchGrapherConfig(slug, env)

if (grapherConfigResponse.status !== 200) {
return null
if (grapherConfigResponse.status === 404) {
// we throw 404 errors instad of returning a 404 response so that the router
// catch handler can do a lookup in the redirects file and maybe send
// a 302 redirect response
throw new StatusError(grapherConfigResponse.status)
}

const bounds = new Bounds(0, 0, options.svgWidth, options.svgHeight)
Expand Down Expand Up @@ -300,10 +312,6 @@ export const fetchAndRenderGrapher = async (
)
console.log("fetched svg")

if (!svg) {
return new Response("Not found", { status: 404 })
}

switch (outType) {
case "png":
return png(await renderSvgToPng(svg, options))
Expand Down Expand Up @@ -341,7 +349,7 @@ export async function getOptionalRedirectForSlug(
slug: string,
baseUrl: URL,
env: Env
) {
): Promise<string | undefined> {
const redirects: Record<string, string> = await env.ASSETS.fetch(
new URL("/grapher/_grapherRedirects.json", baseUrl),
{ cf: { cacheTtl: 2 * 60 } }
Expand All @@ -354,8 +362,11 @@ export async function getOptionalRedirectForSlug(
return redirects[slug]
}

export function createRedirectResponse(redirSlug: string, currentUrl: URL) {
new Response(null, {
export function createRedirectResponse(
redirSlug: string,
currentUrl: URL
): Response {
return new Response(null, {
status: 302,
headers: { Location: `/grapher/${redirSlug}${currentUrl.search}` },
})
Expand Down
138 changes: 62 additions & 76 deletions functions/grapher/[slug].ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,22 @@ import {
createRedirectResponse,
fetchUnparsedGrapherConfig,
} from "../_common/grapherRenderer.js"
import { IRequestStrict, Router, error, cors } from "itty-router"
import { IRequestStrict, Router, StatusError, error, cors } from "itty-router"

const { preflight, corsify } = cors({
allowMethods: ["GET", "OPTIONS", "HEAD"],
})
const extensions = {
configJson: ".config.json",
}

const router = Router<IRequestStrict, [URL, Env, string]>({
before: [preflight],
finally: [corsify],
})
router
.get(
"/grapher/:slug.config.json",
`/grapher/:slug${extensions.configJson}`,
async ({ params: { slug } }, { searchParams }, env, etag) =>
handleConfigRequest(slug, searchParams, env, etag)
)
Expand All @@ -41,39 +44,64 @@ export const onRequest: PagesFunction = async (context) => {
{ ...env, url },
request.headers.get("if-none-match")
)
.catch((e) => error(500, e))
.catch(async (e) => {
// Here we do a unified after the fact handling of 404s to check
// if we have a redirect in the _grapherRedirects.json file.
// This is done as a catch handler that checks for 404 pages
// so that the common, happy path does not have to fetch the redirects file.
if (e instanceof StatusError && e.status === 404) {
console.log("Handling 404 for ", url.pathname)

const fullslug = url.pathname.split("/").pop() as string

const allExtensions = Object.values(extensions)
.map((ext) => ext.replace(".", "\\.")) // for the regex make sure we match only a single dot, not any character
.join("|")
const regexForKnownExtensions = new RegExp(
`^(?<slug>.*?)(?<extension>${allExtensions})?$`
)
Comment on lines +57 to +62
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this code will be simpler by assuming that a slug can't contain a dot, i.e. something like /^(?<slug>[^\s\.]+)(?<extension>\.\S+)?$/.

In theory we could then check after the fact that the extension is valid (and throw a fitting error), but it's also fine if we're not doing that.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah true but I think I like the idea of using a list of known extensions as a communication device that is added to further up in the stack. I'm inclined to leave it as is for now.


const matchResult = fullslug.match(regexForKnownExtensions)
const slug = matchResult?.groups?.slug ?? fullslug
const extension = matchResult?.groups?.extension ?? ""
Comment on lines +64 to +66
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So, what this code doesn't currently seem to handle is non-lowercase slugs.
Our rule is that a slug needs to be all-lowercase, and if it isn't we want to redirect it.

In the old code, I tried to be clever about it so we never need to run through two redirects, avoiding the scenario REDIR-SOURCE -> redir-source -> redir-target. I don't think we need to take care of that again, which means we would just do:

if (slug.toLowerCase() !== slug)
  return redir(302, slug.toLowerCase())


if (slug.toLowerCase() !== slug)
return createRedirectResponse(
`${slug.toLowerCase()}${extension}`,
url
)

console.log("Looking up slug and extension", {
slug,
extension,
})

const redirectSlug = await getOptionalRedirectForSlug(
slug,
url,
{
...env,
url,
} as Env
)
console.log("Redirect slug", redirectSlug)
if (redirectSlug && redirectSlug !== slug) {
return createRedirectResponse(
`${redirectSlug}${extension}`,
url
)
} else return error(404, "Not found")
}
return error(500, e)
})
}

async function handleHtmlPageRequest(
slug: string,
searchParams: URLSearchParams,
_searchParams: URLSearchParams,
env: Env
) {
const url = env.url
// Redirects handling is performed by the worker, and is done by fetching the (baked) _grapherRedirects.json file.
// That file is a mapping from old slug to new slug.

/**
* REDIRECTS HANDLING:
* We want to optimize for the case where the user visits a page using the correct slug, i.e. there's no redirect.
* That's why:
* 1. We first check if the slug is lowercase. If it's not, we convert it to lowercase _and check for any redirects already_, and send a redirect already.
* 2. If the slug is lowercase, we check if we can find the page at the requested slug. If we can find it, we return it already.
* 3. If we can't find it, we _then_ check if there's a redirect for it. If there is, we redirect to the new page.
*/

// All our grapher slugs are lowercase by convention.
// To allow incoming links that may contain uppercase characters to work, we redirect to the lowercase version.
const lowerCaseSlug = slug.toLowerCase()
if (lowerCaseSlug !== slug) {
const redirectSlug = await getOptionalRedirectForSlug(
lowerCaseSlug,
url,
env
)

return createRedirectResponse(redirectSlug ?? lowerCaseSlug, url)
}

// For local testing
// const grapherPageResp = await fetch(
Expand All @@ -84,25 +112,17 @@ async function handleHtmlPageRequest(
const grapherPageResp = await env.ASSETS.fetch(url, { redirect: "manual" })

if (grapherPageResp.status === 404) {
// If the request is a 404, we check if there's a redirect for it.
// If there is, we redirect to the new page.
const redirectSlug = await getOptionalRedirectForSlug(slug, url, env)
if (redirectSlug && redirectSlug !== slug) {
return createRedirectResponse(redirectSlug, url)
} else {
// Otherwise we just return the 404 page.
return grapherPageResp
}
throw new StatusError(404)
}

// A non-200 status code is most likely a redirect (301 or 302) or a 404, all of which we want to pass through as-is.
// A non-200 status code is most likely a redirect (301 or 302), all of which we want to pass through as-is.
// In the case of the redirect, the browser will then request the new URL which will again be handled by this worker.
if (grapherPageResp.status !== 200) return grapherPageResp

const openGraphThumbnailUrl = `/grapher/thumbnail/${lowerCaseSlug}.png?imType=og${
const openGraphThumbnailUrl = `/grapher/thumbnail/${slug}.png?imType=og${
url.search ? "&" + url.search.slice(1) : ""
}`
const twitterThumbnailUrl = `/grapher/thumbnail/${lowerCaseSlug}.png?imType=twitter${
const twitterThumbnailUrl = `/grapher/thumbnail/${slug}.png?imType=twitter${
url.search ? "&" + url.search.slice(1) : ""
}`

Expand Down Expand Up @@ -146,21 +166,6 @@ async function handleConfigRequest(
) {
const shouldCache = searchParams.get("nocache") === null
console.log("Preparing json response for ", slug)
// All our grapher slugs are lowercase by convention.
// To allow incoming links that may contain uppercase characters to work, we redirect to the lowercase version.
const lowerCaseSlug = slug.toLowerCase()
if (lowerCaseSlug !== slug) {
const redirectSlug = await getOptionalRedirectForSlug(
lowerCaseSlug,
env.url,
env
)

return createRedirectResponse(
`${redirectSlug ?? lowerCaseSlug}.config.json`,
env.url
)
}

const grapherPageResp = await fetchUnparsedGrapherConfig(slug, env, etag)

Expand All @@ -169,29 +174,10 @@ async function handleConfigRequest(
return new Response(null, { status: 304 })
}

if (grapherPageResp.status !== 200) {
// If the request is a 404, we check if there's a redirect for it.
// If there is, we redirect to the new page.
const redirectSlug = await getOptionalRedirectForSlug(
slug,
env.url,
env
)
if (redirectSlug && redirectSlug !== slug) {
console.log("Redirecting to ", redirectSlug)
return createRedirectResponse(
`${redirectSlug}.config.json`,
env.url
)
} else {
console.log("Returning 404 for ", slug)
// Otherwise we just return the status code.
return new Response(null, { status: grapherPageResp.status })
}
if (grapherPageResp.status === 404) {
throw new StatusError(404)
}

console.log("Returning 200 for ", slug)

const cacheControl = shouldCache
? "public, s-maxage=3600, max-age=0, must-revalidate"
: "public, s-maxage=0, max-age=0, must-revalidate"
Expand Down