-
Notifications
You must be signed in to change notification settings - Fork 8
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add airtable-cache prototype #16
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
# Pinned by digest for reproducible builds
FROM node:20-alpine@sha256:7a91aa397f2e2dfbfcdad2e2d72599f374e0b0172be1d86eeb73f1d33f36a4b2 AS base

# dumb-init forwards signals and reaps zombies so node shuts down cleanly in k8s
RUN apk update && apk add --no-cache dumb-init

WORKDIR /app

# Use the `KEY=value` form (the space-separated form is legacy)
ENV NODE_ENV=production
ENV PORT=8080

COPY package.json ./
COPY dist_tools/package-lock.json ./

# BuildKit cache mount keeps the npm download cache between builds
RUN --mount=type=cache,target=/app/.npm \
  npm set cache /app/.npm && \
  npm ci --omit=dev --no-audit --no-fund

# Drop root privileges before copying in and running app code
USER node

COPY --chown=node:node ./dist/ .

EXPOSE 8080

# BUG FIX: the previous shell-form `CMD dumb-init PORT=8080 node index.js` made
# dumb-init exec a program literally named "PORT=8080" (dumb-init is not env(1)),
# which fails at container start. PORT is now set via ENV above, and exec form
# lets dumb-init run as PID 1 instead of being wrapped in `sh -c`.
CMD ["dumb-init", "node", "index.js"]
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
# airtable-cache | ||
|
||
A caching proxy to Airtable. Consumers can generally ignore the details, and just pretend this is Airtable itself for standard [Airtable API](https://airtable.com/developers/web/api/) routes. For example, with [`airtable-ts`](https://github.com/domdomegg/airtable-ts): | ||
|
||
```ts | ||
import { AirtableTs } from 'airtable-ts'; | ||
|
||
const db = new AirtableTs({ | ||
apiKey: 'pat1234.abcdef', | ||
// Add this line | ||
endpointUrl: 'https://airtable-proxy.k8s.bluedot.org/' | ||
}); | ||
``` | ||
|
||
## Details | ||
|
||
Under the hood, we intercept and cache certain types of successful responses. This means data will usually load more quickly, especially for larger tables. | ||
|
||
We store the cache results in a Postgres table. We chose this over a cache solution like Redis because we already have a Postgres cluster set up, and [we like boring technology](https://boringtechnology.club/). | ||
|
||
We invalidate cache entries when records are directly updated. Updates performed outside of airtable-cache can risk stale data being returned. Your app should be able to handle stale data anyway, given that Airtable itself only provides eventually consistent updates. However, if you _really_ need to override the cache, send a DELETE request to `/cache/{cacheKeyPrefix}`, with your Airtable Bearer token authorization. The `cacheKeyPrefix` should be a `baseId`, a `baseId,tableId`, or `baseId,tableId,recordId` - do try to be as specific as possible!
|
||
## Developer setup | ||
|
||
No special actions are needed — just follow [the general developer setup instructions](../../README.md#developer-setup-instructions).
|
||
## Deployment | ||
|
||
This app is deployed onto the K8s cluster as a docker container. | ||
|
||
To deploy a new version, simply commit to the master branch. GitHub Actions automatically handles CD. | ||
|
||
## TODO | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Delete this TODO - has now been implemented |
||
|
||
- deleting old entries in the cache. need some kind of cron job in the app |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
{ | ||
"name": "@bluedot/airtable-cache", | ||
"version": "1.0.0", | ||
"private": true, | ||
"scripts": { | ||
"start": "tsup --watch --onSuccess \"npm run kanel && NODE_ENV=development node dist/index.js\"", | ||
"build": "tsc && tsup && cp ../../package-lock.json dist_tools", | ||
"deploy:prod": "tools/deployDocker.sh", | ||
"kanel": "tsup tools/kanel.ts --outDir dist_tools --external kanel --external kanel-kysely && NODE_ENV=development node dist_tools/kanel.js", | ||
"resetdb": "dropdb --host=localhost --port=5432 --force postgres && createdb --host=localhost --port=5432 postgres", | ||
"createLock": "tsup tools/createLock.ts --outDir dist_tools --external @npmcli/arborist && NODE_ENV=development node dist_tools/createLock.js", | ||
"test": "vitest --run", | ||
"test:watch": "vitest", | ||
"lint": "eslint ." | ||
}, | ||
"dependencies": { | ||
"@bluedot/backend-contract": "*", | ||
"@fastify/cors": "^8.5.0", | ||
"@ts-rest/core": "^3.41.1", | ||
"@ts-rest/fastify": "^3.41.1", | ||
"axios": "^1.7.7", | ||
"croner": "^8.1.1", | ||
"fast-jwt": "^4.0.1", | ||
"fastify": "^4.26.2", | ||
"fastify-plugin": "^4.5.1", | ||
"get-jwks": "^9.0.0", | ||
"http-errors": "^2.0.0", | ||
"kysely": "^0.26.3", | ||
"pg": "^8.11.3", | ||
"zod": "^3.22.4" | ||
}, | ||
"devDependencies": { | ||
"@bluedot/eslint-config": "*", | ||
"@bluedot/typescript-config": "*", | ||
"@types/http-errors": "^2.0.4", | ||
"@types/node": "^20.12.12", | ||
"@types/pg": "^8.10.9", | ||
"eslint": "^8.57.0", | ||
"kanel": "^3.8.2", | ||
"kanel-kysely": "^0.3.2", | ||
"tsup": "^8.0.1", | ||
"typescript": "^5.3.2", | ||
"vitest": "^1.0.2" | ||
}, | ||
"tsup": { | ||
"entry": [ | ||
"src/index.ts" | ||
], | ||
"noExternal": [ | ||
"@bluedot/backend-contract" | ||
], | ||
"outDir": "dist" | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,226 @@ | ||
import { fastify, FastifyReply, FastifyRequest } from 'fastify'; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. File is a little messy and probably too long: tidy up and split into multiple files |
||
import { fastifyCors } from '@fastify/cors'; | ||
import axios, { AxiosResponse } from 'axios'; | ||
import { errorHandlerPlugin } from './lib/errorHandlerPlugin'; | ||
import { sha256 } from './lib/sha256'; | ||
import { | ||
CacheValue, clearValues, clearValuesContainingBody, getValue, setValue, | ||
} from './lib/cache'; | ||
|
||
// Builds the fastify server with all routes registered:
// - GET routes for Airtable reads, served through the cache
// - write routes that invalidate the relevant cache entries, then proxy
// - a catch-all that transparently forwards everything else to Airtable
// NOTE(review): this currently proxies (and caches) requests for any bearer
// token - consider validating tokens against an allowlisted set.
export const getInstance = async () => {
  const instance = fastify();

  await instance.register(errorHandlerPlugin);
  await instance.register(fastifyCors);

  await instance.register(async (i) => {
    // Root just points people at the README
    i.get('/', async (req, reply) => {
      reply.redirect('https://github.com/bluedotimpact/bluedot/tree/master/apps/airtable-cache#readme');
    });

    // Cacheable read endpoints: base schema, table record list, single record
    i.get('/v0/meta/bases/:baseId/tables', cachedGetHandler);
    i.get('/v0/:baseId/:tableId', cachedGetHandler);
    i.get('/v0/:baseId/:tableId/:recordId', cachedGetHandler);

    // Schema writes: clear the cached base schema
    i.post('/v0/meta/bases/:baseId/tables', cacheClearingHandler(['baseSchema']));
    i.post('/v0/meta/bases/:baseId/tables/:tableId/fields/:fieldId', cacheClearingHandler(['baseSchema']));

    // Field creation changes what data appears in table/record responses, so
    // clear the base schema, all table data, and all records in that table
    i.post('/v0/meta/bases/:baseId/tables/:tableId/fields', cacheClearingHandler(['baseSchema', 'tableData', 'tableRecords']));

    // Record writes: clear table responses mentioning the records, and the records themselves
    i.patch('/v0/:baseId/:tableId', cacheClearingHandler(['mentionedRecords']));
    i.put('/v0/:baseId/:tableId', cacheClearingHandler(['mentionedRecords']));
    i.delete('/v0/:baseId/:tableId', cacheClearingHandler(['mentionedRecords']));
    i.patch('/v0/:baseId/:tableId/:recordId', cacheClearingHandler(['mentionedRecords']));
    i.put('/v0/:baseId/:tableId/:recordId', cacheClearingHandler(['mentionedRecords']));
    i.delete('/v0/:baseId/:tableId/:recordId', cacheClearingHandler(['mentionedRecords']));
    i.post('/v0/:baseId/:recordId/:fieldId/uploadAttachment', cacheClearingHandler(['mentionedRecords']));

    // Record creation: clear all cached table responses for that table
    i.post('/v0/:baseId/:tableId', cacheClearingHandler(['tableData']));

    // Everything else is transparently proxied to Airtable, uncached
    i.route({
      method: ['DELETE', 'GET', 'HEAD', 'PATCH', 'POST', 'PUT'],
      url: '/*',
      handler: async (req, reply) => {
        const airtableResponse = await proxyToAirtable(req);
        return airtableToFastifyReply(airtableResponse, reply);
      },
    });
  });

  return instance;
};
|
||
const cachedGetHandler = async (req: FastifyRequest, reply: FastifyReply) => { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe consider setting up webhooks on Airtable so that if the records change through other routes e.g. different API changes, or manual edits in Airtable, the cache isn't stale. Or at least make it very easy to clear the cache - maybe a web UI or something. |
||
const cacheResult = await getValue(createCacheKeyFromReq(req)).catch((err) => { | ||
// We don't want to block requests if the caching functionality fails | ||
// E.g. if Postgres is down, users should still be able to talk with Airtable | ||
console.error('Error while getting a value from the cache', err); | ||
return { status: 'miss' } as const; | ||
}); | ||
if (cacheResult.status === 'hit') { | ||
return cacheToFastifyReply(cacheResult.value, reply); | ||
} | ||
if (cacheResult.status === 'stale') { | ||
// We don't await this promise so we return the cached response to the user quickly. | ||
// This promise revalidates the stale entry in the background. | ||
proxyToAirtableWithCaching(req).catch((err) => { | ||
console.error('Error while refreshing a stale but valid entry in the cache', err); | ||
}); | ||
|
||
return cacheToFastifyReply(cacheResult.value, reply); | ||
} | ||
|
||
const airtableResponse = await proxyToAirtableWithCaching(req); | ||
return airtableToFastifyReply(airtableResponse, reply); | ||
}; | ||
|
||
const proxyToAirtableWithCaching = async (req: FastifyRequest): Promise<AxiosResponse> => { | ||
const airtableResponse = await proxyToAirtable(req); | ||
|
||
if (airtableResponse.status === 200) { | ||
try { | ||
// We currently just cache the response for the exact same request in future | ||
// e.g. If we got a list of records, we don't cache the records individually. | ||
// We could cache the records in the future, with attention to which fields have been requested. | ||
// We should only explore this if we do find this is a common request pattern, and it would | ||
// actually bring value to speed this up. | ||
await setValue(createCacheKeyFromReq(req), airtableResponseToCacheValue(airtableResponse)); | ||
} catch (err) { | ||
// We don't want to block requests if the caching functionality fails | ||
// E.g. if Postgres is down, users should still be able to talk with Airtable | ||
console.error('Error while setting a value in the cache', err); | ||
} | ||
} | ||
|
||
return airtableResponse; | ||
}; | ||
|
||
const proxyToAirtable = async (req: FastifyRequest): Promise<AxiosResponse> => { | ||
const { | ||
method, url, headers, body, | ||
} = req; | ||
|
||
// Forward the request to Airtable | ||
return axios({ | ||
method, | ||
baseURL: 'https://api.airtable.com', | ||
url, | ||
headers: { | ||
...headers, | ||
host: 'api.airtable.com', | ||
}, | ||
data: method !== 'GET' && method !== 'HEAD' ? body : undefined, | ||
validateStatus: () => true, | ||
responseType: 'text', | ||
}); | ||
}; | ||
|
||
const airtableToFastifyReply = async (airtableResponse: AxiosResponse, reply: FastifyReply) => { | ||
reply.status(airtableResponse.status); | ||
const headersJSON = (airtableResponse.headers.toJSON as (b: boolean) => Record<string, string>)(true); | ||
reply.headers(headersJSON); | ||
|
||
return airtableResponse.data; | ||
}; | ||
|
||
const cacheToFastifyReply = async (cacheValue: CacheValue, reply: FastifyReply) => { | ||
reply.status(cacheValue.status); | ||
reply.headers(cacheValue.headers); | ||
return cacheValue.body; | ||
}; | ||
|
||
const createCacheKeyFromReq = (req: FastifyRequest) => { | ||
const { baseId, tableId, recordId } = req.params as Record<string, string>; | ||
const token = req.headers.authorization?.slice('Bearer '.length).trim() ?? ''; | ||
return createCacheKeyPrefix({ | ||
baseId, tableId, recordId, token, path: req.url, | ||
}); | ||
}; | ||
|
||
const createCacheKeyPrefix = ({ | ||
baseId, tableId, recordId, token, path, | ||
}: { token: string, baseId?: string, tableId?: string, recordId?: string, path?: string }) => { | ||
return [sha256(token), baseId, tableId, recordId, path ? sha256(path) : undefined].filter((v) => v !== undefined).join(','); | ||
}; | ||
|
||
const airtableResponseToCacheValue = (airtableResponse: AxiosResponse): CacheValue => { | ||
const headersJSON = (airtableResponse.headers.toJSON as (b: boolean) => Record<string, string>)(true); | ||
|
||
return { | ||
body: airtableResponse.data, | ||
headers: headersJSON, | ||
status: airtableResponse.status, | ||
}; | ||
}; | ||
|
||
// Invalidation strategies used by cacheClearingHandler / clearCache.
type CacheClearType =
  // Clear the cached base schema response for the given baseId in the path
  | 'baseSchema'
  // Clear any table responses in the table for the given tableId in the path
  | 'tableData'
  // Clear any record responses in the table for the given tableId in the path
  | 'tableRecords'
  // Clear any table or record responses that include the recordId in the path, or record ids in the body of the request
  | 'mentionedRecords';
|
||
const cacheClearingHandler = (cachesToClear: CacheClearType[]) => async (req: FastifyRequest, reply: FastifyReply) => { | ||
await Promise.all(cachesToClear.map((t) => clearCache(req, t))); | ||
|
||
const airtableResponse = await proxyToAirtableWithCaching(req); | ||
return airtableToFastifyReply(airtableResponse, reply); | ||
}; | ||
|
||
const clearCache = async (req: FastifyRequest, cacheClearType: CacheClearType): Promise<void> => { | ||
const token = req.headers.authorization?.slice('Bearer '.length).trim() ?? ''; | ||
const { baseId, tableId, recordId } = req.params as Record<string, string>; | ||
|
||
switch (cacheClearType) { | ||
case 'baseSchema': { | ||
const keyPrefix = createCacheKeyPrefix({ | ||
// path is specified to force recordId to be blank, to only select for the base schema | ||
token, baseId, tableId: '', recordId: '', path: '', | ||
}); | ||
await clearValues(keyPrefix); | ||
break; | ||
} | ||
case 'tableData': { | ||
const keyPrefix = createCacheKeyPrefix({ | ||
// path is specified to force recordId to be blank, to only select for the table responses | ||
token, baseId, tableId, recordId: '', path: '', | ||
}); | ||
await clearValues(keyPrefix); | ||
break; | ||
} | ||
case 'tableRecords': { | ||
const keyPrefix = createCacheKeyPrefix({ | ||
token, baseId, tableId, recordId: 'rec', | ||
}); | ||
await clearValues(keyPrefix); | ||
break; | ||
} | ||
case 'mentionedRecords': { | ||
const bodyString = typeof req.body === 'object' ? (JSON.stringify(req.body)) : String(req.body); | ||
const recordIdsInBody = /rec[a-zA-Z0-9]{14}/g.exec(bodyString) ?? []; | ||
const uniqueRecordIds = new Set(recordIdsInBody); | ||
if (recordId) { | ||
uniqueRecordIds.add(recordId); | ||
} | ||
|
||
// NB: we intentionally don't specify table here | ||
// This makes updates to linked records across tables function correctly | ||
// We can revisit this if performance becomes an issue | ||
const keyPrefix = createCacheKeyPrefix({ token, baseId, tableId: 'tbl' }).slice(0, -2); | ||
await Promise.all([...uniqueRecordIds].map((r) => clearValuesContainingBody(keyPrefix, r))); | ||
|
||
break; | ||
} | ||
default: { | ||
const shouldBeNever: never = cacheClearType; | ||
throw new Error(`Unknown cacheClearType: ${shouldBeNever}`); | ||
} | ||
} | ||
}; |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
// Database access for the app: a single shared Postgres connection pool
// wrapped in a typed kysely query builder.
import { Pool } from 'pg';
import { Kysely, PostgresDialect } from 'kysely';
// All these ignores are to allow this to pass in CI, where we don't have a database to generate the files with
// If we do more with backend, we should fix this properly by having CI spin up a database
// @ts-ignore
// eslint-disable-next-line import/extensions
import Database from './generated/Database';
import { env } from '../env';

// One pool for the whole process; kysely checks connections out per query.
const dialect = new PostgresDialect({
  pool: new Pool({
    connectionString: env.DATABASE_CONNECTION_STRING,
  }),
});

// Typed query builder. The Database type is code-generated from the live
// schema (see the `kanel` script in package.json).
export const db = new Kysely<Database>({ dialect });
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
import { describe, expect, test } from 'vitest'; | ||
import { readdirSync } from 'fs'; | ||
import { migrations } from './list'; | ||
|
||
// to avoid forgetting to add a migration to the list | ||
describe('all migrations have been added to list', () => { | ||
const EXCLUDED_FILES = [ | ||
'list.ts', | ||
'migrator.ts', | ||
]; | ||
const migrationFiles = readdirSync(__dirname).filter((f) => !f.endsWith('.test.ts') && !EXCLUDED_FILES.includes(f)); | ||
|
||
test.each(migrationFiles)('file: %s', (file) => { | ||
expect(file.endsWith('.ts')).toBe(true); | ||
expect(migrations[file.slice(0, -3)]).toBeTruthy(); | ||
}); | ||
}); | ||
|
||
// to avoid accidentally mapping a migration to the wrong value | ||
describe('all migrations are named the same as their key', () => { | ||
const entries = Object.entries(migrations); | ||
test.each(entries)('key: %s', (key, migration) => { | ||
expect(migration.name).toBe(key); | ||
}); | ||
}); |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
import { Migration } from 'kysely';
import { v00001init } from './v00001init';

// Registry of all database migrations, keyed by name. Each key must match
// both the migration's file name (without .ts) and the function's own name -
// this is enforced by list.test.ts.
export const migrations: Record<string, Migration['up']> = {
  v00001init,
};
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Need to explain why this exists, i.e.:
Airtable has low rate limits
Airtable is slow (sometimes)