Skip to content

Commit

Permalink
feat: custom file formats in file content loader (#12047)
Browse files Browse the repository at this point in the history
* add custom file format support

* add tests

* lint/format

* changeset

* nits

* finish tests

* add nested json test

* requested changes

* update changeset with @sarah11918 suggestions

* typos/formatting

* add map<id, data> yaml test

* fix tests and rebase
  • Loading branch information
rgodha24 authored Sep 29, 2024
1 parent 0a1036e commit 21b5e80
Show file tree
Hide file tree
Showing 10 changed files with 408 additions and 29 deletions.
68 changes: 68 additions & 0 deletions .changeset/lovely-pianos-breathe.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
---
'astro': minor
---

Adds a new optional `parser` property to the built-in `file()` loader for content collections to support additional file types such as `toml` and `csv`.

The `file()` loader now accepts a second argument that defines a `parser` function. This allows you to specify a custom parser (e.g. `toml.parse` or `csv-parse`) to create a collection from a file's contents. The `file()` loader will automatically detect and parse JSON and YAML files (based on their file extension) with no need for a `parser`.

This works with any custom file format, including `csv` and `toml`. The following example defines a content collection `dogs` using a `.toml` file.
```toml
[[dogs]]
id = "..."
age = "..."

[[dogs]]
id = "..."
age = "..."
```
After importing TOML's parser, you can load the `dogs` collection into your project by passing both a file path and `parser` to the `file()` loader.
```typescript
import { defineCollection } from "astro:content"
import { file } from "astro/loaders"
import { parse as parseToml } from "toml"

const dogs = defineCollection({
loader: file("src/data/dogs.toml", { parser: (text) => parseToml(text).dogs }),
schema: /* ... */
})

// it also works with CSVs!
import { parse as parseCsv } from "csv-parse/sync";

const cats = defineCollection({
loader: file("src/data/cats.csv", { parser: (text) => parseCsv(text, { columns: true, skipEmptyLines: true })})
});
```

The `parser` argument also allows you to load a single collection from a nested JSON document. For example, this JSON file contains multiple collections:
```json
{"dogs": [{}], "cats": [{}]}
```

You can separate these collections by passing a custom `parser` to the `file()` loader like so:
```typescript
const dogs = defineCollection({
loader: file("src/data/pets.json", { parser: (text) => JSON.parse(text).dogs })
});
const cats = defineCollection({
loader: file("src/data/pets.json", { parser: (text) => JSON.parse(text).cats })
});
```

And it continues to work with maps of `id` to `data`:
```yaml
bubbles:
breed: "Goldfish"
age: 2
finn:
breed: "Betta"
age: 1
```
```typescript
const fish = defineCollection({
loader: file("src/data/fish.yaml"),
schema: z.object({ breed: z.string(), age: z.number() })
});
```
61 changes: 46 additions & 15 deletions packages/astro/src/content/loaders/file.ts
Original file line number Diff line number Diff line change
@@ -1,25 +1,56 @@
import { promises as fs, existsSync } from 'node:fs';
import { fileURLToPath } from 'node:url';
import yaml from 'js-yaml';
import { posixRelative } from '../utils.js';
import type { Loader, LoaderContext } from './types.js';

/** Options accepted as the second argument of the `file()` loader. */
export interface FileOptions {
/**
* Function that turns the raw text of the file into entries.
* It must return either an array of entry objects or an object that maps each entry id to its data.
* When provided, it is used instead of the parser chosen from the file extension.
* @default JSON.parse or yaml.load, depending on the extension of the file
*/
parser?: (
text: string,
) => Record<string, Record<string, unknown>> | Array<Record<string, unknown>>;
}

/**
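* Loads entries from a single data file. JSON and YAML files are parsed automatically based on their file extension; any other format (such as TOML or CSV) requires a custom `parser` in `options`.
* The parsed data must be an array of objects that contain unique `id` (or `slug`) fields, or an object with string keys.
* @param fileName The path to the data file to load, relative to the content directory. Glob patterns are not supported.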
* @param options Additional options for the file loader
*/
export function file(fileName: string): Loader {
export function file(fileName: string, options?: FileOptions): Loader {
if (fileName.includes('*')) {
// TODO: AstroError
throw new Error('Glob patterns are not supported in `file` loader. Use `glob` loader instead.');
}

let parse: ((text: string) => any) | null = null;

const ext = fileName.split('.').at(-1);
if (ext === 'json') {
parse = JSON.parse;
} else if (ext === 'yml' || ext === 'yaml') {
parse = (text) =>
yaml.load(text, {
filename: fileName,
});
}
if (options?.parser) parse = options.parser;

if (parse === null) {
// TODO: AstroError
throw new Error(
`No parser found for file '${fileName}'. Try passing a parser to the \`file\` loader.`,
);
}

async function syncData(filePath: string, { logger, parseData, store, config }: LoaderContext) {
let json: Array<Record<string, unknown>>;
let data: Array<Record<string, unknown>> | Record<string, Record<string, unknown>>;

try {
const data = await fs.readFile(filePath, 'utf-8');
json = JSON.parse(data);
const contents = await fs.readFile(filePath, 'utf-8');
data = parse!(contents);
} catch (error: any) {
logger.error(`Error reading data from ${fileName}`);
logger.debug(error.message);
Expand All @@ -28,28 +59,28 @@ export function file(fileName: string): Loader {

const normalizedFilePath = posixRelative(fileURLToPath(config.root), filePath);

if (Array.isArray(json)) {
if (json.length === 0) {
if (Array.isArray(data)) {
if (data.length === 0) {
logger.warn(`No items found in ${fileName}`);
}
logger.debug(`Found ${json.length} item array in ${fileName}`);
logger.debug(`Found ${data.length} item array in ${fileName}`);
store.clear();
for (const rawItem of json) {
for (const rawItem of data) {
const id = (rawItem.id ?? rawItem.slug)?.toString();
if (!id) {
logger.error(`Item in ${fileName} is missing an id or slug field.`);
continue;
}
const data = await parseData({ id, data: rawItem, filePath });
store.set({ id, data, filePath: normalizedFilePath });
const parsedData = await parseData({ id, data: rawItem, filePath });
store.set({ id, data: parsedData, filePath: normalizedFilePath });
}
} else if (typeof json === 'object') {
const entries = Object.entries<Record<string, unknown>>(json);
} else if (typeof data === 'object') {
const entries = Object.entries<Record<string, unknown>>(data);
logger.debug(`Found object with ${entries.length} entries in ${fileName}`);
store.clear();
for (const [id, rawItem] of entries) {
const data = await parseData({ id, data: rawItem, filePath });
store.set({ id, data, filePath: normalizedFilePath });
const parsedData = await parseData({ id, data: rawItem, filePath });
store.set({ id, data: parsedData, filePath: normalizedFilePath });
}
} else {
logger.error(`Invalid data in ${fileName}. Must be an array or object.`);
Expand Down
70 changes: 61 additions & 9 deletions packages/astro/test/content-layer.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,11 @@ describe('Content Layer', () => {
assert.equal(json.customLoader.length, 5);
});

it('Returns `file()` loader collection', async () => {
assert.ok(json.hasOwnProperty('fileLoader'));
assert.ok(Array.isArray(json.fileLoader));
it('Returns json `file()` loader collection', async () => {
assert.ok(json.hasOwnProperty('jsonLoader'));
assert.ok(Array.isArray(json.jsonLoader));

const ids = json.fileLoader.map((item) => item.data.id);
const ids = json.jsonLoader.map((item) => item.data.id);
assert.deepEqual(ids, [
'labrador-retriever',
'german-shepherd',
Expand Down Expand Up @@ -97,6 +97,58 @@ describe('Content Layer', () => {
);
});

it('Returns nested json `file()` loader collection', async () => {
	// Expected ids, in order; they match the entries of the birds.json fixture.
	const expectedIds = ['bluejay', 'robin', 'sparrow', 'cardinal', 'goldfinch'];

	assert.ok(json.hasOwnProperty('nestedJsonLoader'));
	const entries = json.nestedJsonLoader;
	assert.ok(Array.isArray(entries));

	const actualIds = entries.map((entry) => entry.data.id);
	assert.deepEqual(actualIds, expectedIds);
});

it('Returns yaml `file()` loader collection', async () => {
	// Expected entry ids, in the order the loader is expected to return them.
	const expectedIds = [
		'bubbles',
		'finn',
		'shadow',
		'spark',
		'splash',
		'nemo',
		'angel-fish',
		'gold-stripe',
		'blue-tail',
		'bubble-buddy',
	];

	assert.ok(json.hasOwnProperty('yamlLoader'));
	const entries = json.yamlLoader;
	assert.ok(Array.isArray(entries));

	// Unlike the sibling tests, the id is read from the entry itself, not from `entry.data`.
	const actualIds = entries.map((entry) => entry.id);
	assert.deepEqual(actualIds, expectedIds);
});

it('Returns toml `file()` loader collection', async () => {
	// Expected entry ids, in the order the loader is expected to return them.
	const expectedIds = [
		'crown',
		'nikes-on-my-feet',
		'stars',
		'never-let-me-down',
		'no-church-in-the-wild',
		'family-ties',
		'somebody',
		'honest',
	];

	assert.ok(json.hasOwnProperty('tomlLoader'));
	const entries = json.tomlLoader;
	assert.ok(Array.isArray(entries));

	const actualIds = entries.map((entry) => entry.data.id);
	assert.deepEqual(actualIds, expectedIds);
});

// NOTE(review): this test has the same title and makes the same assertions as the
// 'Returns nested json `file()` loader collection' test defined earlier in this
// describe block; it adds no coverage and is a candidate for removal.
it('Returns nested json `file()` loader collection', async () => {
assert.ok(json.hasOwnProperty('nestedJsonLoader'));
assert.ok(Array.isArray(json.nestedJsonLoader));

const ids = json.nestedJsonLoader.map((item) => item.data.id);
assert.deepEqual(ids, ['bluejay', 'robin', 'sparrow', 'cardinal', 'goldfinch']);
});

it('Returns data entry by id', async () => {
assert.ok(json.hasOwnProperty('dataEntry'));
assert.equal(json.dataEntry.filePath?.split(sep).join(posixSep), 'src/data/dogs.json');
Expand Down Expand Up @@ -276,10 +328,10 @@ describe('Content Layer', () => {
});

it('Returns `file()` loader collection', async () => {
assert.ok(json.hasOwnProperty('fileLoader'));
assert.ok(Array.isArray(json.fileLoader));
assert.ok(json.hasOwnProperty('jsonLoader'));
assert.ok(Array.isArray(json.jsonLoader));

const ids = json.fileLoader.map((item) => item.data.id);
const ids = json.jsonLoader.map((item) => item.data.id);
assert.deepEqual(ids, [
'labrador-retriever',
'german-shepherd',
Expand Down Expand Up @@ -348,7 +400,7 @@ describe('Content Layer', () => {
it('updates collection when data file is changed', async () => {
const rawJsonResponse = await fixture.fetch('/collections.json');
const initialJson = devalue.parse(await rawJsonResponse.text());
assert.equal(initialJson.fileLoader[0].data.temperament.includes('Bouncy'), false);
assert.equal(initialJson.jsonLoader[0].data.temperament.includes('Bouncy'), false);

await fixture.editFile('/src/data/dogs.json', (prev) => {
const data = JSON.parse(prev);
Expand All @@ -359,7 +411,7 @@ describe('Content Layer', () => {
await fixture.onNextDataStoreChange();
const updatedJsonResponse = await fixture.fetch('/collections.json');
const updated = devalue.parse(await updatedJsonResponse.text());
assert.ok(updated.fileLoader[0].data.temperament.includes('Bouncy'));
assert.ok(updated.jsonLoader[0].data.temperament.includes('Bouncy'));
await fixture.resetAllFiles();
});
});
Expand Down
3 changes: 2 additions & 1 deletion packages/astro/test/fixtures/content-layer/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
"private": true,
"dependencies": {
"astro": "workspace:*",
"@astrojs/mdx": "workspace:*"
"@astrojs/mdx": "workspace:*",
"toml": "^3.0.0"
}
}
44 changes: 44 additions & 0 deletions packages/astro/test/fixtures/content-layer/src/content/config.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { defineCollection, z, reference } from 'astro:content';
import { file, glob } from 'astro/loaders';
import { loader } from '../loaders/post-loader.js';
import { parse as parseToml } from 'toml';

const blog = defineCollection({
loader: loader({ url: 'https://jsonplaceholder.typicode.com/posts' }),
Expand Down Expand Up @@ -118,6 +119,27 @@ const cats = defineCollection({
}),
});

// Collection loaded from a YAML file. No `parser` option is passed, so the
// `file()` loader relies on its built-in handling of the `.yaml` extension.
const fish = defineCollection({
loader: file('src/data/fish.yaml'),
schema: z.object({
name: z.string(),
breed: z.string(),
age: z.number(),
}),
});

// Collection loaded from a nested JSON document: the custom parser parses the
// whole file and returns only the value stored under its `birds` key.
const birds = defineCollection({
loader: file('src/data/birds.json', {
parser: (text) => JSON.parse(text).birds,
}),
schema: z.object({
id: z.string(),
name: z.string(),
breed: z.string(),
age: z.number(),
}),
});

// Absolute paths should also work
const absoluteRoot = new URL('../../content/space', import.meta.url);

Expand Down Expand Up @@ -198,14 +220,36 @@ const increment = defineCollection({
},
});

// Collection loaded from a TOML file using the `toml` package's parser; only
// the value under the `artists` key of the parsed document is used.
const artists = defineCollection({
loader: file('src/data/music.toml', { parser: (text) => parseToml(text).artists }),
schema: z.object({
id: z.string(),
name: z.string(),
genre: z.string().array(),
}),
});

// Collection loaded from the same TOML file as `artists`, taking the value
// under the `songs` key. Each song's `artists` field is an array of
// references to the `artists` collection.
const songs = defineCollection({
loader: file('src/data/music.toml', { parser: (text) => parseToml(text).songs }),
schema: z.object({
id: z.string(),
name: z.string(),
artists: z.array(reference('artists')),
}),
});

// Collections exported from this config. Presumably each key is the collection
// name used elsewhere (e.g. `reference('artists')` in `songs`) — confirm
// against the Astro content collections documentation.
export const collections = {
blog,
dogs,
cats,
fish,
birds,
numbers,
spacecraft,
increment,
images,
artists,
songs,
probes,
rodents,
};
34 changes: 34 additions & 0 deletions packages/astro/test/fixtures/content-layer/src/data/birds.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
{
"birds": [
{
"id": "bluejay",
"name": "Blue Jay",
"breed": "Cyanocitta cristata",
"age": 3
},
{
"id": "robin",
"name": "Robin",
"breed": "Turdus migratorius",
"age": 2
},
{
"id": "sparrow",
"name": "Sparrow",
"breed": "Passer domesticus",
"age": 1
},
{
"id": "cardinal",
"name": "Cardinal",
"breed": "Cardinalis cardinalis",
"age": 4
},
{
"id": "goldfinch",
"name": "Goldfinch",
"breed": "Spinus tristis",
"age": 2
}
]
}
Loading

0 comments on commit 21b5e80

Please sign in to comment.