Skip to content

Commit 95613e3

Browse files
committed
Update llmstxt.ts to structure llms.txt a bit cleaner
1 parent e565d85 commit 95613e3

File tree

1 file changed

+186
-23
lines changed

1 file changed

+186
-23
lines changed

data/onPostBuild/llmstxt.ts

Lines changed: 186 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,16 @@ import languageInfo from '../../src/data/languages/languageInfo';
88
* It is heavily inspired by the gatsby-plugin-sitemap plugin, and stripped down to only what we need.
99
*/
1010

11-
const LLMS_TXT_PREAMBLE = `# https://ably.com/docs llms.txt\n`;
11+
// Header text placed at the start of the generated llms.txt output: an H1 title,
// a one-line blockquote summary of Ably, and a bullet list of product highlights.
// It is used as the first element of the serialized page list, ahead of the
// per-category sections.
const LLMS_TXT_PREAMBLE = `# Ably Documentation
12+
13+
> Ably is a realtime experience infrastructure platform that provides pub/sub messaging, chat, realtime data synchronization, and more.
14+
15+
- **Global Edge Network**: Ultra-low latency realtime messaging delivered through a globally distributed edge network
16+
- **Enterprise Scale**: Built to handle millions of concurrent connections with guaranteed message delivery
17+
- **Multiple Products**: Pub/Sub, Chat, LiveSync, LiveObjects, Spaces, Asset Tracking, and powerful integrations
18+
- **Developer-Friendly SDKs**: SDKs available for JavaScript, Python, Java, Go, Swift, and many more languages
19+
20+
`;
1221

1322
const REPORTER_PREFIX = 'onPostBuild:';
1423

@@ -25,6 +34,8 @@ const VALID_LANGUAGES = [
2534
'ruby',
2635
'swift',
2736
'go',
37+
'kotlin',
38+
'react',
2839
];
2940

3041
// Function to get the display label for a language
@@ -78,6 +89,76 @@ const escapeMarkdown = (text: string) => {
7889
return text.replace(/([\\`*_{}[\]()#+!])/g, '\\$1');
7990
};
8091

92+
// Category structure for organizing pages
93+
// Two-level grouping of documentation pages used when rendering llms.txt.
// The outer index is the category name and the inner index is the subcategory
// name; each level carries a `title` used for its heading. Every subcategory
// holds the list of pages assigned to it, each with its slug, its title and
// meta description, and the language identifiers found for that page.
interface CategoryStructure {
94+
[category: string]: {
95+
title: string;
96+
subcategories: {
97+
[subcategory: string]: {
98+
title: string;
99+
pages: Array<{
100+
slug: string;
101+
meta: { title: string; meta_description: string };
102+
languages: string[];
103+
}>;
104+
};
105+
};
106+
};
107+
}
108+
109+
// Function to categorize a page based on its slug
110+
// Lookup table from the first path segment of a docs slug to the llms.txt
// category and subcategory the page is listed under.
// Built once at module load. A Map is used instead of an object literal so that
// only the keys listed here can match: names inherited from Object.prototype
// (`constructor`, `toString`, `__proto__`, ...) are never treated as categories.
const CATEGORY_BY_SLUG_PREFIX: ReadonlyMap<string, { category: string; subcategory: string }> = new Map<
  string,
  { category: string; subcategory: string }
>([
  // Platform
  ['platform', { category: 'Platform', subcategory: 'Platform & Account' }],
  ['auth', { category: 'Platform', subcategory: 'Authentication' }],
  ['api', { category: 'Platform', subcategory: 'API Reference' }],
  ['sdks', { category: 'Platform', subcategory: 'SDKs' }],

  // Pub/Sub - Core realtime messaging features
  ['basics', { category: 'Pub/Sub', subcategory: 'Basics' }],
  ['channels', { category: 'Pub/Sub', subcategory: 'Channels' }],
  ['connect', { category: 'Pub/Sub', subcategory: 'Connections' }],
  ['getting-started', { category: 'Pub/Sub', subcategory: 'Getting Started' }],
  ['guides', { category: 'Pub/Sub', subcategory: 'Guides' }],
  ['how-to', { category: 'Pub/Sub', subcategory: 'How-To' }],
  ['messages', { category: 'Pub/Sub', subcategory: 'Messages' }],
  ['metadata-stats', { category: 'Pub/Sub', subcategory: 'Metadata & Statistics' }],
  ['presence-occupancy', { category: 'Pub/Sub', subcategory: 'Presence & Occupancy' }],
  ['protocols', { category: 'Pub/Sub', subcategory: 'Protocols' }],
  ['pub-sub', { category: 'Pub/Sub', subcategory: 'Pub/Sub Features' }],
  ['push', { category: 'Pub/Sub', subcategory: 'Push Notifications' }],
  ['storage-history', { category: 'Pub/Sub', subcategory: 'Storage & History' }],

  // Chat
  ['chat', { category: 'Chat', subcategory: 'Chat' }],

  // Spaces
  ['spaces', { category: 'Spaces', subcategory: 'Spaces' }],

  // LiveObjects
  ['liveobjects', { category: 'LiveObjects', subcategory: 'LiveObjects' }],

  // LiveSync
  ['livesync', { category: 'LiveSync', subcategory: 'LiveSync' }],

  // Asset Tracking
  ['asset-tracking', { category: 'Asset Tracking', subcategory: 'Asset Tracking' }],
]);

/**
 * Determines which llms.txt category and subcategory a page belongs to.
 *
 * Only the first `/`-separated segment of the slug is inspected. Slugs whose
 * first segment is empty or not present in the lookup table fall back to the
 * `General` / `Documentation` bucket.
 *
 * @param slug - Page slug relative to the docs root, e.g. `chat/rooms`.
 * @returns A new `{ category, subcategory }` object on every call, so callers
 *          may keep or modify it without affecting the lookup table.
 */
const categorizePage = (slug: string): { category: string; subcategory: string } => {
  const firstPart = slug.split('/')[0] ?? '';

  // Map#get only ever sees the keys registered above, unlike property access on
  // an object literal which would also resolve inherited prototype members.
  const match = CATEGORY_BY_SLUG_PREFIX.get(firstPart);
  if (match !== undefined) {
    return { category: match.category, subcategory: match.subcategory };
  }

  // Default categorization for uncategorized pages
  return { category: 'General', subcategory: 'Documentation' };
};
161+
81162
// Function to extract code element classes from an MDX file
82163
const extractCodeLanguages = async (filePath: string): Promise<Set<string>> => {
83164
try {
@@ -217,32 +298,114 @@ export const onPostBuild: GatsbyNode['onPostBuild'] = async ({ graphql, reporter
217298
`${REPORTER_PREFIX} Found ${allPages.length} pages to place into llms.txt (${textilePages.length} textile, ${mdxPages.length} MDX)`,
218299
);
219300

220-
const serializedPages = [LLMS_TXT_PREAMBLE];
301+
// Organize pages into categories
302+
const categoryStructure: CategoryStructure = {};
221303

222304
for (const page of allPages) {
223-
const { slug, meta, languages } = page;
224-
const { title, meta_description } = meta;
225-
226-
try {
227-
const baseUrl = prefixPath({ url: `/docs/${slug}`, siteUrl, pathPrefix: basePath });
228-
const safeTitle = escapeMarkdown(title);
229-
230-
// Generate base page entry (without language parameter)
231-
const baseLink = `[${safeTitle}](${baseUrl})`;
232-
const baseLine = `- ${[baseLink, meta_description].join(': ')}`;
233-
serializedPages.push(baseLine);
234-
235-
// Generate language-specific entries if the page has languages
236-
if (languages && languages.length > 0) {
237-
for (const language of languages) {
238-
const langUrl = `${baseUrl}?lang=${language}`;
239-
const langLink = `[${safeTitle} (${getLanguageLabel(language)})](${langUrl})`;
240-
const langLine = `- ${[langLink, meta_description].join(': ')}`;
241-
serializedPages.push(langLine);
305+
const { category, subcategory } = categorizePage(page.slug);
306+
307+
// Initialize category if it doesn't exist
308+
if (!categoryStructure[category]) {
309+
categoryStructure[category] = {
310+
title: category,
311+
subcategories: {},
312+
};
313+
}
314+
315+
// Initialize subcategory if it doesn't exist
316+
if (!categoryStructure[category].subcategories[subcategory]) {
317+
categoryStructure[category].subcategories[subcategory] = {
318+
title: subcategory,
319+
pages: [],
320+
};
321+
}
322+
323+
// Add page to subcategory (only base page without language variants)
324+
categoryStructure[category].subcategories[subcategory].pages.push(page);
325+
}
326+
327+
// Generate serialized output with categorization
328+
const serializedPages = [LLMS_TXT_PREAMBLE];
329+
330+
// Define the order of categories
331+
const categoryOrder = [
332+
'Platform',
333+
'Pub/Sub',
334+
'Chat',
335+
'Spaces',
336+
'LiveObjects',
337+
'LiveSync',
338+
'Asset Tracking',
339+
'General',
340+
];
341+
342+
// Sort categories by defined order
343+
const sortedCategories = Object.keys(categoryStructure).sort((a, b) => {
344+
const indexA = categoryOrder.indexOf(a);
345+
const indexB = categoryOrder.indexOf(b);
346+
if (indexA === -1 && indexB === -1) return a.localeCompare(b);
347+
if (indexA === -1) return 1;
348+
if (indexB === -1) return -1;
349+
return indexA - indexB;
350+
});
351+
352+
for (const categoryKey of sortedCategories) {
353+
const category = categoryStructure[categoryKey];
354+
serializedPages.push(`## ${category.title}`);
355+
serializedPages.push('');
356+
357+
// Sort subcategories alphabetically
358+
const sortedSubcategories = Object.keys(category.subcategories).sort();
359+
360+
for (const subcategoryKey of sortedSubcategories) {
361+
const subcategory = category.subcategories[subcategoryKey];
362+
serializedPages.push(`### ${subcategory.title}`);
363+
364+
for (const page of subcategory.pages) {
365+
const { slug, meta, languages } = page;
366+
const { title, meta_description } = meta;
367+
368+
try {
369+
const baseUrl = prefixPath({ url: `/docs/${slug}`, siteUrl, pathPrefix: basePath });
370+
const safeTitle = escapeMarkdown(title);
371+
372+
// Generate base page entry (without language parameter)
373+
const baseLink = `[${safeTitle}](${baseUrl})`;
374+
const baseLine = `- ${[baseLink, meta_description].join(': ')}`;
375+
serializedPages.push(baseLine);
376+
377+
// Generate language-specific entries if the page has languages
378+
// Skip language variants that match the page's primary language (e.g., skip ?lang=go for /getting-started/go)
379+
if (languages && languages.length > 0) {
380+
// Extract the last part of the slug to check if it matches a language
381+
const slugParts = slug.split('/');
382+
const slugLastPart = slugParts[slugParts.length - 1];
383+
384+
// Map slug names to their corresponding language codes
385+
const slugToLangMap: Record<string, string> = {
386+
dotnet: 'csharp',
387+
'objective-c': 'objc',
388+
};
389+
390+
// Get the primary language for this page (either direct match or mapped)
391+
const primaryLanguage = slugToLangMap[slugLastPart] || slugLastPart;
392+
393+
for (const language of languages) {
394+
// Skip if the language matches the page's primary language
395+
if (language !== primaryLanguage) {
396+
const langUrl = `${baseUrl}?lang=${language}`;
397+
const langLink = `[${safeTitle} (${getLanguageLabel(language)})](${langUrl})`;
398+
const langLine = `- ${[langLink, meta_description].join(': ')}`;
399+
serializedPages.push(langLine);
400+
}
401+
}
402+
}
403+
} catch (err) {
404+
reporter.panic(`${REPORTER_PREFIX} Error serializing pages`, err as Error);
242405
}
243406
}
244-
} catch (err) {
245-
reporter.panic(`${REPORTER_PREFIX} Error serializing pages`, err as Error);
407+
408+
serializedPages.push(''); // Add blank line after each subcategory
246409
}
247410
}
248411

0 commit comments

Comments
 (0)