Skip to content

Commit

Permalink
Add more page content
Browse files Browse the repository at this point in the history
Signed-off-by: Daishan Peng <[email protected]>
  • Loading branch information
StrongMonkey committed Aug 31, 2024
1 parent 9d2ea2a commit 44da123
Show file tree
Hide file tree
Showing 3 changed files with 208 additions and 17 deletions.
40 changes: 28 additions & 12 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@ import { Client } from "@notionhq/client";
import dotenv from "dotenv";
import { writeFile, mkdir } from "fs/promises";
import path from "path";
import { SearchResponse } from "@notionhq/client/build/src/api-endpoints";
import { PageObjectResponse, SearchResponse } from "@notionhq/client/build/src/api-endpoints";
import { getPageContent } from "./page";
import * as fs from "node:fs";

dotenv.config();

Expand All @@ -12,18 +14,25 @@ async function main() {
});

// Function to write a page to a file
async function writePageToFile(page: any, directory: string) {
const pageId = page.id.replace(/-/g, '');
const filePath = path.join(directory, `${pageId}.data`);
await writeFile(filePath, JSON.stringify(page, null, 2));
console.log(`Wrote page ${pageId} to ${filePath}`);
/**
 * Renders one Notion page to Markdown and writes it to
 * `<directory>/<page id>/<title>.md`.
 *
 * The file name is taken from the page's title property; when the page has
 * no usable title the page id is used instead.
 *
 * @param page - Page object as returned by the Notion search endpoint.
 * @param directory - Root output directory; a per-page sub-directory is created inside it.
 */
async function writePageToFile(page: PageObjectResponse, directory: string) {
  const pageId = page.id;
  const pageContent = await getPageContent(notion, pageId);
  const fileDir = path.join(directory, pageId);
  await mkdir(fileDir, { recursive: true });

  // The title property is identified by its type, not its key: database
  // pages can name it anything, so looking only at "title"/"Name" misses some.
  const titleProperty = Object.values(page.properties ?? {}).find(
    (property) => property.type === "title",
  );
  // Join every rich-text segment so partially formatted titles are not truncated.
  const rawTitle =
    titleProperty?.type === "title"
      ? titleProperty.title.map((segment) => segment.plain_text).join("")
      : "";

  // Path separators would otherwise be interpreted as nested directories.
  const safeTitle = rawTitle.trim().replace(/[\/\\]/g, "-");
  const title = safeTitle !== "" ? safeTitle : pageId;

  const filePath = path.join(fileDir, title + ".md");
  // Use the promise-based writer so the event loop is not blocked.
  await writeFile(filePath, pageContent, "utf8");
}

// Function to fetch all pages
async function fetchAllPages() {
let pages: any[] = [];
let cursor: string | undefined = undefined;


while (true) {
const response: SearchResponse = await notion.search({
filter: {
Expand All @@ -47,15 +56,22 @@ async function main() {

// Fetch all pages
const pages = await fetchAllPages();
let metadata: Map<string, {
url: string;
}> = new Map();

// Define the output directory
const outputDir = path.join(process.env.WORKSPACE_DIR!!, 'knowledge', 'integrations', 'notion');
await mkdir(outputDir, { recursive: true }); // Ensure the directory exists
await mkdir(outputDir, { recursive: true });

for (const page of pages) {
await writePageToFile(page, outputDir);
metadata.set(page.id, {
url: page.url,
})
}

// Write all pages to files
await Promise.all(
pages.map((page) => writePageToFile(page, outputDir))
);
const metadataPath = path.join(outputDir, 'metadata.json');
await writeFile(metadataPath, JSON.stringify(Object.fromEntries(metadata)), 'utf8');

console.log(`Finished writing ${pages.length} pages to ${outputDir}`);
}
Expand Down
178 changes: 178 additions & 0 deletions src/page.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
import { Client } from "@notionhq/client";
import { BlockObjectResponse } from "@notionhq/client/build/src/api-endpoints";


/**
 * Renders the children of a Notion block (or page) as Markdown-like text.
 *
 * Every rendered block ends with a newline. Nested children are rendered
 * recursively with two extra spaces of indentation. All result pages of the
 * children listing are followed via `has_more` / `next_cursor`, so content
 * beyond the first response is included.
 *
 * @param client - Notion API client used to list block children.
 * @param id - Id of the page or block whose children are rendered.
 * @param indentation - Number of spaces to prefix each rendered line with.
 * @returns The rendered text; empty string when there are no children.
 */
export async function getPageContent(client: Client, id: string, indentation = 0): Promise<string> {
    const withNewline = (text: string): string => (text.endsWith("\n") ? text : text + "\n")

    let result = ""
    let cursor: string | undefined = undefined
    do {
        // Explicit annotation: the response type would otherwise be inferred
        // circularly through `cursor` inside the loop.
        const response: Awaited<ReturnType<Client["blocks"]["children"]["list"]>> =
            await client.blocks.children.list({block_id: id, start_cursor: cursor})

        for (const b of response.results) {
            const block = b as BlockObjectResponse

            // Tables are complicated, so we handle them completely separately
            if (block.type === "table") {
                // Typed as unknown so a table renderer that returns nothing
                // cannot leak the text "undefined" into the output.
                const table: unknown = await printTable(client, b)
                if (typeof table === "string" && table.trim() !== "") {
                    result += withNewline(table)
                }
                continue
            }

            const text = await printBlock(client, block, indentation)
            // Blocks that render to nothing (e.g. pure containers) are skipped
            // rather than emitted as whitespace-only lines.
            if (text.trim() !== "") {
                result += withNewline(text)
            }

            // A synced copy points at another block for its content, which is
            // the responsibility of printBlock; an original synced block keeps
            // its content in its own children, so those are rendered here.
            const isSyncedCopy = block.type === "synced_block" && block.synced_block.synced_from !== null
            if (block.has_children && block.type !== "child_page" && !isSyncedCopy) {
                // A synced block is only a wrapper, so its children keep the current depth.
                const childIndentation = block.type === "synced_block" ? indentation : indentation + 2
                result += await getPageContent(client, block.id, childIndentation)
            }
        }

        cursor = response.has_more && response.next_cursor ? response.next_cursor : undefined
    } while (cursor !== undefined)

    return result
}

/**
 * Renders a single Notion block (without its children) as Markdown-like text.
 *
 * The text is prefixed with `indentation` spaces and every line break inside
 * it is followed by the same indentation, so multi-line blocks (code, quotes)
 * stay aligned. Block types without a case below render as indentation only.
 *
 * For a synced block that copies another block, the content of the referenced
 * block is fetched through `getPageContent` and returned as-is.
 *
 * @param client - Notion API client, used only to resolve synced blocks.
 * @param b - The block to render.
 * @param indentation - Number of spaces to indent each line with.
 * @returns The rendered text, without a trailing newline (except for synced content).
 */
async function printBlock(client: Client, b: BlockObjectResponse, indentation: number): Promise<string> {
    const indent = " ".repeat(Math.max(0, indentation))
    let result: string = indent
    switch (b.type) {
        case "bookmark": {
            const caption = b.bookmark.caption !== null ? richTextArrayToString(b.bookmark.caption).trim() : ""
            result += caption !== ""
                ? `Bookmark: ${b.bookmark.url} (${caption})`
                : `Bookmark: ${b.bookmark.url}`
            break
        }
        case "bulleted_list_item":
            result += `- ${richTextArrayToString(b.bulleted_list_item.rich_text)}`
            break
        case "callout":
            result += `> ${richTextArrayToString(b.callout.rich_text)}`
            break
        case "child_database":
            result += `Child Database: ${b.child_database.title}`
            break
        case "child_page":
            result += `Child Page: ${b.child_page.title}`
            break
        case "code": {
            if (b.code.language !== null) {
                result += "```" + b.code.language + "\n"
            } else {
                result += "```\n"
            }
            result += richTextArrayToString(b.code.rich_text)
            result += "\n```"
            const caption = b.code.caption !== null ? richTextArrayToString(b.code.caption).trim() : ""
            if (caption !== "") {
                result += `\n(${caption})`
            }
            break
        }
        case "divider":
            result += "-------------------------------------"
            break
        case "embed":
            result += `Embed: ${b.embed.url}`
            break
        case "equation":
            result += `Equation: ${b.equation.expression}`
            break
        case "file":
            result += fileToString("File", b.file)
            break
        case "heading_1":
            result += `# ${richTextArrayToString(b.heading_1.rich_text)}`
            break
        case "heading_2":
            result += `## ${richTextArrayToString(b.heading_2.rich_text)}`
            break
        case "heading_3":
            result += `### ${richTextArrayToString(b.heading_3.rich_text)}`
            break
        case "image":
            result += fileToString("Image", b.image)
            break
        case "link_preview":
            result += b.link_preview.url
            break
        case "numbered_list_item":
            result += `1. ${richTextArrayToString(b.numbered_list_item.rich_text)}`
            break
        case "paragraph":
            result += richTextArrayToString(b.paragraph.rich_text)
            break
        case "pdf":
            result += fileToString("PDF", b.pdf)
            break
        case "quote":
            result += "\"\"\"\n"
            result += richTextArrayToString(b.quote.rich_text)
            result += "\n\"\"\""
            break
        case "synced_block":
            if (b.synced_block.synced_from !== null) {
                // The recursive call already indents every line it produces,
                // so its output is returned untouched instead of being
                // prefixed and re-indented a second time.
                return await getPageContent(client, b.synced_block.synced_from.block_id, indentation)
            }
            break
        case "to_do":
            if (b.to_do.checked) {
                result += `[x] ${richTextArrayToString(b.to_do.rich_text)}`
            } else {
                result += `[ ] ${richTextArrayToString(b.to_do.rich_text)}`
            }
            break
        case "toggle":
            result += `> ${richTextArrayToString(b.toggle.rich_text)}`
            break
        case "video":
            result += fileToString("Video", b.video)
            break
    }
    // A string pattern would replace only the first line break; the global
    // regex indents every continuation line.
    return result.replace(/\n/g, "\n" + indent)
}

/**
 * Concatenates the `plain_text` of every item in a Notion rich-text array.
 *
 * Segments are joined without a separator: a rich-text array splits one run
 * of text at formatting boundaries, so inserting spaces would break words
 * that are only partially formatted and would leave a trailing space that
 * prevents `**…**` emphasis from closing.
 *
 * @param richTextArray - Rich-text items; `null`/`undefined` is treated as empty.
 * @returns The combined plain text, or an empty string when there is none.
 */
export function richTextArrayToString(richTextArray: any[]) {
    if (!Array.isArray(richTextArray)) {
        return ""
    }
    // Items without plain_text contribute nothing instead of the text "undefined".
    return richTextArray.map((item) => item?.plain_text ?? "").join("")
}

/**
 * Renders a Notion file-like object (file, image, PDF, video) as one line.
 *
 * Notion-hosted files are rendered with their URL and expiry time, external
 * files with their URL only. A non-empty caption is appended in parentheses.
 * Objects of any other `type` render as the caption alone (or nothing).
 *
 * @param prefix - Label describing the kind of file, e.g. "Image".
 * @param file - The file object; `caption` may be missing or null.
 * @returns The rendered line.
 */
function fileToString(prefix: any, file: any) {
    let result = ""
    if (file.type === "file") {
        result = `${prefix}: ${file.file.url} (expires ${file.file.expiry_time})`
    } else if (file.type === "external") {
        result = `External ${prefix}: ${file.external.url}`
    }
    // `!= null` also covers a missing caption, which would otherwise be
    // passed on and fail when iterated.
    const caption = file.caption != null ? richTextArrayToString(file.caption).trim() : ""
    if (caption !== "") {
        result += ` (${caption})`
    }
    return result
}

/**
 * Renders a Notion table block as text, one line per table row.
 *
 * When the table has a column header, the first row is rendered as the
 * header row; all other rows are rendered as regular rows, with the first
 * cell emphasised when the table has a row header.
 *
 * @param client - Notion API client used to list the table's rows.
 * @param table - The table block (its `id` and `table` settings are read).
 * @returns The rendered rows, each terminated by a newline; empty string for a table without rows.
 */
async function printTable(client: Client, table: any): Promise<string> {
    // Collect every row, following pagination so long tables are not cut off.
    const rows: any[] = []
    let cursor: string | undefined = undefined
    do {
        // Explicit annotation avoids circular inference through `cursor`.
        const response: Awaited<ReturnType<Client["blocks"]["children"]["list"]>> =
            await client.blocks.children.list({block_id: table.id, start_cursor: cursor})
        rows.push(...response.results)
        cursor = response.has_more && response.next_cursor ? response.next_cursor : undefined
    } while (cursor !== undefined)

    const hasColumnHeader = Boolean(table.table.has_column_header)
    const lines = rows.map((row, index) =>
        printTableRow(row.table_row, table.table.has_row_header, hasColumnHeader && index === 0))

    // The rendered rows are returned to the caller rather than dropped.
    return lines.length > 0 ? lines.join("\n") + "\n" : ""
}

/**
 * Renders one table row as a pipe-delimited Markdown row.
 *
 * @param row - Table row whose `cells` is an array of rich-text arrays.
 * @param boldFirst - When truthy (and `boldAll` is falsy), the first cell is emphasised.
 * @param boldAll - When truthy, every cell is emphasised and a header
 *   delimiter line with one `---` cell per column is appended.
 * @returns The rendered row without a trailing newline; `"|"` for a row without cells.
 */
function printTableRow(row: any, boldFirst: any, boldAll: any): string {
    const cells: string[] = (row?.cells ?? []).map((cell: any) =>
        richTextArrayToString(cell)
            // Surrounding whitespace would stop `**…**` from being read as emphasis.
            .trim()
            // A line break or a bare pipe inside a cell would break the row apart.
            .replace(/\r?\n/g, " ")
            .replace(/\|/g, "\\|"))

    // Empty cells stay empty instead of becoming a literal "****".
    const bold = (text: string): string => (text === "" ? text : `**${text}**`)

    let result = "|"
    cells.forEach((text, index) => {
        const emphasised = boldAll || (boldFirst && index === 0)
        result += ` ${emphasised ? bold(text) : text} |`
    })

    if (boldAll && cells.length > 0) {
        // Markdown tables need one delimiter cell per column.
        result += "\n|" + " --- |".repeat(cells.length)
    }
    return result
}
7 changes: 2 additions & 5 deletions tool.gpt
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
Name: Sync Notion Pages
Description: Provides access to the Notion API (read-only)
Credential: github.com/gptscript-ai/gateway-oauth2 as notion.read with NOTION_TOKEN as env and notion as integration
Context: syncPages

---
Name: syncPages
Description: Syncs Notion Pages
#!/usr/bin/env npm --prefix ${GPTSCRIPT_TOOL_DIR} run start


#!/usr/bin/env npm run start

0 comments on commit 44da123

Please sign in to comment.