Skip to content

Commit

Permalink
Merge pull request #629 from mendableai/go-parser-singleton
Browse files Browse the repository at this point in the history
Feat: parser singleton
  • Loading branch information
nickscamara authored Sep 5, 2024
2 parents 4fa917f + 82d6bf4 commit 554a050
Showing 1 changed file with 32 additions and 15 deletions.
47 changes: 32 additions & 15 deletions apps/api/src/lib/html-to-markdown.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,30 +8,47 @@ import dotenv from 'dotenv';
import { Logger } from './logger';
dotenv.config();

// NOTE: the converter singleton is provided by GoMarkdownConverter.getInstance() below.
// TODO: add a timeout to the Go parser

/**
 * Minimal shape of the koffi-bound `ConvertHTMLToMarkdown` function that
 * GoMarkdownConverter relies on. Only the Node-style `.async` call form is used.
 */
interface GoConvertFunction {
  async(html: string, callback: (err: Error | null, res: string) => void): void;
}

/**
 * Lazily-created, process-wide wrapper around the Go HTML-to-Markdown shared
 * library.
 *
 * The shared object is loaded and the `ConvertHTMLToMarkdown` symbol is bound
 * exactly once, when the first caller asks for the instance; every later call
 * to `getInstance()` reuses that binding instead of loading the library again.
 */
class GoMarkdownConverter {
  /** The single shared instance; stays undefined until the first `getInstance()` call. */
  private static instance: GoMarkdownConverter | undefined;

  /** Handle to the native `ConvertHTMLToMarkdown(string) -> string` function. */
  private readonly convert: GoConvertFunction;

  /**
   * Loads `go-html-to-md/html-to-markdown.so` (resolved relative to this
   * module's directory) and binds the converter function. Private so that the
   * library can only be loaded through `getInstance()`.
   */
  private constructor() {
    const goExecutablePath = join(__dirname, 'go-html-to-md/html-to-markdown.so');
    const lib = koffi.load(goExecutablePath);
    this.convert = lib.func('ConvertHTMLToMarkdown', 'string', ['string']);
  }

  /**
   * Returns the shared converter, constructing it on first use.
   *
   * If construction throws, nothing is cached, so the next call attempts the
   * load again.
   */
  public static getInstance(): GoMarkdownConverter {
    if (!GoMarkdownConverter.instance) {
      GoMarkdownConverter.instance = new GoMarkdownConverter();
    }
    return GoMarkdownConverter.instance;
  }

  /**
   * Converts an HTML string to Markdown using the bound native function.
   *
   * @param html - HTML source handed to the native converter as-is.
   * @returns A promise that resolves with the string the native function
   *   returns, or rejects with the error reported through the callback.
   */
  public async convertHTMLToMarkdown(html: string): Promise<string> {
    return new Promise<string>((resolve, reject) => {
      // Node-style callback: a truthy `err` means the call failed.
      this.convert.async(html, (err: Error | null, res: string) => {
        if (err) {
          reject(err);
        } else {
          resolve(res);
        }
      });
    });
  }
}

export async function parseMarkdown(html: string): Promise<string> {
if (!html) {
return '';
}

try {
if (process.env.USE_GO_MARKDOWN_PARSER == "true") {
const goExecutablePath = join(__dirname, 'go-html-to-md/html-to-markdown.so');
const lib = koffi.load(goExecutablePath);

const convert = lib.func('ConvertHTMLToMarkdown', 'string', ['string']);

let markdownContent = await new Promise<string>((resolve, reject) => {
convert.async(html, (err: Error, res: string) => {
if (err) {
reject(err);
} else {
resolve(res);
}
});
});
const converter = GoMarkdownConverter.getInstance();
let markdownContent = await converter.convertHTMLToMarkdown(html);

markdownContent = processMultiLineLinks(markdownContent);
markdownContent = removeSkipToContentLinks(markdownContent);
Expand Down

0 comments on commit 554a050

Please sign in to comment.