diff --git a/apps/converter/htmlToMarkdown.js b/apps/converter/htmlToMarkdown.js
index 621cc02cb..30706f5c9 100644
--- a/apps/converter/htmlToMarkdown.js
+++ b/apps/converter/htmlToMarkdown.js
@@ -8,20 +8,29 @@ import TurndownService from 'turndown';
import { fileURLToPath } from 'url';
// @todo Fix this to work locally and live
+const isLagoon = !!process.env.LAGOON;
const __filename = fileURLToPath(import.meta.url);
-const __dirname = path.dirname(__filename);
-let lagoon_dirname = __dirname;
-lagoon_dirname = '/app/web/sites/default/files/converted';
+const __dirname = isLagoon
+ ? '/app/web/sites/default/files/converted'
+ : path.dirname(__filename);
+/**
+ * Extract the primary content markup from a full HTML document.
+ *
+ * Only the contents of the <body> element are parsed; within it the first
+ * <main> element is preferred, falling back to the first <article>.
+ *
+ * @param {string} htmlString - Raw HTML of the page.
+ * @returns {Promise<string>} Inner HTML of the matched element, or '' when
+ *   there is no <body>, or neither <main> nor <article> is present.
+ */
async function extractMainContent(htmlString) {
- // Create a new JSDOM instance and parse the HTML string
- const dom = new JSDOM(htmlString);
-
- // Extract the element content
- const mainElement = dom.window.document.querySelector('main');
-
- // Return the inner HTML of the tag, or an empty string if not found
- return mainElement ? mainElement.innerHTML : '';
+  // Capture everything between the opening <body ...> tag and </body>.
+  const bodyRegex = /<body[^>]*>([\s\S]*?)<\/body>/i;
+  const match = htmlString.match(bodyRegex);
+  if (!match) {
+    // No <body> element: return '' rather than falling through to undefined,
+    // so callers always receive a string.
+    return '';
+  }
+
+  // Create a new JSDOM instance from the body markup only.
+  const dom = new JSDOM(match[1]);
+  const { document } = dom.window;
+
+  // Prefer <main>; fall back to <article> when the page has no <main>.
+  const mainElement =
+    document.querySelector('main') ?? document.querySelector('article');
+
+  // Return the inner HTML of the matched element, or an empty string if not found.
+  return mainElement ? mainElement.innerHTML : '';
}
async function getImageExtension(buffer) {
@@ -73,7 +82,7 @@ export async function htmlToMarkdown(url) {
const html = await extractMainContent(fullHtml);
// Generate folder name based on HTML content
const folderName = generateFolderName(html);
- const outputDir = path.join(lagoon_dirname, folderName);
+ const outputDir = path.join(__dirname, folderName);
const imagesDir = path.join(outputDir, 'images');
await fs.ensureDir(outputDir);
diff --git a/apps/converter/wordToMarkdown.js b/apps/converter/wordToMarkdown.js
index a3d29cb70..4ea4c73d9 100644
--- a/apps/converter/wordToMarkdown.js
+++ b/apps/converter/wordToMarkdown.js
@@ -7,11 +7,11 @@ import TurndownService from 'turndown';
import { fileURLToPath } from 'url';
// @todo Fix this to work locally and live
-// const isLagoon = !!process.env.LAGOON;
+const isLagoon = !!process.env.LAGOON;
const __filename = fileURLToPath(import.meta.url);
-const __dirname = path.dirname(__filename);
-let lagoon_dirname = __dirname;
-lagoon_dirname = '/app/web/sites/default/files/converted';
+const __dirname = isLagoon
+ ? '/app/web/sites/default/files/converted'
+ : path.dirname(__filename);
async function getImageExtension(buffer) {
const type = await imageType(buffer);
@@ -31,7 +31,7 @@ export async function wordToMarkdown(filePath) {
const folderName = generateFolderName(filePath);
// const outputDir = path.join(__dirname, folderName);
- const outputDir = path.join(lagoon_dirname, folderName);
+ const outputDir = path.join(__dirname, folderName);
const imagesDir = path.join(outputDir, 'images');
await fs.ensureDir(outputDir);