diff --git a/package-lock.json b/package-lock.json index 434f7fa6..d91d4499 100644 --- a/package-lock.json +++ b/package-lock.json @@ -39,7 +39,8 @@ "react": "^18.2.0", "react-dom": "^18.2.0", "showdown": "^2.1.0", - "stripe": "^16.8.0" + "stripe": "^16.8.0", + "xlsx": "^0.18.5" }, "devDependencies": { "@types/base-64": "^1.0.0", @@ -4019,6 +4020,15 @@ "node": ">=0.4.0" } }, + "node_modules/adler-32": { + "version": "1.3.1", + "resolved": "https://registry.npmjs.org/adler-32/-/adler-32-1.3.1.tgz", + "integrity": "sha512-ynZ4w/nUUv5rrsR8UUGoe1VC9hZj6V5hU9Qw1HlMDJGEJw5S7TfTErWTjMys6M7vr0YWcPqs3qAr4ss0nDfP+A==", + "license": "Apache-2.0", + "engines": { + "node": ">=0.8" + } + }, "node_modules/agent-base": { "version": "7.1.1", "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-7.1.1.tgz", @@ -4779,6 +4789,19 @@ "url": "https://opencollective.com/browserslist" } }, + "node_modules/cfb": { + "version": "1.2.2", + "resolved": "https://registry.npmjs.org/cfb/-/cfb-1.2.2.tgz", + "integrity": "sha512-KfdUZsSOw19/ObEWasvBP/Ac4reZvAGauZhs6S/gqNhXhI7cKwvlH7ulj+dOEYnca4bm4SGo8C1bTAQvnTjgQA==", + "license": "Apache-2.0", + "dependencies": { + "adler-32": "~1.3.0", + "crc-32": "~1.2.0" + }, + "engines": { + "node": ">=0.8" + } + }, "node_modules/chalk": { "version": "4.1.0", "resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.0.tgz", @@ -5034,6 +5057,15 @@ "node": ">= 0.12.0" } }, + "node_modules/codepage": { + "version": "1.15.0", + "resolved": "https://registry.npmjs.org/codepage/-/codepage-1.15.0.tgz", + "integrity": "sha512-3g6NUTPd/YtuuGrhMnOMRjFc+LJw/bnMp3+0r/Wcz3IXUuCosKRJvMphm5+Q+bvTVGcJJuRvVLuYba+WojaFaA==", + "license": "Apache-2.0", + "engines": { + "node": ">=0.8" + } + }, "node_modules/collect-v8-coverage": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/collect-v8-coverage/-/collect-v8-coverage-1.0.2.tgz", @@ -5169,6 +5201,18 @@ "resolved": "https://registry.npmjs.org/core-util-is/-/core-util-is-1.0.2.tgz", "integrity": "sha1-tf1UIgqivFq1eqtxQMlAdUUDwac=" }, + "node_modules/crc-32": { + "version": "1.2.2", + "resolved": "https://registry.npmjs.org/crc-32/-/crc-32-1.2.2.tgz", + "integrity": "sha512-ROmzCKrTnOwybPcJApAA6WBWij23HVfGVNKqqrZpuyZOHqK2CwHSvpGuyt/UNNvaIjEd8X5IFGp4Mh+Ie1IHJQ==", + "license": "Apache-2.0", + "bin": { + "crc32": "bin/crc32.njs" + }, + "engines": { + "node": ">=0.8" + } + }, "node_modules/create-jest": { "version": "29.7.0", "resolved": "https://registry.npmjs.org/create-jest/-/create-jest-29.7.0.tgz", @@ -6863,6 +6907,15 @@ "node": ">= 0.6" } }, + "node_modules/frac": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/frac/-/frac-1.1.2.tgz", + "integrity": "sha512-w/XBfkibaTl3YDqASwfDUqkna4Z2p9cFSr1aHDt0WoMTECnRfBOv2WArlZILlqgWlmdIlALXGpM2AOhEk5W3IA==", + "license": "Apache-2.0", + "engines": { + "node": ">=0.8" + } + }, "node_modules/fresh": { "version": "0.5.2", "resolved": "https://registry.npmjs.org/fresh/-/fresh-0.5.2.tgz", @@ -11844,6 +11897,18 @@ "integrity": "sha512-D9cPgkvLlV3t3IzL0D0YLvGA9Ahk4PcvVwUbN0dSGr1aP0Nrt4AEnTUbuGvquEC0mA64Gqt1fzirlRs5ibXx8g==", "dev": true }, + "node_modules/ssf": { + "version": "0.11.2", + "resolved": "https://registry.npmjs.org/ssf/-/ssf-0.11.2.tgz", + "integrity": "sha512-+idbmIXoYET47hH+d7dfm2epdOMUDjqcB4648sTZ+t2JwoyBFL/insLfB/racrDmsKB3diwsDA696pZMieAC5g==", + "license": "Apache-2.0", + "dependencies": { + "frac": "~1.1.2" + }, + "engines": { + "node": ">=0.8" + } + }, "node_modules/ssri": { "version": "10.0.6", "resolved": "https://registry.npmjs.org/ssri/-/ssri-10.0.6.tgz", @@ -12909,6 +12974,24 @@ "node": ">=8" } }, + "node_modules/wmf": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/wmf/-/wmf-1.0.2.tgz", + "integrity": "sha512-/p9K7bEh0Dj6WbXg4JG0xvLQmIadrner1bi45VMJTfnbVHsc7yIajZyoSoK60/dtVBs12Fm6WkUI5/3WAVsNMw==", + "license": "Apache-2.0", + "engines": { + "node": ">=0.8" + } + }, + "node_modules/word": { + "version": "0.3.0", + "resolved": "https://registry.npmjs.org/word/-/word-0.3.0.tgz", + "integrity": "sha512-OELeY0Q61OXpdUfTp+oweA/vtLVg5VDOXh+3he3PNzLGG/y0oylSOC1xRVj0+l4vQ3tj/bB1HVHv1ocXkQceFA==", + "license": "Apache-2.0", + "engines": { + "node": ">=0.8" + } + }, "node_modules/wrap-ansi": { "version": "7.0.0", "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz", @@ -13010,6 +13093,27 @@ } } }, + "node_modules/xlsx": { + "version": "0.18.5", + "resolved": "https://registry.npmjs.org/xlsx/-/xlsx-0.18.5.tgz", + "integrity": "sha512-dmg3LCjBPHZnQp5/F/+nnTa+miPJxUXB6vtk42YjBBKayDNagxGEeIdWApkYPOf3Z3pm3k62Knjzp7lMeTEtFQ==", + "license": "Apache-2.0", + "dependencies": { + "adler-32": "~1.3.0", + "cfb": "~1.2.1", + "codepage": "~1.15.0", + "crc-32": "~1.2.1", + "ssf": "~0.11.2", + "wmf": "~1.0.1", + "word": "~0.3.0" + }, + "bin": { + "xlsx": "bin/xlsx.njs" + }, + "engines": { + "node": ">=0.8" + } + }, "node_modules/xml-name-validator": { "version": "5.0.0", "resolved": "https://registry.npmjs.org/xml-name-validator/-/xml-name-validator-5.0.0.tgz", diff --git a/package.json b/package.json index f57d6149..696b6b4d 100644 --- a/package.json +++ b/package.json @@ -61,7 +61,8 @@ "react": "^18.2.0", "react-dom": "^18.2.0", "showdown": "^2.1.0", - "stripe": "^16.8.0" + "stripe": "^16.8.0", + "xlsx": "^0.18.5" }, "devDependencies": { "@types/base-64": "^1.0.0", @@ -98,4 +99,4 @@ "ts-node": "^10.9.1", "typescript": "^5.2.2" } -} \ No newline at end of file +} diff --git a/src/infrastracture/adapters/fileConversion/PrepareDeck.ts b/src/infrastracture/adapters/fileConversion/PrepareDeck.ts index 09f8f5a9..28e20159 100644 --- a/src/infrastracture/adapters/fileConversion/PrepareDeck.ts +++ b/src/infrastracture/adapters/fileConversion/PrepareDeck.ts @@ -6,11 +6,13 @@ import { isImageFile, isPDFFile, isPPTFile, + isXLSXFile, } from '../../../lib/storage/checks'; import { convertPDFToHTML } from './convertPDFToHTML'; import { convertPPTToPDF } from './ConvertPPTToPDF'; import { convertImageToHTML } from './convertImageToHTML'; import { convertPDFToImages } from './convertPDFToImages'; +import { convertXLSXToHTML } from './convertXLSXToHTML'; interface PrepareDeckResult { name: string; @@ -28,6 +30,15 @@ export async function PrepareDeck( continue; } + if (isXLSXFile(file.name)) { + const htmlContent = convertXLSXToHTML(file.contents as Buffer, file.name); + convertedFiles.push({ + name: `${file.name}.html`, + contents: Buffer.from(htmlContent), + }); + continue; + } + if ( isImageFile(file.name) && input.settings.imageQuizHtmlToAnki && diff --git a/src/infrastracture/adapters/fileConversion/___mock/sim.xlsx b/src/infrastracture/adapters/fileConversion/___mock/sim.xlsx new file mode 100644 index 00000000..22bf8309 Binary files /dev/null and b/src/infrastracture/adapters/fileConversion/___mock/sim.xlsx differ diff --git a/src/infrastracture/adapters/fileConversion/__tests__/convertXLSXToHTML.test.ts b/src/infrastracture/adapters/fileConversion/__tests__/convertXLSXToHTML.test.ts new file mode 100644 index 00000000..6bffb6c6 --- /dev/null +++ b/src/infrastracture/adapters/fileConversion/__tests__/convertXLSXToHTML.test.ts @@ -0,0 +1,23 @@ +import { readFileSync } from 'fs'; +import Workspace from '../../../../lib/parser/WorkSpace'; +import { convertXLSXToHTML } from '../convertXLSXToHTML'; +import { join } from 'path'; + +describe('convertXLSXToHTML', () => { + beforeAll(() => { + process.env.WORKSPACE_BASE = '/tmp'; + }); + + it('should convert XLSX to HTML and save the file', async () => { + const workspace = new Workspace(true, 'fs'); + const xlsxPath = join(__dirname, '../___mock/sim.xlsx'); + const buffer = readFileSync(xlsxPath); + const html = convertXLSXToHTML(buffer, join(workspace.location, 'Simple.html')); + expect(html).toContain(''); + expect(html).toContain('Simple.html'); + }); + + afterAll(() => { + delete process.env.WORKSPACE_BASE; + }); +}); \ No newline at end of file diff --git a/src/infrastracture/adapters/fileConversion/convertXLSXToHTML.ts b/src/infrastracture/adapters/fileConversion/convertXLSXToHTML.ts new file mode 100644 index 00000000..84fbad12 --- /dev/null +++ b/src/infrastracture/adapters/fileConversion/convertXLSXToHTML.ts @@ -0,0 +1,33 @@ +import * as XLSX from 'xlsx'; + +type XLSXRow = [string | undefined, string | undefined, ...unknown[]]; + +export function convertXLSXToHTML(buffer: Buffer, title: string): string { + const workbook = XLSX.read(buffer, { type: 'buffer' }); + const sheetName = workbook.SheetNames[0]; + const worksheet = workbook.Sheets[sheetName]; + const jsonData = XLSX.utils.sheet_to_json(worksheet, { + header: 1, + }) as XLSXRow[]; + + return ` + +${title} + + ${jsonData + .map((row: XLSXRow) => { + const front = row[0] || ''; + const back = row[1] || ''; + return ``; + }) + .join('\n')} + +`; +} diff --git a/src/lib/parser/PrepareDeck.ts b/src/lib/parser/PrepareDeck.ts deleted file mode 100644 index 47033f5d..00000000 --- a/src/lib/parser/PrepareDeck.ts +++ /dev/null @@ -1,107 +0,0 @@ -import getDeckFilename from '../anki/getDeckFilename'; -import { DeckParser, DeckParserInput } from './DeckParser'; -import Deck from './Deck'; -import { - isHTMLFile, - isImageFile, - isPDFFile, - isPPTFile, -} from '../storage/checks'; -import { convertPDFToHTML } from '../../infrastracture/adapters/fileConversion/convertPDFToHTML'; -import { convertPDFToImages } from '../../infrastracture/adapters/fileConversion/convertPDFToImages'; -import { convertPPTToPDF } from '../../infrastracture/adapters/fileConversion/ConvertPPTToPDF'; -import { convertImageToHTML } from '../../infrastracture/adapters/fileConversion/convertImageToHTML'; - -interface PrepareDeckResult { - name: string; - apkg: Buffer; - deck: Deck[]; -} - -export async function PrepareDeck( - input: DeckParserInput -): Promise { - const convertedFiles = []; - - for (const file of input.files) { - if (!file.contents) { - continue; - } - - if ( - isImageFile(file.name) && - input.settings.imageQuizHtmlToAnki && - input.noLimits - ) { - const convertedImageContents = await convertImageToHTML( - file.contents?.toString('base64') - ); - convertedFiles.push({ - name: `${file.name}.html`, - contents: convertedImageContents, - }); - } - - if (!isPDFFile(file.name) && !isPPTFile(file.name)) continue; - - if ( - isPDFFile(file.name) && - input.noLimits && - input.settings.vertexAIPDFQuestions - ) { - file.contents = await convertPDFToHTML(file.contents.toString('base64')); - } else if (isPPTFile(file.name)) { - const pdContents = await convertPPTToPDF( - file.name, - file.contents, - input.workspace - ); - - const convertedContents = await convertPDFToImages({ - name: file.name, - workspace: input.workspace, - noLimits: input.noLimits, - contents: pdContents, - }); - convertedFiles.push({ - name: `${file.name}.html`, - contents: convertedContents, - }); - } else if (isPDFFile(file.name)) { - const convertedContents = await convertPDFToImages({ - name: file.name, - workspace: input.workspace, - noLimits: input.noLimits, - contents: file.contents, - }); - convertedFiles.push({ - name: `${file.name}.html`, - contents: convertedContents, - }); - } - } - - input.files.push(...convertedFiles); - const parser = new DeckParser(input); - - if (parser.totalCardCount() === 0) { - if (convertedFiles.length > 0) { - const htmlFile = convertedFiles.find((file) => isHTMLFile(file.name)); - parser.processFirstFile(htmlFile?.name ?? input.name); - } else { - const apkg = await parser.tryExperimental(); - return { - name: getDeckFilename(parser.name ?? input.name), - apkg, - deck: parser.payload, - }; - } - } - - const apkg = await parser.build(input.workspace); - return { - name: getDeckFilename(parser.name), - apkg, - deck: parser.payload, - }; -} diff --git a/src/lib/parser/xlsx/convertXLSXToHTML.ts b/src/lib/parser/xlsx/convertXLSXToHTML.ts new file mode 100644 index 00000000..2f47a3b3 --- /dev/null +++ b/src/lib/parser/xlsx/convertXLSXToHTML.ts @@ -0,0 +1,37 @@ +import * as XLSX from 'xlsx'; + +type XLSXRow = [string | undefined, string | undefined, ...unknown[]]; + +export function convertXLSXToHTML( + contents: string, + title: string +): Promise { + const buffer = Buffer.from(contents, 'binary'); + const workbook = XLSX.read(buffer, { type: 'buffer' }); + const sheetName = workbook.SheetNames[0]; + const worksheet = workbook.Sheets[sheetName]; + const jsonData = XLSX.utils.sheet_to_json(worksheet, { + header: 1, + }) as XLSXRow[]; + + return Promise.resolve(` + +${title} + + ${jsonData + .map((row: XLSXRow) => { + const front = row[0] || ''; + const back = row[1] || ''; + return ``; + }) + .join('\n')} + +`); +} diff --git a/src/lib/storage/checks.ts b/src/lib/storage/checks.ts index 46224ba4..968e4926 100644 --- a/src/lib/storage/checks.ts +++ b/src/lib/storage/checks.ts @@ -58,3 +58,5 @@ export const isImageFile = (name: string) => name.toLowerCase().endsWith('.gif') || name.toLowerCase().endsWith('.bmp') || name.toLowerCase().endsWith('.svg')); + +export const isXLSXFile = (fileName: string) => /.xlsx$/i.test(fileName); diff --git a/src/usecases/uploads/getPackagesFromZip.ts b/src/usecases/uploads/getPackagesFromZip.ts index 6b24c225..215fa744 100644 --- a/src/usecases/uploads/getPackagesFromZip.ts +++ b/src/usecases/uploads/getPackagesFromZip.ts @@ -1,7 +1,7 @@ import { Body } from 'aws-sdk/clients/s3'; import CardOption from '../../lib/parser/Settings/CardOption'; import { ZipHandler } from '../../lib/zip/zip'; -import { PrepareDeck } from '../../lib/parser/PrepareDeck'; +import { PrepareDeck } from '../../infrastracture/adapters/fileConversion/PrepareDeck'; import Package from '../../lib/parser/Package'; import { checkFlashcardsLimits } from '../../lib/User/checkFlashcardsLimits'; import { PackageResult } from './GeneratePackagesUseCase'; @@ -43,7 +43,10 @@ export const getPackagesFromZip = async ( if (deck) { packages.push(new Package(deck.name)); - cardCount += deck.deck.reduce((acc, d) => acc + d.cards.length, 0); + cardCount += deck.deck.reduce( + (acc: number, d: { cards: any[] }) => acc + d.cards.length, + 0 + ); // Checking the limit in place while iterating through the decks checkFlashcardsLimits({ diff --git a/src/usecases/uploads/isZipContentFileSupported.ts b/src/usecases/uploads/isZipContentFileSupported.ts index 8f03067c..b02b8a5f 100644 --- a/src/usecases/uploads/isZipContentFileSupported.ts +++ b/src/usecases/uploads/isZipContentFileSupported.ts @@ -4,6 +4,7 @@ import { isPlainText, isCSVFile, isPDFFile, + isXLSXFile, } from '../../lib/storage/checks'; /** @@ -14,4 +15,5 @@ export const isZipContentFileSupported = (filename: string) => isMarkdownFile(filename) ?? isPlainText(filename) ?? isCSVFile(filename) ?? - isPDFFile(filename); + isPDFFile(filename) ?? + isXLSXFile(filename); diff --git a/src/usecases/uploads/worker.ts b/src/usecases/uploads/worker.ts index 3ae58be6..22fdc37a 100644 --- a/src/usecases/uploads/worker.ts +++ b/src/usecases/uploads/worker.ts @@ -3,7 +3,7 @@ import { UploadedFile } from '../../lib/storage/types'; import CardOption from '../../lib/parser/Settings/CardOption'; import Package from '../../lib/parser/Package'; import fs from 'fs'; -import { PrepareDeck } from '../../lib/parser/PrepareDeck'; +import { PrepareDeck } from '../../infrastracture/adapters/fileConversion/PrepareDeck'; import { isImageFile, isPotentialZipFile,