From 0d08aecd56b13695c490c4797eb462b8f6dc794e Mon Sep 17 00:00:00 2001 From: mikiher Date: Wed, 18 Sep 2024 08:28:15 +0300 Subject: [PATCH 1/3] Move from libarchive to node-unrar-js for cbr and node-stream-zip for cbz --- package-lock.json | 9 + package.json | 1 + server/utils/comicBookExtractors.js | 196 +++++++++++++++++++++ server/utils/parsers/parseComicMetadata.js | 68 ++----- 4 files changed, 225 insertions(+), 49 deletions(-) create mode 100644 server/utils/comicBookExtractors.js diff --git a/package-lock.json b/package-lock.json index 90493a065c..6f0a3587fd 100644 --- a/package-lock.json +++ b/package-lock.json @@ -16,6 +16,7 @@ "graceful-fs": "^4.2.10", "htmlparser2": "^8.0.1", "lru-cache": "^10.0.3", + "node-unrar-js": "^2.0.2", "nodemailer": "^6.9.13", "openid-client": "^5.6.1", "p-throttle": "^4.1.1", @@ -3565,6 +3566,14 @@ "integrity": "sha512-uYr7J37ae/ORWdZeQ1xxMJe3NtdmqMC/JZK+geofDrkLUApKRHPd18/TxtBOJ4A0/+uUIliorNrfYV6s1b02eQ==", "dev": true }, + "node_modules/node-unrar-js": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/node-unrar-js/-/node-unrar-js-2.0.2.tgz", + "integrity": "sha512-hLNmoJzqaKJnod8yiTVGe9hnlNRHotUi0CreSv/8HtfRi/3JnRC8DvsmKfeGGguRjTEulhZK6zXX5PXoVuDZ2w==", + "engines": { + "node": ">=10.0.0" + } + }, "node_modules/nodemailer": { "version": "6.9.13", "resolved": "https://registry.npmjs.org/nodemailer/-/nodemailer-6.9.13.tgz", diff --git a/package.json b/package.json index 70cf40c2d7..752b2f8df5 100644 --- a/package.json +++ b/package.json @@ -43,6 +43,7 @@ "graceful-fs": "^4.2.10", "htmlparser2": "^8.0.1", "lru-cache": "^10.0.3", + "node-unrar-js": "^2.0.2", "nodemailer": "^6.9.13", "openid-client": "^5.6.1", "p-throttle": "^4.1.1", diff --git a/server/utils/comicBookExtractors.js b/server/utils/comicBookExtractors.js new file mode 100644 index 0000000000..3443a570df --- /dev/null +++ b/server/utils/comicBookExtractors.js @@ -0,0 +1,196 @@ +const Path = require('path') +const fs = require('../libs/fsExtra') +const os = require('os') +const Logger = require('../Logger') +const { isWritable } = require('./fileUtils') + +const StreamZip = require('../libs/nodeStreamZip') +const Archive = require('../libs/libarchive/archive') +const unrar = require('node-unrar-js') + +class AbstractComicBookExtractor { + constructor(comicPath) { + this.comicPath = comicPath + } + + async getBuffer() { + if (!(await fs.pathExists(this.comicPath))) { + Logger.error(`[parseComicMetadata] Comic path does not exist "${this.comicPath}"`) + return null + } + try { + return fs.readFile(this.comicPath) + } catch (error) { + Logger.error(`[parseComicMetadata] Failed to read comic at "${this.comicPath}"`, error) + return null + } + } + + async open() { + throw new Error('Not implemented') + } + + async getFilePaths() { + throw new Error('Not implemented') + } + + async extractToFile(filePath, outputFilePath) { + throw new Error('Not implemented') + } + + async extractToBuffer(filePath) { + throw new Error('Not implemented') + } + + close() { + throw new Error('Not implemented') + } +} + +class CbrComicBookExtractor extends AbstractComicBookExtractor { + constructor(comicPath) { + super(comicPath) + this.archive = null + this.tmpDir = null + } + + async open() { + this.tmpDir = global.MetadataPath ? 
Path.join(global.MetadataPath, 'tmp') : os.tmpdir() + await fs.ensureDir(this.tmpDir) + if (!(await isWritable(this.tmpDir))) throw new Error(`[CbrComicBookExtractor] Temp directory "${this.tmpDir}" is not writable`) + this.archive = await unrar.createExtractorFromFile({ filepath: this.comicPath, targetPath: this.tmpDir }) + Logger.debug(`[CbrComicBookExtractor] Opened comic book "${this.comicPath}". Using temp directory "${this.tmpDir}" for extraction.`) + } + + async getFilePaths() { + if (!this.archive) return null + const list = this.archive.getFileList() + const fileHeaders = [...list.fileHeaders] + const filePaths = fileHeaders.filter((fh) => !fh.flags.directory).map((fh) => fh.name) + Logger.debug(`[CbrComicBookExtractor] Found ${filePaths.length} files in comic book "${this.comicPath}"`) + return filePaths + } + + async extractToBuffer(file) { + if (!this.archive) return null + const extracted = this.archive.extract({ files: [file] }) + const files = [...extracted.files] + const filePath = Path.join(this.tmpDir, files[0].fileHeader.name) + const fileData = await fs.readFile(filePath) + await fs.remove(filePath) + Logger.debug(`[CbrComicBookExtractor] Extracted file "${file}" from comic book "${this.comicPath}" to buffer, size: ${fileData.length}`) + return fileData + } + + async extractToFile(file, outputFilePath) { + if (!this.archive) return false + const extracted = this.archive.extract({ files: [file] }) + const files = [...extracted.files] + const fileEntry = files[0] + const extractedFilePath = Path.join(this.tmpDir, fileEntry.fileHeader.name) + await fs.move(extractedFilePath, outputFilePath, { overwrite: true }) + Logger.debug(`[CbrComicBookExtractor] Extracted file "${file}" from comic book "${this.comicPath}" to "${outputFilePath}"`) + return true + } + + close() { + Logger.debug(`[CbrComicBookExtractor] Closed comic book "${this.comicPath}"`) + } +} + +class CbzComicBookExtractor extends AbstractComicBookExtractor { + constructor(comicPath) { + super(comicPath) + this.archive = null + } + + async open() { + const buffer = await this.getBuffer() + this.archive = await Archive.open(buffer) + Logger.debug(`[CbzComicBookExtractor] Opened comic book "${this.comicPath}"`) + } + + async getFilePaths() { + if (!this.archive) return null + const list = await this.archive.getFilesArray() + const fileNames = list.map((fo) => fo.file._path) + Logger.debug(`[CbzComicBookExtractor] Found ${fileNames.length} files in comic book "${this.comicPath}"`) + return fileNames + } + + async extractToBuffer(file) { + if (!this.archive) return null + const extracted = await this.archive.extractSingleFile(file) + Logger.debug(`[CbzComicBookExtractor] Extracted file "${file}" from comic book "${this.comicPath}" to buffer, size: ${extracted?.fileData.length}`) + return extracted?.fileData + } + + async extractToFile(file, outputFilePath) { + const data = await this.extractToBuffer(file) + if (!data) return false + await fs.writeFile(outputFilePath, data) + Logger.debug(`[CbzComicBookExtractor] Extracted file "${file}" from comic book "${this.comicPath}" to "${outputFilePath}"`) + return true + } + + close() { + this.archive?.close() + Logger.debug(`[CbzComicBookExtractor] Closed comic book "${this.comicPath}"`) + } +} + +class CbzStreamZipComicBookExtractor extends AbstractComicBookExtractor { + constructor(comicPath) { + super(comicPath) + this.archive = null + } + + async open() { + this.archive = new StreamZip.async({ file: this.comicPath }) + Logger.debug(`[CbzStreamZipComicBookExtractor] Opened 
comic book "${this.comicPath}"`) + } + + async getFilePaths() { + if (!this.archive) return null + const entries = await this.archive.entries() + const fileNames = Object.keys(entries).filter((entry) => !entries[entry].isDirectory) + Logger.debug(`[CbzStreamZipComicBookExtractor] Found ${fileNames.length} files in comic book "${this.comicPath}"`) + return fileNames + } + + async extractToBuffer(file) { + if (!this.archive) return null + const extracted = await this.archive?.entryData(file) + Logger.debug(`[CbzStreamZipComicBookExtractor] Extracted file "${file}" from comic book "${this.comicPath}" to buffer, size: ${extracted.length}`) + return extracted + } + + async extractToFile(file, outputFilePath) { + if (!this.archive) return false + try { + await this.archive.extract(file, outputFilePath) + Logger.debug(`[CbzStreamZipComicBookExtractor] Extracted file "${file}" from comic book "${this.comicPath}" to "${outputFilePath}"`) + return true + } catch (error) { + Logger.error(`[CbzStreamZipComicBookExtractor] Failed to extract file "${file}" to "${outputFilePath}"`, error) + return false + } + } + + close() { + this.archive?.close() + Logger.debug(`[CbzStreamZipComicBookExtractor] Closed comic book "${this.comicPath}"`) + } +} + +function createComicBookExtractor(comicPath) { + const ext = Path.extname(comicPath).toLowerCase() + if (ext === '.cbr') { + return new CbrComicBookExtractor(comicPath) + } else if (ext === '.cbz') { + return new CbzStreamZipComicBookExtractor(comicPath) + } else { + throw new Error(`Unsupported comic book format "${ext}"`) + } +} +module.exports = { createComicBookExtractor } diff --git a/server/utils/parsers/parseComicMetadata.js b/server/utils/parsers/parseComicMetadata.js index d2ba702da6..7ed0d1f50c 100644 --- a/server/utils/parsers/parseComicMetadata.js +++ b/server/utils/parsers/parseComicMetadata.js @@ -5,24 +5,7 @@ const Logger = require('../../Logger') const Archive = require('../../libs/libarchive/archive') const { xmlToJSON } = require('../index') const parseComicInfoMetadata = require('./parseComicInfoMetadata') - -/** - * - * @param {string} filepath - * @returns {Promise} - */ -async function getComicFileBuffer(filepath) { - if (!(await fs.pathExists(filepath))) { - Logger.error(`[parseComicMetadata] Comic path does not exist "${filepath}"`) - return null - } - try { - return fs.readFile(filepath) - } catch (error) { - Logger.error(`[parseComicMetadata] Failed to read comic at "${filepath}"`, error) - return null - } -} +const { createComicBookExtractor } = require('../comicBookExtractors.js') /** * Extract cover image from comic return true if success @@ -33,22 +16,11 @@ async function getComicFileBuffer(filepath) { * @returns {Promise} */ async function extractCoverImage(comicPath, comicImageFilepath, outputCoverPath) { - const comicFileBuffer = await getComicFileBuffer(comicPath) - if (!comicFileBuffer) return null - let archive = null try { - archive = await Archive.open(comicFileBuffer) - const fileEntry = await archive.extractSingleFile(comicImageFilepath) - - if (!fileEntry?.fileData) { - Logger.error(`[parseComicMetadata] Invalid file entry data for comicPath "${comicPath}"/${comicImageFilepath}`) - return false - } - - await fs.writeFile(outputCoverPath, fileEntry.fileData) - - return true + archive = createComicBookExtractor(comicPath) + await archive.open() + return await archive.extractToFile(comicImageFilepath, outputCoverPath) } catch (error) { Logger.error(`[parseComicMetadata] Failed to extract image "${comicImageFilepath}" from 
comicPath "${comicPath}" into "${outputCoverPath}"`, error)
     return false
@@ -67,30 +39,28 @@ module.exports.extractCoverImage = extractCoverImage
  */
 async function parse(ebookFile) {
   const comicPath = ebookFile.metadata.path
-  Logger.debug(`Parsing metadata from comic at "${comicPath}"`)
-
-  const comicFileBuffer = await getComicFileBuffer(comicPath)
-  if (!comicFileBuffer) return null
-
+  Logger.debug(`[parseComicMetadata] Parsing comic metadata at "${comicPath}"`)
   let archive = null
   try {
-    archive = await Archive.open(comicFileBuffer)
+    archive = createComicBookExtractor(comicPath)
+    await archive.open()
 
-    const fileObjects = await archive.getFilesArray()
+    const filePaths = await archive.getFilePaths()
 
-    fileObjects.sort((a, b) => {
-      return a.file.name.localeCompare(b.file.name, undefined, {
+    // Sort the file paths in a natural order to get the first image
+    filePaths.sort((a, b) => {
+      return a.localeCompare(b, undefined, {
         numeric: true,
         sensitivity: 'base'
       })
     })
 
     let metadata = null
-    const comicInfo = fileObjects.find((fo) => fo.file.name === 'ComicInfo.xml')
-    if (comicInfo) {
-      const comicInfoEntry = await comicInfo.file.extract()
-      if (comicInfoEntry?.fileData) {
-        const comicInfoStr = new TextDecoder().decode(comicInfoEntry.fileData)
+    const comicInfoPath = filePaths.find((filePath) => filePath === 'ComicInfo.xml')
+    if (comicInfoPath) {
+      const comicInfoData = await archive.extractToBuffer(comicInfoPath)
+      if (comicInfoData) {
+        const comicInfoStr = new TextDecoder().decode(comicInfoData)
         const comicInfoJson = await xmlToJSON(comicInfoStr)
         if (comicInfoJson) {
           metadata = parseComicInfoMetadata.parse(comicInfoJson)
@@ -104,9 +74,9 @@ async function parse(ebookFile) {
       metadata
     }
 
-    const firstImage = fileObjects.find((fo) => globals.SupportedImageTypes.includes(Path.extname(fo.file.name).toLowerCase().slice(1)))
-    if (firstImage?.file?._path) {
-      payload.ebookCoverPath = firstImage.file._path
+    const firstImagePath = filePaths.find((filePath) => globals.SupportedImageTypes.includes(Path.extname(filePath).toLowerCase().slice(1)))
+    if (firstImagePath) {
+      payload.ebookCoverPath = firstImagePath
     } else {
       Logger.warn(`[parseComicMetadata] Cover image not found in comic at "${comicPath}"`)
     }

From 072028c740dcb66c500580c0134c4f9592d448cc Mon Sep 17 00:00:00 2001
From: mikiher
Date: Wed, 18 Sep 2024 10:16:46 +0300
Subject: [PATCH 2/3] Clean up empty directories inside the temp extraction dir

---
 server/utils/comicBookExtractors.js | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/server/utils/comicBookExtractors.js b/server/utils/comicBookExtractors.js
index 3443a570df..be60d0ff91 100644
--- a/server/utils/comicBookExtractors.js
+++ b/server/utils/comicBookExtractors.js
@@ -71,6 +71,17 @@ class CbrComicBookExtractor extends AbstractComicBookExtractor {
     return filePaths
   }
 
+  async removeEmptyParentDirs(file) {
+    let dir = Path.dirname(file)
+    while (dir !== '.') {
+      const fullDirPath = Path.join(this.tmpDir, dir)
+      const files = await fs.readdir(fullDirPath)
+      if (files.length > 0) break
+      await fs.remove(fullDirPath)
+      dir = Path.dirname(dir)
+    }
+  }
+
   async extractToBuffer(file) {
     if (!this.archive) return null
     const extracted = this.archive.extract({ files: [file] })
@@ -78,6 +89,7 @@ class CbrComicBookExtractor extends AbstractComicBookExtractor {
     const filePath = Path.join(this.tmpDir, files[0].fileHeader.name)
     const fileData = await fs.readFile(filePath)
     await fs.remove(filePath)
+    await this.removeEmptyParentDirs(files[0].fileHeader.name)
Logger.debug(`[CbrComicBookExtractor] Extracted file "${file}" from comic book "${this.comicPath}" to buffer, size: ${fileData.length}`) return fileData } @@ -86,9 +98,9 @@ class CbrComicBookExtractor extends AbstractComicBookExtractor { if (!this.archive) return false const extracted = this.archive.extract({ files: [file] }) const files = [...extracted.files] - const fileEntry = files[0] - const extractedFilePath = Path.join(this.tmpDir, fileEntry.fileHeader.name) + const extractedFilePath = Path.join(this.tmpDir, files[0].fileHeader.name) await fs.move(extractedFilePath, outputFilePath, { overwrite: true }) + await this.removeEmptyParentDirs(files[0].fileHeader.name) Logger.debug(`[CbrComicBookExtractor] Extracted file "${file}" from comic book "${this.comicPath}" to "${outputFilePath}"`) return true } From 1a8811b69afa267904746130da2a83b2f99709c2 Mon Sep 17 00:00:00 2001 From: advplyr Date: Wed, 18 Sep 2024 14:26:10 -0500 Subject: [PATCH 3/3] Remove unused requires --- server/utils/comicBookExtractors.js | 7 +++---- server/utils/parsers/parseComicMetadata.js | 6 ++---- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/server/utils/comicBookExtractors.js b/server/utils/comicBookExtractors.js index be60d0ff91..9c18ebddc2 100644 --- a/server/utils/comicBookExtractors.js +++ b/server/utils/comicBookExtractors.js @@ -1,12 +1,11 @@ const Path = require('path') -const fs = require('../libs/fsExtra') const os = require('os') +const unrar = require('node-unrar-js') const Logger = require('../Logger') -const { isWritable } = require('./fileUtils') - +const fs = require('../libs/fsExtra') const StreamZip = require('../libs/nodeStreamZip') const Archive = require('../libs/libarchive/archive') -const unrar = require('node-unrar-js') +const { isWritable } = require('./fileUtils') class AbstractComicBookExtractor { constructor(comicPath) { diff --git a/server/utils/parsers/parseComicMetadata.js b/server/utils/parsers/parseComicMetadata.js index 7ed0d1f50c..38a41b51d8 100644 --- a/server/utils/parsers/parseComicMetadata.js +++ b/server/utils/parsers/parseComicMetadata.js @@ -1,10 +1,8 @@ const Path = require('path') -const globals = require('../globals') -const fs = require('../../libs/fsExtra') const Logger = require('../../Logger') -const Archive = require('../../libs/libarchive/archive') -const { xmlToJSON } = require('../index') const parseComicInfoMetadata = require('./parseComicInfoMetadata') +const globals = require('../globals') +const { xmlToJSON } = require('../index') const { createComicBookExtractor } = require('../comicBookExtractors.js') /**
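
For reference, here is a minimal sketch of how a caller can drive the extractor API introduced by this patch series. The require path, the extractFirstPage helper name, and the simplified image-extension regex are illustrative assumptions only (the real caller, parseComicMetadata.js, filters against globals.SupportedImageTypes); the factory, method names, and the natural-sort options mirror the code added above.

// Hypothetical usage sketch (not part of the patch): extract the first page
// image from a .cbr or .cbz file via the new factory.
const { createComicBookExtractor } = require('./server/utils/comicBookExtractors')

async function extractFirstPage(comicPath, outputPath) {
  // The factory returns CbrComicBookExtractor (node-unrar-js) for .cbr,
  // CbzStreamZipComicBookExtractor (node-stream-zip) for .cbz, and throws otherwise.
  const extractor = createComicBookExtractor(comicPath)
  try {
    await extractor.open()
    const filePaths = await extractor.getFilePaths()
    // Natural sort, as in parseComicMetadata: "page2.jpg" sorts before "page10.jpg".
    filePaths.sort((a, b) => a.localeCompare(b, undefined, { numeric: true, sensitivity: 'base' }))
    // Simplified image check; assumed stand-in for the globals.SupportedImageTypes filter.
    const firstImage = filePaths.find((p) => /\.(jpe?g|png|webp)$/i.test(p))
    if (!firstImage) return false
    return await extractor.extractToFile(firstImage, outputPath)
  } finally {
    extractor.close()
  }
}

Note that for CBR files extraction goes through a temp directory under global.MetadataPath (falling back to os.tmpdir()), which the second patch additionally prunes of empty subdirectories after each file is pulled out; the CBZ path reads entries directly through node-stream-zip.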