Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move to node-unrar-js for cbr and node-stream-zip for cbz #3435

Merged
merged 3 commits into from
Sep 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
"graceful-fs": "^4.2.10",
"htmlparser2": "^8.0.1",
"lru-cache": "^10.0.3",
"node-unrar-js": "^2.0.2",
"nodemailer": "^6.9.13",
"openid-client": "^5.6.1",
"p-throttle": "^4.1.1",
Expand Down
207 changes: 207 additions & 0 deletions server/utils/comicBookExtractors.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,207 @@
const Path = require('path')
const os = require('os')
const unrar = require('node-unrar-js')
const Logger = require('../Logger')
const fs = require('../libs/fsExtra')
const StreamZip = require('../libs/nodeStreamZip')
const Archive = require('../libs/libarchive/archive')
const { isWritable } = require('./fileUtils')

/**
 * Base class for comic book archive readers.
 *
 * Holds the path of the comic on disk and provides a shared helper for reading
 * the whole file into memory. Every archive operation is a placeholder that
 * throws 'Not implemented' and is meant to be overridden by a subclass.
 */
class AbstractComicBookExtractor {
  /**
   * @param {string} comicPath - path of the comic book file on disk
   */
  constructor(comicPath) {
    this.comicPath = comicPath
  }

  /**
   * Read the entire comic file.
   *
   * @returns {Promise<Buffer|null>} the file contents, or null when the path
   *   does not exist or the read fails (both cases are logged)
   */
  async getBuffer() {
    if (!(await fs.pathExists(this.comicPath))) {
      Logger.error(`[parseComicMetadata] Comic path does not exist "${this.comicPath}"`)
      return null
    }
    try {
      // The await is required: returning the bare promise would let a read
      // rejection escape this try/catch instead of being logged and mapped to null.
      return await fs.readFile(this.comicPath)
    } catch (error) {
      Logger.error(`[parseComicMetadata] Failed to read comic at "${this.comicPath}"`, error)
      return null
    }
  }

  /**
   * Open the archive. Must be overridden.
   *
   * @returns {Promise<void>}
   * @throws {Error} always, in this base implementation
   */
  async open() {
    throw new Error('Not implemented')
  }

  /**
   * List the paths of the files contained in the archive. Must be overridden.
   *
   * @returns {Promise<string[]|null>}
   * @throws {Error} always, in this base implementation
   */
  async getFilePaths() {
    throw new Error('Not implemented')
  }

  /**
   * Extract one archive entry to a file on disk. Must be overridden.
   *
   * @param {string} filePath - path of the entry inside the archive
   * @param {string} outputFilePath - destination path on disk
   * @returns {Promise<boolean>}
   * @throws {Error} always, in this base implementation
   */
  async extractToFile(filePath, outputFilePath) {
    throw new Error('Not implemented')
  }

  /**
   * Extract one archive entry into memory. Must be overridden.
   *
   * @param {string} filePath - path of the entry inside the archive
   * @returns {Promise<Buffer|null>}
   * @throws {Error} always, in this base implementation
   */
  async extractToBuffer(filePath) {
    throw new Error('Not implemented')
  }

  /**
   * Release the archive. Must be overridden.
   *
   * @throws {Error} always, in this base implementation
   */
  close() {
    throw new Error('Not implemented')
  }
}

/**
 * Extractor for .cbr (RAR) comics, backed by node-unrar-js.
 *
 * Entries are extracted into a temp directory and then either read back into
 * memory or moved to their destination; the temp file and any parent
 * directories left empty by it are removed afterwards.
 */
class CbrComicBookExtractor extends AbstractComicBookExtractor {
  /**
   * @param {string} comicPath - path of the .cbr file on disk
   */
  constructor(comicPath) {
    super(comicPath)
    this.archive = null
    this.tmpDir = null
  }

  /**
   * Prepare the temp directory and create the unrar extractor.
   *
   * Uses "<MetadataPath>/tmp" when global.MetadataPath is set, otherwise the
   * OS temp directory.
   *
   * @returns {Promise<void>}
   * @throws {Error} when the temp directory is not writable
   */
  async open() {
    this.tmpDir = global.MetadataPath ? Path.join(global.MetadataPath, 'tmp') : os.tmpdir()
    await fs.ensureDir(this.tmpDir)
    if (!(await isWritable(this.tmpDir))) throw new Error(`[CbrComicBookExtractor] Temp directory "${this.tmpDir}" is not writable`)
    this.archive = await unrar.createExtractorFromFile({ filepath: this.comicPath, targetPath: this.tmpDir })
    Logger.debug(`[CbrComicBookExtractor] Opened comic book "${this.comicPath}". Using temp directory "${this.tmpDir}" for extraction.`)
  }

  /**
   * List the non-directory entries of the archive.
   *
   * @returns {Promise<string[]|null>} entry names, or null when not opened
   */
  async getFilePaths() {
    if (!this.archive) return null
    const list = this.archive.getFileList()
    const fileHeaders = [...list.fileHeaders]
    const filePaths = fileHeaders.filter((fh) => !fh.flags.directory).map((fh) => fh.name)
    Logger.debug(`[CbrComicBookExtractor] Found ${filePaths.length} files in comic book "${this.comicPath}"`)
    return filePaths
  }

  /**
   * Remove the directories (relative to tmpDir) that contained an extracted
   * entry, walking upwards and stopping at the first non-empty one.
   *
   * @param {string} file - entry name as stored in the archive
   * @returns {Promise<void>}
   */
  async removeEmptyParentDirs(file) {
    let dir = Path.dirname(file)
    // Stop at '.' (relative name fully unwound) and also when dirname() stops
    // shrinking (root of a name with a leading separator). Without the second
    // check the walk would reach the root, target tmpDir itself and never end.
    while (dir !== '.' && dir !== Path.dirname(dir)) {
      const fullDirPath = Path.join(this.tmpDir, dir)
      const files = await fs.readdir(fullDirPath)
      if (files.length > 0) break
      await fs.remove(fullDirPath)
      dir = Path.dirname(dir)
    }
  }

  /**
   * Extract one entry and return its contents.
   *
   * @param {string} file - entry name inside the archive
   * @returns {Promise<Buffer|null>} entry contents, or null when the archive is
   *   not opened or the entry was not found
   */
  async extractToBuffer(file) {
    if (!this.archive) return null
    const extracted = this.archive.extract({ files: [file] })
    // Materialize the result iterator before touching the temp directory.
    const files = [...extracted.files]
    if (!files.length) {
      Logger.error(`[CbrComicBookExtractor] File "${file}" not found in comic book "${this.comicPath}"`)
      return null
    }
    const entryName = files[0].fileHeader.name
    const filePath = Path.join(this.tmpDir, entryName)
    let fileData
    try {
      fileData = await fs.readFile(filePath)
    } finally {
      // Clean up the temp copy even when reading it back failed.
      await fs.remove(filePath)
      await this.removeEmptyParentDirs(entryName)
    }
    Logger.debug(`[CbrComicBookExtractor] Extracted file "${file}" from comic book "${this.comicPath}" to buffer, size: ${fileData.length}`)
    return fileData
  }

  /**
   * Extract one entry and move it to outputFilePath (overwriting).
   *
   * @param {string} file - entry name inside the archive
   * @param {string} outputFilePath - destination path on disk
   * @returns {Promise<boolean>} true on success; false when the archive is not
   *   opened or the entry was not found
   */
  async extractToFile(file, outputFilePath) {
    if (!this.archive) return false
    const extracted = this.archive.extract({ files: [file] })
    const files = [...extracted.files]
    if (!files.length) {
      Logger.error(`[CbrComicBookExtractor] File "${file}" not found in comic book "${this.comicPath}"`)
      return false
    }
    const entryName = files[0].fileHeader.name
    const extractedFilePath = Path.join(this.tmpDir, entryName)
    await fs.move(extractedFilePath, outputFilePath, { overwrite: true })
    await this.removeEmptyParentDirs(entryName)
    Logger.debug(`[CbrComicBookExtractor] Extracted file "${file}" from comic book "${this.comicPath}" to "${outputFilePath}"`)
    return true
  }

  /**
   * Nothing is released here; this only logs that the comic was closed.
   */
  close() {
    Logger.debug(`[CbrComicBookExtractor] Closed comic book "${this.comicPath}"`)
  }
}

/**
 * Extractor for .cbz comics backed by the bundled libarchive wrapper.
 *
 * The whole comic file is read into memory and handed to Archive.open().
 */
class CbzComicBookExtractor extends AbstractComicBookExtractor {
  /**
   * @param {string} comicPath - path of the .cbz file on disk
   */
  constructor(comicPath) {
    super(comicPath)
    this.archive = null
  }

  /**
   * Read the comic into memory and open it as an archive.
   *
   * @returns {Promise<void>}
   * @throws {Error} when the comic file could not be read
   */
  async open() {
    const buffer = await this.getBuffer()
    // getBuffer() returns null (after logging) when the file is missing or
    // unreadable; fail here rather than handing null to Archive.open().
    if (!buffer) throw new Error(`[CbzComicBookExtractor] Failed to read comic book "${this.comicPath}"`)
    this.archive = await Archive.open(buffer)
    Logger.debug(`[CbzComicBookExtractor] Opened comic book "${this.comicPath}"`)
  }

  /**
   * List the paths of the entries reported by the archive.
   *
   * @returns {Promise<string[]|null>} entry paths, or null when not opened
   */
  async getFilePaths() {
    if (!this.archive) return null
    const list = await this.archive.getFilesArray()
    const fileNames = list.map((fo) => fo.file._path)
    Logger.debug(`[CbzComicBookExtractor] Found ${fileNames.length} files in comic book "${this.comicPath}"`)
    return fileNames
  }

  /**
   * Extract one entry into memory.
   *
   * @param {string} file - entry path inside the archive
   * @returns {Promise<*>} the entry's fileData; null when not opened, undefined
   *   when the archive returned no entry or an entry without data
   */
  async extractToBuffer(file) {
    if (!this.archive) return null
    const extracted = await this.archive.extractSingleFile(file)
    // fileData may be absent on a returned entry, so guard the length lookup as well.
    Logger.debug(`[CbzComicBookExtractor] Extracted file "${file}" from comic book "${this.comicPath}" to buffer, size: ${extracted?.fileData?.length}`)
    return extracted?.fileData
  }

  /**
   * Extract one entry and write it to outputFilePath.
   *
   * @param {string} file - entry path inside the archive
   * @param {string} outputFilePath - destination path on disk
   * @returns {Promise<boolean>} true when written, false when no data was extracted
   */
  async extractToFile(file, outputFilePath) {
    const data = await this.extractToBuffer(file)
    if (!data) return false
    await fs.writeFile(outputFilePath, data)
    Logger.debug(`[CbzComicBookExtractor] Extracted file "${file}" from comic book "${this.comicPath}" to "${outputFilePath}"`)
    return true
  }

  /**
   * Close the underlying archive if one was opened.
   */
  close() {
    this.archive?.close()
    Logger.debug(`[CbzComicBookExtractor] Closed comic book "${this.comicPath}"`)
  }
}

/**
 * Extractor for .cbz (ZIP) comics backed by the bundled node-stream-zip async API.
 */
class CbzStreamZipComicBookExtractor extends AbstractComicBookExtractor {
  /**
   * @param {string} comicPath - path of the .cbz file on disk
   */
  constructor(comicPath) {
    super(comicPath)
    this.archive = null
  }

  /**
   * Create the StreamZip reader for the comic file.
   *
   * @returns {Promise<void>}
   */
  async open() {
    this.archive = new StreamZip.async({ file: this.comicPath })
    Logger.debug(`[CbzStreamZipComicBookExtractor] Opened comic book "${this.comicPath}"`)
  }

  /**
   * List the non-directory entries of the archive.
   *
   * @returns {Promise<string[]|null>} entry names, or null when not opened
   */
  async getFilePaths() {
    if (!this.archive) return null
    const entries = await this.archive.entries()
    const fileNames = Object.keys(entries).filter((entry) => !entries[entry].isDirectory)
    Logger.debug(`[CbzStreamZipComicBookExtractor] Found ${fileNames.length} files in comic book "${this.comicPath}"`)
    return fileNames
  }

  /**
   * Extract one entry into memory.
   *
   * Failures are logged and reported as null, mirroring how extractToFile
   * reports failures as false.
   *
   * @param {string} file - entry name inside the archive
   * @returns {Promise<Buffer|null>} entry contents, or null when the archive is
   *   not opened or the extraction failed
   */
  async extractToBuffer(file) {
    if (!this.archive) return null
    try {
      const extracted = await this.archive.entryData(file)
      Logger.debug(`[CbzStreamZipComicBookExtractor] Extracted file "${file}" from comic book "${this.comicPath}" to buffer, size: ${extracted.length}`)
      return extracted
    } catch (error) {
      Logger.error(`[CbzStreamZipComicBookExtractor] Failed to extract file "${file}" to buffer`, error)
      return null
    }
  }

  /**
   * Extract one entry to outputFilePath.
   *
   * @param {string} file - entry name inside the archive
   * @param {string} outputFilePath - destination path on disk
   * @returns {Promise<boolean>} true on success; false when the archive is not
   *   opened or the extraction failed (failure is logged)
   */
  async extractToFile(file, outputFilePath) {
    if (!this.archive) return false
    try {
      await this.archive.extract(file, outputFilePath)
      Logger.debug(`[CbzStreamZipComicBookExtractor] Extracted file "${file}" from comic book "${this.comicPath}" to "${outputFilePath}"`)
      return true
    } catch (error) {
      Logger.error(`[CbzStreamZipComicBookExtractor] Failed to extract file "${file}" to "${outputFilePath}"`, error)
      return false
    }
  }

  /**
   * Close the underlying archive if one was opened.
   *
   * This method stays synchronous (it does not wait for the close to finish),
   * but a failed asynchronous close is logged rather than left as an
   * unhandled promise rejection.
   */
  close() {
    Promise.resolve(this.archive?.close()).catch((error) => {
      Logger.error(`[CbzStreamZipComicBookExtractor] Failed to close comic book "${this.comicPath}"`, error)
    })
    Logger.debug(`[CbzStreamZipComicBookExtractor] Closed comic book "${this.comicPath}"`)
  }
}

/**
 * Build the extractor that matches a comic's file extension
 * (compared case-insensitively).
 *
 * @param {string} comicPath - path of the comic book file
 * @returns {AbstractComicBookExtractor} a CbrComicBookExtractor for ".cbr",
 *   a CbzStreamZipComicBookExtractor for ".cbz"
 * @throws {Error} for any other extension
 */
function createComicBookExtractor(comicPath) {
  const extension = Path.extname(comicPath).toLowerCase()
  switch (extension) {
    case '.cbr':
      return new CbrComicBookExtractor(comicPath)
    case '.cbz':
      return new CbzStreamZipComicBookExtractor(comicPath)
    default:
      throw new Error(`Unsupported comic book format "${extension}"`)
  }
}
module.exports = { createComicBookExtractor }
74 changes: 21 additions & 53 deletions server/utils/parsers/parseComicMetadata.js
Original file line number Diff line number Diff line change
@@ -1,28 +1,9 @@
const Path = require('path')
const globals = require('../globals')
const fs = require('../../libs/fsExtra')
const Logger = require('../../Logger')
const Archive = require('../../libs/libarchive/archive')
const { xmlToJSON } = require('../index')
const parseComicInfoMetadata = require('./parseComicInfoMetadata')

/**
*
* @param {string} filepath
* @returns {Promise<Buffer>}
*/
async function getComicFileBuffer(filepath) {
if (!(await fs.pathExists(filepath))) {
Logger.error(`[parseComicMetadata] Comic path does not exist "${filepath}"`)
return null
}
try {
return fs.readFile(filepath)
} catch (error) {
Logger.error(`[parseComicMetadata] Failed to read comic at "${filepath}"`, error)
return null
}
}
const globals = require('../globals')
const { xmlToJSON } = require('../index')
const { createComicBookExtractor } = require('../comicBookExtractors.js')

/**
* Extract the cover image from a comic. Returns true on success.
Expand All @@ -33,22 +14,11 @@ async function getComicFileBuffer(filepath) {
* @returns {Promise<boolean>}
*/
async function extractCoverImage(comicPath, comicImageFilepath, outputCoverPath) {
const comicFileBuffer = await getComicFileBuffer(comicPath)
if (!comicFileBuffer) return null

let archive = null
try {
archive = await Archive.open(comicFileBuffer)
const fileEntry = await archive.extractSingleFile(comicImageFilepath)

if (!fileEntry?.fileData) {
Logger.error(`[parseComicMetadata] Invalid file entry data for comicPath "${comicPath}"/${comicImageFilepath}`)
return false
}

await fs.writeFile(outputCoverPath, fileEntry.fileData)

return true
archive = createComicBookExtractor(comicPath)
await archive.open()
return await archive.extractToFile(comicImageFilepath, outputCoverPath)
} catch (error) {
Logger.error(`[parseComicMetadata] Failed to extract image "${comicImageFilepath}" from comicPath "${comicPath}" into "${outputCoverPath}"`, error)
return false
Expand All @@ -67,30 +37,28 @@ module.exports.extractCoverImage = extractCoverImage
*/
async function parse(ebookFile) {
const comicPath = ebookFile.metadata.path
Logger.debug(`Parsing metadata from comic at "${comicPath}"`)

const comicFileBuffer = await getComicFileBuffer(comicPath)
if (!comicFileBuffer) return null

Logger.debug(`[parseComicMetadata] Parsing comic metadata at "${comicPath}"`)
let archive = null
try {
archive = await Archive.open(comicFileBuffer)
archive = createComicBookExtractor(comicPath)
await archive.open()

const fileObjects = await archive.getFilesArray()
const filePaths = await archive.getFilePaths()

fileObjects.sort((a, b) => {
return a.file.name.localeCompare(b.file.name, undefined, {
// Sort the file paths in a natural order to get the first image
filePaths.sort((a, b) => {
return a.localeCompare(b, undefined, {
numeric: true,
sensitivity: 'base'
})
})

let metadata = null
const comicInfo = fileObjects.find((fo) => fo.file.name === 'ComicInfo.xml')
if (comicInfo) {
const comicInfoEntry = await comicInfo.file.extract()
if (comicInfoEntry?.fileData) {
const comicInfoStr = new TextDecoder().decode(comicInfoEntry.fileData)
const comicInfoPath = filePaths.find((filePath) => filePath === 'ComicInfo.xml')
if (comicInfoPath) {
const comicInfoData = await archive.extractToBuffer(comicInfoPath)
if (comicInfoData) {
const comicInfoStr = new TextDecoder().decode(comicInfoData)
const comicInfoJson = await xmlToJSON(comicInfoStr)
if (comicInfoJson) {
metadata = parseComicInfoMetadata.parse(comicInfoJson)
Expand All @@ -104,9 +72,9 @@ async function parse(ebookFile) {
metadata
}

const firstImage = fileObjects.find((fo) => globals.SupportedImageTypes.includes(Path.extname(fo.file.name).toLowerCase().slice(1)))
if (firstImage?.file?._path) {
payload.ebookCoverPath = firstImage.file._path
const firstImagePath = filePaths.find((filePath) => globals.SupportedImageTypes.includes(Path.extname(filePath).toLowerCase().slice(1)))
if (firstImagePath) {
payload.ebookCoverPath = firstImagePath
} else {
Logger.warn(`[parseComicMetadata] Cover image not found in comic at "${comicPath}"`)
}
Expand Down
Loading