Skip to content

Commit

Permalink
Limit size of video attachment
Browse files Browse the repository at this point in the history
  • Loading branch information
bensteinberg committed Nov 22, 2024
1 parent fd714a4 commit 1ab2b88
Show file tree
Hide file tree
Showing 6 changed files with 68 additions and 32 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,7 @@ Options:
--capture-window-x <number> Width of the browser window Scoop will open to capture, in pixels. (default: 1600)
--capture-window-y <number> Height of the browser window Scoop will open to capture, in pixels. (default: 900)
--max-capture-size <number> Size limit for the capture's exchanges list, in bytes. (default: 209715200)
--max-video-capture-size <number> Size limit for the video attachment, in bytes. Scoop will not capture video attachments larger than this. (default: 209715200)
--auto-scroll <bool> Should Scoop try to scroll through the page? (choices: "true", "false", default: "true")
--auto-play-media <bool> Should Scoop try to autoplay `<audio>` and `<video>` tags? (choices: "true", "false", default: "true")
--grab-secondary-resources <bool> Should Scoop try to download img srcsets and secondary stylesheets? (choices: "true", "false", default: "true")
Expand Down
70 changes: 39 additions & 31 deletions Scoop.js
Original file line number Diff line number Diff line change
Expand Up @@ -920,6 +920,7 @@ export class Scoop {
'--output', `"${videoFilename}"`,
'--no-check-certificate',
'--proxy', `'http://${this.options.proxyHost}:${this.options.proxyPort}'`,
'--max-filesize', `"${this.options.maxVideoCaptureSize}"`,
this.url
]

Expand Down Expand Up @@ -948,14 +949,19 @@ export class Scoop {
const body = await readFile(`${this.captureTmpFolderPath}${file}`)
const isEntryPoint = false // TODO: Reconsider whether this should be an entry point.

this.addGeneratedExchange(url, httpHeaders, body, isEntryPoint)
videoSaved = true
if (body.length) {
this.addGeneratedExchange(url, httpHeaders, body, isEntryPoint)
videoSaved = true

// Push to map of available videos and subtitles
const index = file.replace('.mp4', '')
// Push to map of available videos and subtitles
const index = file.replace('.mp4', '')

if (!(index in availableVideosAndSubtitles)) {
availableVideosAndSubtitles[index] = []
if (!(index in availableVideosAndSubtitles)) {
availableVideosAndSubtitles[index] = []
}
} else {
videoSaved = false
this.log.warn(`No content in ${file}.`)
}
} catch (err) {
this.log.warn(`Error while creating exchange for ${file}.`)
Expand All @@ -964,7 +970,7 @@ export class Scoop {
}

// Subtitles
if (file.startsWith('video-extracted-') && file.endsWith('.vtt')) {
if (videoSaved && file.startsWith('video-extracted-') && file.endsWith('.vtt')) {
try {
const url = `file:///${file}`
const httpHeaders = new Headers({ 'content-type': 'text/vtt' })
Expand Down Expand Up @@ -998,41 +1004,43 @@ export class Scoop {
//
// Try to add metadata to exchanges
//
try {
metadataParsed = []
if (videoSaved) {
try {
metadataParsed = []

// yt-dlp returns JSONL when there is more than 1 video
for (const line of metadataRaw.split('\n')) {
if (line) {
metadataParsed.push(JSON.parse(line)) // May throw
// yt-dlp returns JSONL when there is more than 1 video
for (const line of metadataRaw.split('\n')) {
if (line) {
metadataParsed.push(JSON.parse(line)) // May throw
}
}
}

if (!metadataParsed.length) {
throw new Error('yt-dlp reported success (returned 0) but produced no metadata.')
}
if (!metadataParsed.length) {
throw new Error('yt-dlp reported success (returned 0) but produced no metadata.')
}

// Merge parsed metadata into a single JSON string and clean it before saving it
const metadataAsJSON = JSON
.stringify(metadataParsed, null, 2)
.replaceAll(this.captureTmpFolderPath, '')
// Merge parsed metadata into a single JSON string and clean it before saving it
const metadataAsJSON = JSON
.stringify(metadataParsed, null, 2)
.replaceAll(this.captureTmpFolderPath, '')

const url = 'file:///video-extracted-metadata.json'
const httpHeaders = new Headers({ 'content-type': 'application/json' })
const body = Buffer.from(metadataAsJSON)
const isEntryPoint = false
const url = 'file:///video-extracted-metadata.json'
const httpHeaders = new Headers({ 'content-type': 'application/json' })
const body = Buffer.from(metadataAsJSON)
const isEntryPoint = false

this.addGeneratedExchange(url, httpHeaders, body, isEntryPoint)
metadataSaved = true
} catch (err) {
this.log.warn('Error while creating exchange for file:///video-extracted-medatadata.json.')
this.log.trace(err)
this.addGeneratedExchange(url, httpHeaders, body, isEntryPoint)
metadataSaved = true
} catch (err) {
this.log.warn('Error while creating exchange for file:///video-extracted-medatadata.json.')
this.log.trace(err)
}
}

//
// Generate summary page
//
if ((videoSaved || metadataSaved || subtitlesSaved) === false) {
if (videoSaved === false) {
this.log.warn('yt-dlp reported success (returned 0), but produced no output.')
return
}
Expand Down
17 changes: 17 additions & 0 deletions Scoop.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ await test('Scoop - capture of a web page.', async (t) => {
app.get('/redirect', (req, res) => res.redirect(parseInt(req.query.statusCode), req.query.path))
app.get('/:path', (req, res) => res.sendFile(FIXTURES_PATH + req.params.path))

const testVideoFixture = await readFile(`${FIXTURES_PATH}video.mp4`)

/*
* TESTS
*/
Expand Down Expand Up @@ -72,6 +74,21 @@ await test('Scoop - capture of a web page.', async (t) => {
'file:///video-extracted-summary.html'
]
assert.deepEqual(expected.filter(url => urls.includes(url)), expected)
const attachment = exchanges.filter(
ex => ex.url === 'file:///video-extracted-1.mp4'
)[0]
assert.deepEqual(attachment.response.body, testVideoFixture)
})

await t.test('Scoop observes maxVideoCaptureSize', async (_t) => {
const { exchanges } = await Scoop.capture(`${URL}/test.html`, { ...options, captureVideoAsAttachment: true, maxVideoCaptureSize: 50000 })
const urls = exchanges.map(ex => ex.url)
const expected = [
'file:///video-extracted-1.mp4',
'file:///video-extracted-metadata.json',
'file:///video-extracted-summary.html'
]
assert.deepEqual(expected.filter(url => urls.includes(url)), [])
})

await t.test('Scoop can be configured for different window dimensions', async (_t) => {
Expand Down
7 changes: 7 additions & 0 deletions bin/cli.js
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,13 @@ program.addOption(
.default(defaults.maxCaptureSize)
)

program.addOption(
new Option(
'--max-video-capture-size <number>',
'Size limit for the video attachment, in bytes. Scoop will not capture video attachments larger than this.')
.default(defaults.maxVideoCaptureSize)
)

//
// Behaviors
//
Expand Down
1 change: 1 addition & 0 deletions options.js
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ export const defaults = {
captureWindowY: 900,

maxCaptureSize: 200 * 1024 * 1024,
maxVideoCaptureSize: 200 * 1024 * 1024,

autoScroll: true,
autoPlayMedia: true,
Expand Down
4 changes: 3 additions & 1 deletion options.types.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@
* @property {number} captureWindowX=1600 - Browser window resolution in pixels: X axis.
* @property {number} captureWindowY=900 - Browser window resolution in pixels: Y axis.
*
* @property {number} maxCaptureSize=209715200 - Maximum size, in bytes, for the exchanges list. Scoop stop intercepting exchanges at this threshold.
* @property {number} maxCaptureSize=209715200 - Maximum size, in bytes, for the exchanges list. Scoop stops intercepting exchanges at this threshold.
*
* @property {number} maxVideoCaptureSize=209715200 - Maximum size, in bytes, for video attachments. Scoop will not capture video attachments larger than this.
*
* @property {boolean} autoScroll=true - Should Scoop try to scroll through the page?
* @property {boolean} autoPlayMedia=true - Should Scoop try to autoplay `<audio>` and `<video>` tags?
Expand Down

0 comments on commit 1ab2b88

Please sign in to comment.