Skip to content

Commit

Permalink
feat: parse mp4 webvtt segments (#1545)
Browse files Browse the repository at this point in the history
  • Loading branch information
adrums86 authored Oct 29, 2024
1 parent 8456cb3 commit 9f1c4ad
Show file tree
Hide file tree
Showing 10 changed files with 449 additions and 22 deletions.
15 changes: 12 additions & 3 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@
"global": "^4.4.0",
"m3u8-parser": "^7.2.0",
"mpd-parser": "^1.3.1",
"mux.js": "7.0.3",
"mux.js": "7.1.0",
"video.js": "^7 || ^8"
},
"peerDependencies": {
Expand Down
53 changes: 53 additions & 0 deletions src/media-segment-request.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ export const REQUEST_ERRORS = {
ABORTED: -102
};

const WEB_VTT_CODEC = 'wvtt';

/**
* Abort all requests
*
Expand Down Expand Up @@ -164,6 +166,43 @@ const handleKeyResponse = (segment, objects, finishProcessingFn, triggerSegmentE
return finishProcessingFn(null, segment);
};

/**
 * Hands the bytes of an mp4 init segment to the transmuxer so that it can
 * set up the parser matching the text codec. Only WebVTT is handled; every
 * other codec is ignored.
 *
 * @param {Object} segment segment whose init segment (segment.map) is processed
 * @param {string} codec the codec of the text track
 */
const initMp4Text = (segment, codec) => {
  if (codec !== WEB_VTT_CODEC) {
    return;
  }

  const {transmuxer, map} = segment;

  transmuxer.postMessage({
    action: 'initMp4WebVttParser',
    data: map.bytes
  });
};

/**
 * Parses an mp4 text segment with the transmuxer and calls the doneFn from
 * the segment loader.
 *
 * Only WebVTT ('wvtt') text is supported. For any other codec the doneFn is
 * called with an error; otherwise the caller, which returns right after
 * invoking this function, would never see the request complete.
 *
 * @param {Object} segment the text segment to parse
 * @param {string} codec the codec of the text segment
 * @param {Function} doneFn the doneFn passed from the segment loader, invoked
 *        as doneFn(error, segment, result)
 */
const parseMp4TextSegment = (segment, codec, doneFn) => {
  if (codec !== WEB_VTT_CODEC) {
    // Unsupported text codec: report a failure instead of leaving the
    // request pending forever.
    doneFn({
      message: `Unsupported mp4 text codec: ${codec}`,
      code: REQUEST_ERRORS.FAILURE
    }, segment);
    return;
  }

  workerCallback({
    action: 'getMp4WebVttText',
    data: segment.bytes,
    transmuxer: segment.transmuxer,
    callback: ({data, mp4VttCues}) => {
      // the worker hands the segment bytes back alongside the parsed cues
      segment.bytes = data;
      doneFn(null, segment, { mp4VttCues });
    }
  });
};

const parseInitSegment = (segment, callback) => {
const type = detectContainerForBytes(segment.map.bytes);

Expand Down Expand Up @@ -206,6 +245,10 @@ const parseInitSegment = (segment, callback) => {
segment.map.timescales[track.id] = track.timescale;
}

if (track.type === 'text') {
initMp4Text(segment, track.codec);
}

});

return callback(null);
Expand Down Expand Up @@ -468,6 +511,16 @@ const handleSegmentBytes = ({
if (isLikelyFmp4MediaSegment(bytesAsUint8Array)) {
segment.isFmp4 = true;
const {tracks} = segment.map;
const isMp4TextSegment = tracks.text && (!tracks.audio || !tracks.video);

if (isMp4TextSegment) {
dataFn(segment, {
data: bytesAsUint8Array,
type: 'text'
});
parseMp4TextSegment(segment, tracks.text.codec, doneFn);
return;
}

const trackInfo = {
isFmp4: true,
Expand Down
39 changes: 39 additions & 0 deletions src/transmuxer-worker.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

import {Transmuxer} from 'mux.js/lib/mp4/transmuxer';
import CaptionParser from 'mux.js/lib/mp4/caption-parser';
import WebVttParser from 'mux.js/lib/mp4/webvtt-parser';
import mp4probe from 'mux.js/lib/mp4/probe';
import tsInspector from 'mux.js/lib/tools/ts-inspector.js';
import {
Expand Down Expand Up @@ -207,6 +208,44 @@ class MessageHandlers {
}, [segment.buffer]);
}

/**
* Initializes the WebVttParser and passes the init segment.
*
* @param {Uint8Array} data mp4 boxed WebVTT init segment data
*/
initMp4WebVttParser(data) {
if (!this.webVttParser) {
this.webVttParser = new WebVttParser();
}
const segment = new Uint8Array(data.data, data.byteOffset, data.byteLength);

// Set the timescale for the parser.
// This can be called repeatedly in order to set and re-set the timescale.
this.webVttParser.init(segment);
}

/**
* Parse an mp4 encapsulated WebVTT segment and return an array of cues.
*
* @param {Uint8Array} data a text/webvtt segment
* @return {Object[]} an array of parsed cue objects
*/
getMp4WebVttText(data) {
if (!this.webVttParser) {
// timescale might not be set yet if the parser is created before an init segment is passed.
// default timescale is 90k.
this.webVttParser = new WebVttParser();
}
const segment = new Uint8Array(data.data, data.byteOffset, data.byteLength);
const parsed = this.webVttParser.parseSegment(segment);

this.self.postMessage({
action: 'getMp4WebVttText',
mp4VttCues: parsed || [],
data: segment.buffer
}, [segment.buffer]);
}

probeMp4StartTime({timescales, data}) {
const startTime = mp4probe.startTime(timescales, data);

Expand Down
74 changes: 58 additions & 16 deletions src/vtt-segment-loader.js
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,6 @@ export default class VTTSegmentLoader extends SegmentLoader {
this.shouldSaveSegmentTimingInfo_ = false;
}

/**
 * Returns null rather than a transmuxer instance.
 *
 * @return {null} always null
 */
createTransmuxer_() {
// don't need to transmux any subtitles
return null;
}

/**
* Indicates which time ranges are buffered
*
Expand Down Expand Up @@ -282,6 +277,11 @@ export default class VTTSegmentLoader extends SegmentLoader {
}

const segmentInfo = this.pendingSegment_;
const isMp4WebVttSegmentWithCues = result.mp4VttCues && result.mp4VttCues.length;

if (isMp4WebVttSegmentWithCues) {
segmentInfo.mp4VttCues = result.mp4VttCues;
}

// although the VTT segment loader bandwidth isn't really used, it's good to
// maintain functionality between segment loaders
Expand Down Expand Up @@ -334,11 +334,13 @@ export default class VTTSegmentLoader extends SegmentLoader {
return;
}

this.updateTimeMapping_(
segmentInfo,
this.syncController_.timelines[segmentInfo.timeline],
this.playlist_
);
if (!isMp4WebVttSegmentWithCues) {
this.updateTimeMapping_(
segmentInfo,
this.syncController_.timelines[segmentInfo.timeline],
this.playlist_
);
}

if (segmentInfo.cues.length) {
segmentInfo.timingInfo = {
Expand Down Expand Up @@ -380,14 +382,49 @@ export default class VTTSegmentLoader extends SegmentLoader {
this.handleAppendsDone_();
}

handleData_() {
// noop as we shouldn't be getting video/audio data captions
// that we do not support here.
handleData_(simpleSegment, result) {
const isVttType = simpleSegment && simpleSegment.type === 'vtt';
const isTextResult = result && result.type === 'text';
const isFmp4VttSegment = isVttType && isTextResult;
// handle segment data for fmp4 encapsulated webvtt

if (isFmp4VttSegment) {
super.handleData_(simpleSegment, result);
}
}

/**
 * Intentionally empty: this loader performs no work here.
 */
updateTimingInfoEnd_() {
// noop
}

/**
* Utility function for converting mp4 webvtt cue objects into VTTCues.
*
* @param {Object} segmentInfo with mp4 webvtt cues for parsing into VTTCue objecs
*/
parseMp4VttCues_(segmentInfo) {
const timestampOffset = this.sourceUpdater_.videoTimestampOffset() === null ?
this.sourceUpdater_.audioTimestampOffset() :
this.sourceUpdater_.videoTimestampOffset();

segmentInfo.mp4VttCues.forEach((cue) => {
const start = cue.start + timestampOffset;
const end = cue.end + timestampOffset;
const vttCue = new window.VTTCue(start, end, cue.cueText);

if (cue.settings) {
cue.settings.split(' ').forEach((cueSetting) => {
const keyValString = cueSetting.split(':');
const key = keyValString[0];
const value = keyValString[1];

vttCue[key] = isNaN(value) ? value : Number(value);
});
}
segmentInfo.cues.push(vttCue);
});
}

/**
* Uses the WebVTT parser to parse the segment response
*
Expand All @@ -406,6 +443,14 @@ export default class VTTSegmentLoader extends SegmentLoader {
throw new NoVttJsError();
}

segmentInfo.cues = [];
segmentInfo.timestampmap = { MPEGTS: 0, LOCAL: 0 };

if (segmentInfo.mp4VttCues) {
this.parseMp4VttCues_(segmentInfo);
return;
}

if (typeof window.TextDecoder === 'function') {
decoder = new window.TextDecoder('utf8');
} else {
Expand All @@ -419,9 +464,6 @@ export default class VTTSegmentLoader extends SegmentLoader {
decoder
);

segmentInfo.cues = [];
segmentInfo.timestampmap = { MPEGTS: 0, LOCAL: 0 };

parser.oncue = segmentInfo.cues.push.bind(segmentInfo.cues);
parser.ontimestampmap = (map) => {
segmentInfo.timestampmap = map;
Expand Down
85 changes: 84 additions & 1 deletion test/media-segment-request.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@ import {
mp4VideoInit,
muxed as muxedSegment,
webmVideo,
webmVideoInit
webmVideoInit,
mp4WebVttInit,
mp4WebVtt
} from 'create-test-data!segments';
// needed for plugin registration
import '../src/videojs-http-streaming';
Expand Down Expand Up @@ -1863,3 +1865,84 @@ QUnit.test('can get emsg ID3 frames from fmp4 audio segment', function(assert) {
// Simulate receiving the init segment after the media
this.standardXHRResponse(initReq, mp4AudioInit());
});

// Verifies that a media segment request for an fmp4 WebVTT segment hands the
// cues reported by the (mocked) transmuxer to doneFn as result.mp4VttCues.
QUnit.test('can get webvtt text from an fmp4 segment', function(assert) {
const done = assert.async();
// cues the mocked transmuxer reports and that doneFn is expected to receive
const expectedCues = [
{
cueText: '2024-10-16T05:13:50Z\nen # 864527815',
end: 1729055630.9,
settings: undefined,
start: 1729055630
},
{
cueText: '2024-10-16T05:13:51Z\nen # 864527815',
end: 1729055631.9,
settings: undefined,
start: 1729055631
}
];
const transmuxer = new videojs.EventTarget();

// Mocked transmuxer: answers the two actions below by triggering a 'message'
// event that echoes the action and data; any other action is ignored.
transmuxer.postMessage = (event) => {
// reply with the canned cues instead of parsing the segment
if (event.action === 'getMp4WebVttText') {
transmuxer.trigger({
type: 'message',
data: {
action: 'getMp4WebVttText',
data: event.data,
mp4VttCues: expectedCues
}
});
}

// report a single text track using the 'wvtt' codec
if (event.action === 'probeMp4Tracks') {
transmuxer.trigger({
type: 'message',
data: {
action: 'probeMp4Tracks',
data: event.data,
tracks: [{type: 'text', codec: 'wvtt'}]
}
});
}
};

mediaSegmentRequest({
xhr: this.xhr,
xhrOptions: this.xhrOptions,
decryptionWorker: this.mockDecrypter,
segment: {
transmuxer,
resolvedUri: 'mp4WebVtt.mp4',
map: {
resolvedUri: 'mp4WebVttInit.mp4'
},
isFmp4: true
},
progressFn: this.noop,
trackInfoFn: this.noop,
timingInfoFn: this.noop,
id3Fn: this.noop,
captionsFn: this.noop,
dataFn: this.noop,
// only the third argument (the result) is inspected
doneFn: (_e, _s, result) => {
assert.equal(result.mp4VttCues.length, 2, 'there are 2 mp4VttCues');
assert.deepEqual(result.mp4VttCues, expectedCues, 'mp4VttCues are expected values');
transmuxer.off();
done();
},
triggerSegmentEventFn: this.noop
});
assert.equal(this.requests.length, 2, 'there are two requests');

const initReq = this.requests.shift();
const segmentReq = this.requests.shift();

assert.equal(initReq.uri, 'mp4WebVttInit.mp4', 'the first request is for the init segment');
assert.equal(segmentReq.uri, 'mp4WebVtt.mp4', 'the second request is for a segment');

// respond to the init segment request first, then to the media segment request
this.standardXHRResponse(initReq, mp4WebVttInit());
this.standardXHRResponse(segmentReq, mp4WebVtt());
});
Binary file added test/segments/mp4WebVtt.mp4
Binary file not shown.
Binary file added test/segments/mp4WebVttInit.mp4
Binary file not shown.
Loading

0 comments on commit 9f1c4ad

Please sign in to comment.