diff --git a/index.html b/index.html index 628205d..a961450 100644 --- a/index.html +++ b/index.html @@ -9,7 +9,7 @@ // See https://github.com/w3c/respec/wiki/ for how to configure ReSpec var respecConfig = { group: "webrtc", - xref: ["html", "infra", "permissions", "dom", "mediacapture-streams", "webaudio", "webidl"], + xref: ["html", "infra", "permissions", "dom", "image-capture", "mediacapture-streams", "webaudio", "webcodecs", "webidl"], edDraftURI: "https://w3c.github.io/mediacapture-extensions/", editors: [ {name: "Jan-Ivar Bruaroey", company: "Mozilla Corporation", w3cid: 79152}, @@ -588,5 +588,431 @@
partial interface VideoFrame { + readonly attribute FrozenArray<DetectedFace>? detectedFaces; +};+
detectedFaces
of type {{FrozenArray}}<{{DetectedFace}}>, readonly, nullableA series of detected faces in this video frame.
+dictionary DetectedFace { + required long id; + required float probability; + FrozenArray<Point2D> contour; + FrozenArray<Point2D> mesh; + FrozenArray<DetectedFaceLandmark> landmarks; +};+
id
of type {{long}}A unique identifier of the face. + If the same face can be found in successive frames, + id is set to the same value for the face in all frames. + The special value of zero indicates that the face is not tracked + and several distinct faces can have the id of zero. + Typically this means that the face detection engine does not + support face tracking.
+probability
of type {{float}}A confidence value in range [0,1]. + The approximate probability of the detected face being really + a human face. + The special value of exact zero indicates that the probability is + not estimated or known.
+contour
of type {{FrozenArray}}<{{Point2D}}>A contour surrounding the detected face. + An example of a valid case is a four-point rectangle aligned to + the image axes which is the bounding box supported on many + platforms. + However, the API does not guarantee that the returned data is + aligned to the image axes in any way. + The points are given in image coordinates.
+If the current {{MediaTrackSettings/faceDetectionMode}} setting + of the {{MediaStreamTrack}} object is not + {{FaceDetectionMode/"contour"}} or {{FaceDetectionMode/"mesh"}}, + then this member will not exist.
+The length of the array is controlled by the current + {{MediaTrackSettings/faceDetectionNumContourPoints}} setting of + the {{MediaStreamTrack}} object.
+mesh
of type {{FrozenArray}}<{{Point2D}}>Arbitrary points on the face. + When only a single point is returned, + it SHOULD be located at the center of the face. + The points are given in image coordinates.
+If the current {{MediaTrackSettings/faceDetectionMode}} setting + of the {{MediaStreamTrack}} object is not + {{FaceDetectionMode/"mesh"}}, + then this member will not exist.
+landmarks
of type {{FrozenArray}}<{{DetectedFaceLandmark}}>A series of features of interest related to the detected + face.
+If the current {{MediaTrackSettings/faceDetectionLandmarks}}
+ setting of the {{MediaStreamTrack}} object is not
+ true
,
+ then this member will not exist.
dictionary DetectedFaceLandmark { + required FrozenArray<Point2D> contour; + FaceLandmark type; +};+
contour
of type {{FrozenArray}}<{{Point2D}}>A point at the center of the detected landmark, or + a sequence of points defining the vertices of a simple polygon + surrounding the landmark in either a clockwise or counter-clockwise + direction.
+The length of the array is controlled by the current + {{MediaTrackSettings/faceDetectionNumLandmarkPoints}} setting of + the {{MediaStreamTrack}} object.
+type
of type {{FaceLandmark}}The type of the detected landmark.
enum FaceLandmark { + "eye", + "eyeLeft", + "eyeRight", + "mouth", + "nose" +};+
eye
The landmark is identified as a human eye, + either left or right.
+eyeLeft
The landmark is identified as a human left eye.
eyeRight
The landmark is identified as a human right eye.
mouth
The landmark is identified as a human mouth.
nose
The landmark is identified as a human nose.
partial dictionary MediaTrackSupportedConstraints { + boolean faceDetectionMode = true; + boolean faceDetectionLandmarks = true; + boolean faceDetectionMaxNumFaces = true; + boolean faceDetectionNumContourPoints = true; + boolean faceDetectionNumLandmarkPoints = true; +};+
faceDetectionMode
of type {{boolean}}, defaulting to true
Whether face detection mode constraining is + recognized.
+faceDetectionLandmarks
of type {{boolean}}, defaulting to true
Whether face landmark detection mode constraining is + recognized.
+faceDetectionMaxNumFaces
of type {{boolean}}, defaulting to true
Whether maximum number of face detection faces + constraining is recognized.
+faceDetectionNumContourPoints
of type {{boolean}}, defaulting to true
Whether number of face detection contour points + constraining is recognized.
+faceDetectionNumLandmarkPoints
of type {{boolean}}, defaulting to true
Whether number of face detection landmark points + constraining is recognized.
+partial dictionary MediaTrackCapabilities { + sequence<DOMString> faceDetectionMode; + sequence<boolean> faceDetectionLandmarks; + ULongRange faceDetectionMaxNumFaces; + ULongRange faceDetectionNumContourPoints; + ULongRange faceDetectionNumLandmarkPoints; +};+
faceDetectionMode
of type sequence<{{DOMString}}>A sequence of supported face detection modes. + Each string MUST be one of the members of + {{FaceDetectionMode}}.
+faceDetectionLandmarks
of type sequence<{{boolean}}>A sequence of supported face landmark detection modes.
+ If the source cannot do landmark detection,
+ a single false
is reported.
+ If the landmark detection cannot be turned off,
+ a single true
is reported.
+ If the script can control the detection,
+ both false
and true
are reported as
+ possible values.
faceDetectionMaxNumFaces
of type {{ULongRange}}A supported range for the maximum number of face detection + faces.
+faceDetectionNumContourPoints
of type {{ULongRange}}A supported range for the number of face detection contour + points.
+faceDetectionNumLandmarkPoints
of type {{ULongRange}}A supported range for the number of face detection landmark + points.
+partial dictionary MediaTrackConstraintSet { + ConstrainDOMString faceDetectionMode; + ConstrainBoolean faceDetectionLandmarks; + ConstrainULong faceDetectionMaxNumFaces; + ConstrainULong faceDetectionNumContourPoints; + ConstrainULong faceDetectionNumLandmarkPoints; +};+
faceDetectionMode
of type {{ConstrainDOMString}}The string MUST be one of the members of {{FaceDetectionMode}}. + See face detection mode constrainable property.
+faceDetectionLandmarks
of type {{ConstrainBoolean}}See face landmark detection mode constrainable property.
+faceDetectionMaxNumFaces
of type {{ConstrainULong}}See maximum number of face detection faces constrainable property.
+faceDetectionNumContourPoints
of type {{ConstrainULong}}See number of face detection contour points constrainable property.
+faceDetectionNumLandmarkPoints
of type {{ConstrainULong}}See number of face detection landmark points constrainable property.
+partial dictionary MediaTrackSettings { + DOMString faceDetectionMode; + boolean faceDetectionLandmarks; + long faceDetectionMaxNumFaces; + long faceDetectionNumContourPoints; + long faceDetectionNumLandmarkPoints; +};+
faceDetectionMode
of type {{DOMString}}Current face detection mode setting. + The string MUST be one of the members of {{FaceDetectionMode}}.
+faceDetectionLandmarks
of type {{boolean}}Current face landmark detection mode setting.
+faceDetectionMaxNumFaces
of type {{long}}Current maximum number of face detection faces setting.
+faceDetectionNumContourPoints
of type {{long}}Current number of face detection contour points setting.
+faceDetectionNumLandmarkPoints
of type {{long}}Current number of face detection landmark points setting.
+enum FaceDetectionMode { + "none", + "presence", + "contour", + "mesh" +};+
none
This source does not offer human face detection. + For setting, this is interpreted as a command to turn off + the detection.
+presence
This source offers human face presence detection, + or such a mode is requested.
+This mode may be useful with a true
+ face landmark detection mode in order to detect human face
+ landmarks but not contours or meshes.
contour
This source offers human face contour detection, + or such a mode is requested.
+mesh
This source offers human face mesh and contour detection, + or such a mode is requested.
+It is possible to disable human face contour + detection in this mode by setting the number of face detection + contour points to zero.
+Face detection mode describes which face details + (presence, contour points, mesh points) are to be detected.
+Face landmark detection mode describes whether human + face landmarks are to be detected and exposed.
+Maximum number of face detection faces describes how + many human faces are to be detected and exposed at most.
+Number of face detection contour points describes how + many human face contour points are to be detected and exposed per + human face.
+Number of face detection landmark points describes how + many human face landmark points are to be detected and exposed per + human face landmark.
// main.js:
// Check if face detection is supported by the browser
const supports = navigator.mediaDevices.getSupportedConstraints();
if (supports.faceDetectionMode &&
    supports.faceDetectionNumContourPoints) {
  // Browser supports face contour detection.
} else {
  // Throw a real Error (not a bare string) so callers get a stack trace.
  throw new Error('Face contour detection is not supported');
}

// Open camera with face detection enabled
const stream = await navigator.mediaDevices.getUserMedia({
  video: {
    faceDetectionMode: 'contour',
    // Exactly four contour points are requested; the worker below logs
    // contour[0] through contour[3] for each face.
    faceDetectionNumContourPoints: {exact: 4}
  }
});
const [videoTrack] = stream.getVideoTracks();

// Use a video worker and show to user.
const videoElement = document.querySelector('video');
const videoWorker = new Worker('video-worker.js');
videoWorker.postMessage({track: videoTrack}, [videoTrack]);
// Install the resolver as the message handler; without the assignment the
// promise would never settle and the example would hang here.
const {data} = await new Promise(r => videoWorker.onmessage = r);
videoElement.srcObject = new MediaStream([data.videoTrack]);

// video-worker.js:
self.onmessage = async ({data: {track}}) => {
  const generator = new VideoTrackGenerator();
  // Inside a dedicated worker, replies to the owning page go through
  // self.postMessage; there is no `parent` object in worker scope.
  self.postMessage({videoTrack: generator.track}, [generator.track]);
  const {readable} = new MediaStreamTrackProcessor({track});
  const transformer = new TransformStream({
    async transform(frame, controller) {
      // detectedFaces is declared nullable, so fall back to an empty list.
      for (const face of frame.detectedFaces ?? []) {
        console.log(
          `Face @ (${face.contour[0].x}, ${face.contour[0].y}), ` +
                 `(${face.contour[1].x}, ${face.contour[1].y}), ` +
                 `(${face.contour[2].x}, ${face.contour[2].y}), ` +
                 `(${face.contour[3].x}, ${face.contour[3].y})`);
      }
      // Pass every frame through unchanged so the generated track keeps playing.
      controller.enqueue(frame);
    }
  });
  await readable.pipeThrough(transformer).pipeTo(generator.writable);
};