diff --git a/index.html b/index.html index 628205d..a961450 100644 --- a/index.html +++ b/index.html @@ -9,7 +9,7 @@ // See https://github.com/w3c/respec/wiki/ for how to configure ReSpec var respecConfig = { group: "webrtc", - xref: ["html", "infra", "permissions", "dom", "mediacapture-streams", "webaudio", "webidl"], + xref: ["html", "infra", "permissions", "dom", "image-capture", "mediacapture-streams", "webaudio", "webcodecs", "webidl"], edDraftURI: "https://w3c.github.io/mediacapture-extensions/", editors: [ {name: "Jan-Ivar Bruaroey", company: "Mozilla Corporation", w3cid: 79152}, @@ -588,5 +588,431 @@

Exposing change of MediaStreamTrack configuration

+
+

Face detection

+
+

{{VideoFrame}}

+
partial interface VideoFrame {
+  readonly attribute FrozenArray<DetectedFace>? detectedFaces;  // faces detected in this video frame; nullable
+};
+
+

Attributes

+
+
detectedFaces of type {{FrozenArray}}<{{DetectedFace}}>, readonly, nullable
+
+

A series of detected faces in this video frame.

+
+
+
+
+
+

{{DetectedFace}}

+
dictionary DetectedFace {
+  required long                     id;  // same value across frames for a tracked face; 0 means the face is not tracked
+  required float                    probability;  // confidence in [0,1]; exactly 0 means not estimated or known
+  FrozenArray<Point2D>              contour;  // present only in "contour" or "mesh" mode; image coordinates
+  FrozenArray<Point2D>              mesh;  // present only in "mesh" mode; image coordinates
+  FrozenArray<DetectedFaceLandmark> landmarks;  // present only when the faceDetectionLandmarks setting is true
+};
+
+

Dictionary {{DetectedFace}} Members

+
+
id of type {{long}}
+
+

A unique identifier of the face. + If the same face can be found in successive frames, + id is set to the same value for the face in all frames. + The special value of zero indicates that the face is not tracked + and several distinct faces can have the id of zero. + Typically this means that the face detection engine does not + support face tracking.

+
+
probability of type {{float}}
+
+

A confidence value in range [0,1]. + The approximate probability of the detected face being really + a human face. + The special value of exact zero indicates that the probability is + not estimated or known.

+
+
contour of type {{FrozenArray}}<{{Point2D}}>
+
+

A contour surrounding the detected face. + An example of a valid case is a four-point rectangle aligned to + the image axes which is the bounding box supported on many + platforms. + However, the API does not guarantee that the returned data is + aligned to the image axes in any way. + The points are given in image coordinates.

+

If the current {{MediaTrackSettings/faceDetectionMode}} setting + of the {{MediaStreamTrack}} object is not + {{FaceDetectionMode/"contour"}} or {{FaceDetectionMode/"mesh"}}, + then this member will not exist.

+

The length of the array is controlled by the current + {{MediaTrackSettings/faceDetectionNumContourPoints}} setting of + the {{MediaStreamTrack}} object.

+
+
mesh of type {{FrozenArray}}<{{Point2D}}>
+
+

Arbitrary points on the face. + When only a single point is returned, + it SHOULD be located at the center of the face. + The points are given in image coordinates.

+

If the current {{MediaTrackSettings/faceDetectionMode}} setting + of the {{MediaStreamTrack}} object is not + {{FaceDetectionMode/"mesh"}}, + then this member will not exist.

+
+
landmarks of type {{FrozenArray}}<{{DetectedFaceLandmark}}>
+
+

A series of features of interest related to the detected + face.

+

If the current {{MediaTrackSettings/faceDetectionLandmarks}} + setting of the {{MediaStreamTrack}} object is not + true, + then this member will not exist.

+
+
+
+
+
+

{{DetectedFaceLandmark}}

+
dictionary DetectedFaceLandmark {
+  required FrozenArray<Point2D> contour;  // landmark center point, or vertices of a simple polygon surrounding it
+  FaceLandmark                  type;  // the type of the detected landmark
+};
+
+

Dictionary {{DetectedFaceLandmark}} Members

+
+
contour of type {{FrozenArray}}<{{Point2D}}>
+
+

A point at the center of the detected landmark, or + a sequence of points defining the vertices of a simple polygon + surrounding the landmark in either a clockwise or counter-clockwise + direction.

+

The length of the array is controlled by the current + {{MediaTrackSettings/faceDetectionNumLandmarkPoints}} setting of + the {{MediaStreamTrack}} object.

+
+
type of type {{FaceLandmark}}
+

The type of the detected landmark.

+
+
+
+
+

{{FaceLandmark}}

+
enum FaceLandmark {
+  "eye",  // a human eye, either left or right
+  "eyeLeft",  // a human left eye
+  "eyeRight",  // a human right eye
+  "mouth",  // a human mouth
+  "nose"  // a human nose
+};
+
+

{{FaceLandmark}} Enumeration Description

+
+
eye
+
+

The landmark is identified as a human eye, + either left or right.

+
+
eyeLeft
+

The landmark is identified as a human left eye.

+
eyeRight
+

The landmark is identified as a human right eye.

+
mouth
+

The landmark is identified as a human mouth.

+
nose
+

The landmark is identified as a human nose.

+
+
+
+
+

{{MediaTrackSupportedConstraints}}

+
partial dictionary MediaTrackSupportedConstraints {
+  boolean faceDetectionMode = true;  // face detection mode constraining is recognized
+  boolean faceDetectionLandmarks = true;  // face landmark detection mode constraining is recognized
+  boolean faceDetectionMaxNumFaces = true;  // maximum-number-of-faces constraining is recognized
+  boolean faceDetectionNumContourPoints = true;  // number-of-contour-points constraining is recognized
+  boolean faceDetectionNumLandmarkPoints = true;  // number-of-landmark-points constraining is recognized
+};
+
+

Dictionary {{MediaTrackSupportedConstraints}} Members

+
+
faceDetectionMode of type {{boolean}}, defaulting to true
+
+

Whether face detection mode constraining is + recognized.

+
+
faceDetectionLandmarks of type {{boolean}}, defaulting to true
+
+

Whether face landmark detection mode constraining is + recognized.

+
+
faceDetectionMaxNumFaces of type {{boolean}}, defaulting to true
+
+

Whether maximum number of face detection faces + constraining is recognized.

+
+
faceDetectionNumContourPoints of type {{boolean}}, defaulting to true
+
+

Whether number of face detection contour points + constraining is recognized.

+
+
faceDetectionNumLandmarkPoints of type {{boolean}}, defaulting to true
+
+

Whether number of face detection landmark points + constraining is recognized.

+
+
+
+
+
+

{{MediaTrackCapabilities}}

+
partial dictionary MediaTrackCapabilities {
+  sequence<DOMString> faceDetectionMode;  // supported modes; each string is a FaceDetectionMode member
+  sequence<boolean>   faceDetectionLandmarks;  // [false], [true], or both when the script can control detection
+  ULongRange          faceDetectionMaxNumFaces;  // supported range for the maximum number of faces
+  ULongRange          faceDetectionNumContourPoints;  // supported range for the number of contour points
+  ULongRange          faceDetectionNumLandmarkPoints;  // supported range for the number of landmark points
+};
+
+

Dictionary {{MediaTrackCapabilities}} Members

+
+
faceDetectionMode of type sequence<{{DOMString}}>
+
+

A sequence of supported face detection modes. + Each string MUST be one of the members of + {{FaceDetectionMode}}.

+
+
faceDetectionLandmarks of type sequence<{{boolean}}>
+
+

A sequence of supported face landmark detection modes. + If the source cannot do landmark detection, + a single false is reported. + If the landmark detection cannot be turned off, + a single true is reported. + If the script can control the detection, + both false and true are reported as + possible values.

+
+
faceDetectionMaxNumFaces of type {{ULongRange}}
+
+

A supported range for the maximum number of face detection + faces.

+
+
faceDetectionNumContourPoints of type {{ULongRange}}
+
+

A supported range for the number of face detection contour + points.

+
+
faceDetectionNumLandmarkPoints of type {{ULongRange}}
+
+

A supported range for the number of face detection landmark + points.

+
+
+
+
+
+

{{MediaTrackConstraintSet}}

+
partial dictionary MediaTrackConstraintSet {
+  ConstrainDOMString faceDetectionMode;  // string must be a FaceDetectionMode member
+  ConstrainBoolean   faceDetectionLandmarks;  // constrains the face landmark detection mode
+  ConstrainULong     faceDetectionMaxNumFaces;  // constrains the maximum number of detected faces
+  ConstrainULong     faceDetectionNumContourPoints;  // constrains the number of contour points per face
+  ConstrainULong     faceDetectionNumLandmarkPoints;  // constrains the number of points per face landmark
+};
+
+

Dictionary {{MediaTrackConstraintSet}} Members

+
+
faceDetectionMode of type {{ConstrainDOMString}}
+
+

The string MUST be one of the members of {{FaceDetectionMode}}. + See face detection mode constrainable property.

+
+
faceDetectionLandmarks of type {{ConstrainBoolean}}
+
+

See face landmark detection mode constrainable property.

+
+
faceDetectionMaxNumFaces of type {{ConstrainULong}}
+
+

See maximum number of face detection faces constrainable property.

+
+
faceDetectionNumContourPoints of type {{ConstrainULong}}
+
+

See number of face detection contour points constrainable property.

+
+
faceDetectionNumLandmarkPoints of type {{ConstrainULong}}
+
+

See number of face detection landmark points constrainable property.

+
+
+
+
+
+

{{MediaTrackSettings}}

+
partial dictionary MediaTrackSettings {
+  DOMString faceDetectionMode;  // current mode; string must be a FaceDetectionMode member
+  boolean   faceDetectionLandmarks;  // current face landmark detection mode
+  long      faceDetectionMaxNumFaces;  // current maximum number of faces. NOTE(review): signed long here, but ULongRange/ConstrainULong elsewhere — confirm intended
+  long      faceDetectionNumContourPoints;  // current number of contour points (same signedness question as above)
+  long      faceDetectionNumLandmarkPoints;  // current number of landmark points (same signedness question as above)
+};
+
+

Dictionary {{MediaTrackSettings}} Members

+
+
faceDetectionMode of type {{DOMString}}
+
+

Current face detection mode setting. + The string MUST be one of the members of {{FaceDetectionMode}}.

+
+
faceDetectionLandmarks of type {{boolean}}
+
+

Current face landmark detection mode setting.

+
+
faceDetectionMaxNumFaces of type {{long}}
+
+

Current maximum number of face detection faces setting.

+
+
faceDetectionNumContourPoints of type {{long}}
+
+

Current number of face detection contour points setting.

+
+
faceDetectionNumLandmarkPoints of type {{long}}
+
+

Current number of face detection landmark points setting.

+
+
+
+
+
+

{{FaceDetectionMode}}

+
enum FaceDetectionMode {
+  "none",  // source offers no face detection; as a setting, turns detection off
+  "presence",  // face presence detection only (no contours or meshes)
+  "contour",  // face contour detection
+  "mesh"  // face mesh and contour detection
+};
+
+

{{FaceDetectionMode}} Enumeration Description

+
+
none
+
+

This source does not offer human face detection. + For setting, this is interpreted as a command to turn off + the detection.

+
+
presence
+
+

This source offers human face presence detection, + or such a mode is requested.

+

This mode may be useful with a true + face landmark detection mode in order to detect human face + landmarks but not contours or meshes.

+
+
contour
+
+

This source offers human face contour detection, + or such a mode is requested.

+
+
mesh
+
+

This source offers human face mesh and contour detection, + or such a mode is requested.

+

It is possible to disable human face contour + detection in this mode by setting the number of face detection + contour points to zero.

+
+
+
+
+
+

Constrainable Properties

+
    +
  1. +

    Face detection mode describes which face details + (presence, contour points, mesh points) are to be detected.

    +
  2. +
  3. +

    Face landmark detection mode describes whether human + face landmarks are to be detected and exposed.

    +
  4. +
  5. +

    Maximum number of face detection faces describes how + many human faces are to be detected and exposed at most.

    +
  6. +
  7. +

    Number of face detection contour points describes how + many human face contour points are to be detected and exposed per + human face.

    +
  8. +
  9. +

    Number of face detection landmark points describes how + many human face landmark points are to be detected and exposed per + human face landmark.

    +
  10. +
+
+
+

Examples

+
+// main.js:
+// Check if face detection is supported by the browser
+const supports = navigator.mediaDevices.getSupportedConstraints();
+if (supports.faceDetectionMode &&
+    supports.faceDetectionNumContourPoints) {
+  // Browser supports face contour detection.
+} else {
+  throw('Face contour detection is not supported');
+}
+
+// Open camera with face detection enabled
+const stream = await navigator.mediaDevices.getUserMedia({
+  video: {
+    faceDetectionMode: 'contour',
+    faceDetectionNumContourPoints: {exact: 4}
+  }
+});
+const [videoTrack] = stream.getVideoTracks();
+
+// Use a video worker and show to user.
+const videoElement = document.querySelector('video');
+const videoWorker = new Worker('video-worker.js');
+videoWorker.postMessage({track: videoTrack}, [videoTrack]);
+const {data} = await new Promise(r => videoWorker.onmessage);
+videoElement.srcObject = new MediaStream([data.videoTrack]);
+
+// video-worker.js:
+self.onmessage = async ({data: {track}}) => {
+  const generator = new VideoTrackGenerator();
+  parent.postMessage({videoTrack: generator.track}, [generator.track]);
+  const {readable} = new MediaStreamTrackProcessor({track});
+  const transformer = new TransformStream({
+    async transform(frame, controller) {
+      for (const face of frame.detectedFaces) {
+        console.log(
+          `Face @ (${face.contour[0].x}, ${face.contour[0].y}), ` +
+                 `(${face.contour[1].x}, ${face.contour[1].y}), ` +
+                 `(${face.contour[2].x}, ${face.contour[2].y}), ` +
+                 `(${face.contour[3].x}, ${face.contour[3].y})`);
+      }
+      controller.enqueue(frame);
+    }
+  });
+  await readable.pipeThrough(transformer).pipeTo(generator.writable);
+};
+      
+
+