Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[MediaVision] Add new inference APIs #6316

Merged
merged 6 commits into from
Sep 19, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion src/Tizen.Multimedia.Vision/Interop/Interop.Libraries.cs
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,17 @@ internal static partial class Libraries
{
public const string MediaVisionCommon = "libmv_common.so";
public const string MediaVisionFace = "libmv_face.so";
public const string MediaVisionInference = "libmv_inference.so";
public const string MediaVisionImage = "libmv_image.so";
public const string MediaVisionSurveillance = "libmv_surveillance.so";
public const string MediaVisionBarcodeDetector = "libmv_barcode_detector.so";
public const string MediaVisionBarcodeGenerator = "libmv_barcode_generator.so";
public const string MediaVisionRoiTracker = "libmv_roi_tracker.so";
public const string MediaVisionFaceRecognition = "libmv_face_recognition.so"; // It's based on machine learning
public const string MediaVisionInference = "libmv_inference.so";
public const string MediaVisionInferenceImageClassification = "libmv_image_classification.so"; // Inference image classification
public const string MediaVisionInferenceObjectDetection = "libmv_object_detection.so";
public const string MediaVisionInferenceFaceDetection = MediaVisionInferenceObjectDetection; // Inference object detection and face detection
public const string MediaVisionInferenceFacialLandmarkDetection = "libmv_landmark_detection.so";
public const string MediaVisionInferencePoseLandmarkDetection = "libmv_landmark_detection.so"; // Inference facial landmark detection and pose landmark detection
}
}
140 changes: 140 additions & 0 deletions src/Tizen.Multimedia.Vision/Interop/Interop.MediaVision.Inference.cs
Original file line number Diff line number Diff line change
Expand Up @@ -112,5 +112,145 @@ internal static extern MediaVisionError DetectFacialLandmark(IntPtr source, IntP
internal static extern MediaVisionError DetectPoseLandmark(IntPtr source, IntPtr inference,
IntPtr roi, PoseLandmarkDetectedCallback callback, IntPtr userData = default(IntPtr)); // Deprecated in API 12
}

// P/Invoke declarations for the mv_image_classification_* entry points exported by the
// library named in Libraries.MediaVisionInferenceImageClassification.
// Every function reports its outcome as a MediaVisionError.
internal static partial class InferenceImageClassification
{
// Newly added inference APIs
[DllImport(Libraries.MediaVisionInferenceImageClassification, EntryPoint = "mv_image_classification_create")]
internal static extern MediaVisionError Create(out IntPtr handle);

[DllImport(Libraries.MediaVisionInferenceImageClassification, EntryPoint = "mv_image_classification_destroy")]
internal static extern MediaVisionError Destroy(IntPtr handle);

[DllImport(Libraries.MediaVisionInferenceImageClassification, EntryPoint = "mv_image_classification_configure")]
internal static extern MediaVisionError Configure(IntPtr handle);

[DllImport(Libraries.MediaVisionInferenceImageClassification, EntryPoint = "mv_image_classification_prepare")]
internal static extern MediaVisionError Prepare(IntPtr handle);

[DllImport(Libraries.MediaVisionInferenceImageClassification, EntryPoint = "mv_image_classification_inference")]
internal static extern MediaVisionError Inference(IntPtr handle, IntPtr source);

[DllImport(Libraries.MediaVisionInferenceImageClassification, EntryPoint = "mv_image_classification_inference_async")]
internal static extern MediaVisionError InferenceAsync(IntPtr handle, IntPtr source);

// NOTE(review): requestOrder is marshalled as a 64-bit value; confirm the native parameter
// is 64 bits wide on every supported ABI.
[DllImport(Libraries.MediaVisionInferenceImageClassification, EntryPoint = "mv_image_classification_get_result_count")]
internal static extern MediaVisionError GetResultCount(IntPtr handle, out ulong requestOrder, out uint count);

// The label comes back as a raw pointer; presumably a native string owned by the library.
// TODO confirm ownership and encoding before marshalling it.
[DllImport(Libraries.MediaVisionInferenceImageClassification, EntryPoint = "mv_image_classification_get_label")]
internal static extern MediaVisionError GetLabel(IntPtr handle, uint index, out IntPtr label);
}

// P/Invoke declarations for the mv_face_detection_* entry points exported by the
// library named in Libraries.MediaVisionInferenceFaceDetection.
// Every function reports its outcome as a MediaVisionError.
internal static partial class InferenceFaceDetection
{
// Newly added inference APIs
[DllImport(Libraries.MediaVisionInferenceFaceDetection, EntryPoint = "mv_face_detection_create")]
internal static extern MediaVisionError Create(out IntPtr handle);

[DllImport(Libraries.MediaVisionInferenceFaceDetection, EntryPoint = "mv_face_detection_destroy")]
internal static extern MediaVisionError Destroy(IntPtr handle);

[DllImport(Libraries.MediaVisionInferenceFaceDetection, EntryPoint = "mv_face_detection_configure")]
internal static extern MediaVisionError Configure(IntPtr handle);

[DllImport(Libraries.MediaVisionInferenceFaceDetection, EntryPoint = "mv_face_detection_prepare")]
internal static extern MediaVisionError Prepare(IntPtr handle);

[DllImport(Libraries.MediaVisionInferenceFaceDetection, EntryPoint = "mv_face_detection_inference")]
internal static extern MediaVisionError Inference(IntPtr handle, IntPtr source);

[DllImport(Libraries.MediaVisionInferenceFaceDetection, EntryPoint = "mv_face_detection_inference_async")]
internal static extern MediaVisionError InferenceAsync(IntPtr handle, IntPtr source);

// NOTE(review): requestOrder is marshalled as a 64-bit value; confirm the native parameter
// is 64 bits wide on every supported ABI.
[DllImport(Libraries.MediaVisionInferenceFaceDetection, EntryPoint = "mv_face_detection_get_result_count")]
internal static extern MediaVisionError GetResultCount(IntPtr handle, out ulong requestOrder, out uint count);

// Reads the bound box of the result at the given index as four edge coordinates.
[DllImport(Libraries.MediaVisionInferenceFaceDetection, EntryPoint = "mv_face_detection_get_bound_box")]
internal static extern MediaVisionError GetBoundBox(IntPtr handle, uint index, out int left, out int top, out int right, out int bottom);
}

// P/Invoke declarations for the mv_object_detection_* entry points exported by the
// library named in Libraries.MediaVisionInferenceObjectDetection.
// Every function reports its outcome as a MediaVisionError.
internal static partial class InferenceObjectDetection
{
// Newly added inference APIs
[DllImport(Libraries.MediaVisionInferenceObjectDetection, EntryPoint = "mv_object_detection_create")]
internal static extern MediaVisionError Create(out IntPtr handle);

[DllImport(Libraries.MediaVisionInferenceObjectDetection, EntryPoint = "mv_object_detection_destroy")]
internal static extern MediaVisionError Destroy(IntPtr handle);

[DllImport(Libraries.MediaVisionInferenceObjectDetection, EntryPoint = "mv_object_detection_configure")]
internal static extern MediaVisionError Configure(IntPtr handle);

[DllImport(Libraries.MediaVisionInferenceObjectDetection, EntryPoint = "mv_object_detection_prepare")]
internal static extern MediaVisionError Prepare(IntPtr handle);

[DllImport(Libraries.MediaVisionInferenceObjectDetection, EntryPoint = "mv_object_detection_inference")]
internal static extern MediaVisionError Inference(IntPtr handle, IntPtr source);

[DllImport(Libraries.MediaVisionInferenceObjectDetection, EntryPoint = "mv_object_detection_inference_async")]
internal static extern MediaVisionError InferenceAsync(IntPtr handle, IntPtr source);

// NOTE(review): requestOrder is marshalled as a 64-bit value; confirm the native parameter
// is 64 bits wide on every supported ABI.
[DllImport(Libraries.MediaVisionInferenceObjectDetection, EntryPoint = "mv_object_detection_get_result_count")]
internal static extern MediaVisionError GetResultCount(IntPtr handle, out ulong requestOrder, out uint count);

// Reads the bound box of the result at the given index as four edge coordinates.
[DllImport(Libraries.MediaVisionInferenceObjectDetection, EntryPoint = "mv_object_detection_get_bound_box")]
internal static extern MediaVisionError GetBoundBox(IntPtr handle, uint index, out int left, out int top, out int right, out int bottom);
}

// P/Invoke declarations for the mv_facial_landmark_* entry points exported by the
// library named in Libraries.MediaVisionInferenceFacialLandmarkDetection.
// Every function reports its outcome as a MediaVisionError.
internal static partial class InferenceFacialLandmarkDetection
{
// Newly added inference APIs
[DllImport(Libraries.MediaVisionInferenceFacialLandmarkDetection, EntryPoint = "mv_facial_landmark_create")]
internal static extern MediaVisionError Create(out IntPtr handle);

[DllImport(Libraries.MediaVisionInferenceFacialLandmarkDetection, EntryPoint = "mv_facial_landmark_destroy")]
internal static extern MediaVisionError Destroy(IntPtr handle);

[DllImport(Libraries.MediaVisionInferenceFacialLandmarkDetection, EntryPoint = "mv_facial_landmark_configure")]
internal static extern MediaVisionError Configure(IntPtr handle);

[DllImport(Libraries.MediaVisionInferenceFacialLandmarkDetection, EntryPoint = "mv_facial_landmark_prepare")]
internal static extern MediaVisionError Prepare(IntPtr handle);

[DllImport(Libraries.MediaVisionInferenceFacialLandmarkDetection, EntryPoint = "mv_facial_landmark_inference")]
internal static extern MediaVisionError Inference(IntPtr handle, IntPtr source);

[DllImport(Libraries.MediaVisionInferenceFacialLandmarkDetection, EntryPoint = "mv_facial_landmark_inference_async")]
internal static extern MediaVisionError InferenceAsync(IntPtr handle, IntPtr source);

// NOTE(review): requestOrder is marshalled as a 64-bit value; confirm the native parameter
// is 64 bits wide on every supported ABI.
[DllImport(Libraries.MediaVisionInferenceFacialLandmarkDetection, EntryPoint = "mv_facial_landmark_get_result_count")]
internal static extern MediaVisionError GetResultCount(IntPtr handle, out ulong requestOrder, out uint count);

// Reads the X/Y position of the landmark at the given index.
[DllImport(Libraries.MediaVisionInferenceFacialLandmarkDetection, EntryPoint = "mv_facial_landmark_get_position")]
internal static extern MediaVisionError GetPosition(IntPtr handle, uint index, out uint posX, out uint posY);
}

// P/Invoke declarations for the mv_pose_landmark_* entry points exported by the
// library named in Libraries.MediaVisionInferencePoseLandmarkDetection.
// Every function reports its outcome as a MediaVisionError.
internal static partial class InferencePoseLandmarkDetection
{
// Newly added inference APIs
[DllImport(Libraries.MediaVisionInferencePoseLandmarkDetection, EntryPoint = "mv_pose_landmark_create")]
internal static extern MediaVisionError Create(out IntPtr handle);

[DllImport(Libraries.MediaVisionInferencePoseLandmarkDetection, EntryPoint = "mv_pose_landmark_destroy")]
internal static extern MediaVisionError Destroy(IntPtr handle);

[DllImport(Libraries.MediaVisionInferencePoseLandmarkDetection, EntryPoint = "mv_pose_landmark_configure")]
internal static extern MediaVisionError Configure(IntPtr handle);

[DllImport(Libraries.MediaVisionInferencePoseLandmarkDetection, EntryPoint = "mv_pose_landmark_prepare")]
internal static extern MediaVisionError Prepare(IntPtr handle);

[DllImport(Libraries.MediaVisionInferencePoseLandmarkDetection, EntryPoint = "mv_pose_landmark_inference")]
internal static extern MediaVisionError Inference(IntPtr handle, IntPtr source);

[DllImport(Libraries.MediaVisionInferencePoseLandmarkDetection, EntryPoint = "mv_pose_landmark_inference_async")]
internal static extern MediaVisionError InferenceAsync(IntPtr handle, IntPtr source);

// NOTE(review): requestOrder is marshalled as a 64-bit value; confirm the native parameter
// is 64 bits wide on every supported ABI.
[DllImport(Libraries.MediaVisionInferencePoseLandmarkDetection, EntryPoint = "mv_pose_landmark_get_result_count")]
internal static extern MediaVisionError GetResultCount(IntPtr handle, out ulong requestOrder, out uint count);

// Reads the X/Y position of the landmark at the given index.
[DllImport(Libraries.MediaVisionInferencePoseLandmarkDetection, EntryPoint = "mv_pose_landmark_get_position")]
internal static extern MediaVisionError GetPosition(IntPtr handle, uint index, out uint posX, out uint posY);
}
}
}
204 changes: 204 additions & 0 deletions src/Tizen.Multimedia.Vision/MediaVision/InferenceFaceDetector.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
/*
* Copyright (c) 2024 Samsung Electronics Co., Ltd All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the License);
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

using System;
using System.Threading;
using System.Threading.Tasks;
using InteropFD = Interop.MediaVision.InferenceFaceDetection;

namespace Tizen.Multimedia.Vision
{
/// <summary>
/// Provides the ability to detect faces.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would like a plural in those class descriptions more: "detect faces", "classify images", "objects".

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll fix it. But it's different in image classifier case.
It works with only one input image, so I'll keep the word classify image.

/// </summary>
/// <feature>http://tizen.org/feature/vision.inference</feature>
/// <feature>http://tizen.org/feature/vision.inference.face</feature>
/// <since_tizen> 12 </since_tizen>
public class InferenceFaceDetector : IDisposable
{
private IntPtr _handle;
private bool _disposed;

/// <summary>Initializes a new instance of the <see cref="InferenceFaceDetector"/> class.</summary>
/// <remarks>
/// The native handle is created, configured and prepared here. If configuring or preparing fails,
/// the handle is destroyed and the original exception is rethrown.
/// </remarks>
/// <exception cref="NotSupportedException">The required features are not supported.</exception>
/// <since_tizen> 12 </since_tizen>
public InferenceFaceDetector()
{
    ValidationUtil.ValidateFeatureSupported(VisionFeatures.Inference);
    ValidationUtil.ValidateFeatureSupported(VisionFeatures.InferenceFace);

    InteropFD.Create(out _handle).Validate("Failed to create inference face detector.");

    try
    {
        InteropFD.Configure(_handle).Validate("Failed to configure inference face detector.");
        InteropFD.Prepare(_handle).Validate("Failed to prepare inference face detector.");
    }
    catch (Exception e)
    {
        Log.Error(MediaVisionLog.Tag, e.ToString());
        InteropFD.Destroy(_handle);

        // The finalizer still runs for an object whose constructor threw. Clear the handle so
        // that Dispose(false) does not destroy the same native handle a second time.
        _handle = IntPtr.Zero;
        throw;
    }
}

/// <summary>
/// Finalizer; runs the unmanaged cleanup path when <see cref="Dispose()"/> was never called.
/// </summary>
~InferenceFaceDetector() => Dispose(false);

/// <summary>
/// Detects face on the source image synchronously.
/// </summary>
/// <remarks>
/// If there's no detected face, <see cref="InferenceFaceDetectorResult.BoundBox"/> will be empty.
/// </remarks>
/// <param name="source">The image data to detect face.</param>
/// <returns>The detection result that holds the bound boxes of the detected faces.</returns>
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't see "label" in the result

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll fix it.

/// <exception cref="ObjectDisposedException">This InferenceFaceDetector has already been disposed.</exception>
/// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
/// <since_tizen> 12 </since_tizen>
public InferenceFaceDetectorResult Inference(MediaVisionSource source)
{
    // A disposed detector is reported before the argument is examined.
    ValidateNotDisposed();

    if (source == null)
    {
        throw new ArgumentNullException(nameof(source));
    }

    // Run the native inference and turn a failing error code into an exception.
    var error = InteropFD.Inference(_handle, source.Handle);
    error.Validate("Failed to inference face detection.");

    var result = new InferenceFaceDetectorResult(_handle);
    return result;
}

/// <summary>
/// Detects face on the source image asynchronously.
/// </summary>
/// <remarks>
/// If there's no detected face, <see cref="InferenceFaceDetectorResult.BoundBox"/> will be empty.<br/>
/// This method uses about twice as much memory as <see cref="InferenceFaceDetector.Inference"/>.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please replace all "this API" related to methods with "this method" in the PR

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll fix it.

/// </remarks>
/// <param name="source">The image data to detect face.</param>
/// <returns>A task that represents the asynchronous detection. The task result holds the detection result.</returns>
/// <exception cref="ObjectDisposedException">The InferenceFaceDetector already has been disposed.</exception>
/// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
/// <since_tizen> 12 </since_tizen>
public async Task<InferenceFaceDetectorResult> InferenceAsync(MediaVisionSource source)
{
    // Because this method is async, the exceptions thrown below surface through the returned task.
    ValidateNotDisposed();

    if (source == null)
    {
        throw new ArgumentNullException(nameof(source));
    }

    InteropFD.InferenceAsync(_handle, source.Handle).Validate("Failed to inference face detection.");

    // The result is built on a dedicated (LongRunning) thread of the default scheduler;
    // presumably the result constructor waits for the native side to finish - TODO confirm.
    // ConfigureAwait(false): nothing after the await needs the caller's synchronization context,
    // and not capturing it avoids a deadlock when a caller blocks on the returned task.
    return await Task.Factory.StartNew(() => new InferenceFaceDetectorResult(_handle),
        CancellationToken.None,
        TaskCreationOptions.DenyChildAttach | TaskCreationOptions.LongRunning,
        TaskScheduler.Default).ConfigureAwait(false);
}

/// <summary>
/// Requests face detection on the given source image.<br/>
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
/// Requests to detect face on the given source image.<br/>
/// Requests detecting face on the given source image.<br/>

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll fix it.

/// </summary>
/// <remarks>
/// This method does not guarantee that inference has finished when it returns. The user can get the result by using <see cref="GetBoundBox"/>.<br/>
/// If the user calls this method again before the previous request has finished internally, the new call is ignored.<br/>
/// If there's no detected face, <see cref="InferenceFaceDetectorResult.BoundBox"/> will be empty.<br/>
/// Note that this method could use about twice as much memory as <see cref="InferenceFaceDetector.Inference"/>.
/// </remarks>
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
/// This API is not guranteed that inference is done when this method returns. The user can get the result by using <see cref="GetBoundBox"/>.<br/>
/// And the user call this API again before the previous one is finished internally, API call will be ignored until the previous one is finished.<br/>
/// If there's no detected face, <see cref="InferenceFaceDetectorResult.BoundBox"/> will be empty.<br/>
/// Note that this API could use about twice as much memory as <see cref="InferenceFaceDetector.Inference"/>.
/// </remarks>
/// This function does not guarantee that inference is done when this method returns. The user can get the result by using <see cref="GetBoundBox"/>.<br/>
/// If the user calls this method again before the previous one is finished internally, the call will be ignored.<br/>
/// If there's no detected face, <see cref="InferenceFaceDetectorResult.BoundBox"/> will be empty.<br/>
/// Note that this method could use about twice as much memory as <see cref="InferenceFaceDetector.Inference"/>.
/// </remarks>

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll fix it.

/// <param name="source">The image on which faces are to be detected.</param>
/// <exception cref="ObjectDisposedException">This InferenceFaceDetector has already been disposed.</exception>
/// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
/// <seealso cref="GetBoundBox"/>
/// <since_tizen> 12 </since_tizen>
public void RequestInference(MediaVisionSource source)
{
    // A disposed detector is reported before the argument is examined.
    ValidateNotDisposed();

    if (source == null)
    {
        throw new ArgumentNullException(nameof(source));
    }

    // Hand the request to the native asynchronous entry point; only the request itself is validated here.
    var error = InteropFD.InferenceAsync(_handle, source.Handle);
    error.Validate("Failed to inference face detection.");
}

/// <summary>
/// Gets the bound box as a result of <see cref="RequestInference"/>.
/// </summary>
/// <remarks>
/// If there's no detected face, <see cref="InferenceFaceDetectorResult.BoundBox"/> will be empty.<br/>
/// This method uses about twice as much memory as <see cref="InferenceFaceDetector.Inference"/>.
/// </remarks>
/// <returns>A bound box of detected face.</returns>
/// <exception cref="ObjectDisposedException">The InferenceFaceDetector already has been disposed.</exception>
/// <seealso cref="RequestInference"/>
/// <since_tizen> 12 </since_tizen>
public InferenceFaceDetectorResult GetBoundBox()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If think those methods (GetBoundBox(), GetLabel(), ...) can be named just "GetResult()"
What if there was not inference requested already? InvalidOperation exception would be fitting, but maybe empty result is returned?

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When deciding on a function name, it would be best to choose one that allows you to guess what result can be obtained from it. GetResult() has implicit mearning.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it could be changed to GetRequestResults(), because it's include request id also and it could be added more property in the future.

{
    // Honor the documented ObjectDisposedException: after Dispose the handle is IntPtr.Zero
    // and must not be handed to a new result object.
    ValidateNotDisposed();

    return new InferenceFaceDetectorResult(_handle);
}

/// <summary>
/// Frees the native face detector handle owned by this instance.
/// </summary>
/// <param name="disposing">true to release both managed and unmanaged resources; false to release only unmanaged resources.</param>
/// <since_tizen> 12 </since_tizen>
protected virtual void Dispose(bool disposing)
{
    // Repeated calls are no-ops.
    if (_disposed)
    {
        return;
    }

    if (disposing)
    {
        // Placeholder: no other disposable objects are owned at the moment.
    }

    // Destroy the native handle at most once, then forget it.
    if (_handle != IntPtr.Zero)
    {
        InteropFD.Destroy(_handle);
        _handle = IntPtr.Zero;
    }

    _disposed = true;
}

/// <summary>
/// Disposes this InferenceFaceDetector, releasing every resource it holds.
/// </summary>
/// <since_tizen> 12 </since_tizen>
public void Dispose()
{
    // Deterministic cleanup path.
    Dispose(disposing: true);

    // Cleanup has already run, so the finalizer has nothing left to do.
    GC.SuppressFinalize(this);
}

// Logs an error and throws ObjectDisposedException once this instance has been disposed;
// otherwise returns without doing anything.
internal void ValidateNotDisposed()
{
    if (!_disposed)
    {
        return;
    }

    Log.Error(MediaVisionLog.Tag, "InferenceFaceDetector handle is disposed.");
    throw new ObjectDisposedException(nameof(InferenceFaceDetector));
}
}
}
Loading
Loading