diff --git a/README.md b/README.md
index 8ed70ce..46ef444 100644
--- a/README.md
+++ b/README.md
@@ -4,7 +4,7 @@
[![Ultralytics Actions](https://github.com/ultralytics/yolo-ios-app/actions/workflows/format.yml/badge.svg)](https://github.com/ultralytics/yolo-ios-app/actions/workflows/format.yml)
-Welcome to the [Ultralytics YOLO iOS App](https://apps.apple.com/us/app/idetection/id1452689527) GitHub repository! 📖 Leveraging Ultralytics' advanced [YOLOv8 object detection models](https://github.com/ultralytics/ultralytics), this app transforms your iOS device into an intelligent detection tool. Explore our guide to get started with the Ultralytics YOLO iOS App and discover the world in a new and exciting way.
+Welcome to the [Ultralytics YOLO iOS App](https://apps.apple.com/us/app/idetection/id1452689527) GitHub repository! 📖 Leveraging Ultralytics' advanced [YOLO11 object detection models](https://github.com/ultralytics/ultralytics), this app transforms your iOS device into an intelligent detection tool. Explore our guide to get started with the Ultralytics YOLO iOS App and discover the world in a new and exciting way.
@@ -60,17 +60,17 @@ Ensure you have the following before you start:
In Xcode, go to the project's target settings and choose your Apple Developer account under the "Signing & Capabilities" tab.
-3. **Add YOLOv8 Models to the Project:**
+3. **Add YOLO11 Models to the Project:**
- Export CoreML INT8 models using the `ultralytics` Python package (with `pip install ultralytics`), or download them from our [GitHub release assets](https://github.com/ultralytics/yolo-ios-app/releases). You should have 5 YOLOv8 models in total. Place these in the `YOLO/Models` directory as seen in the Xcode screenshot below.
+ Export CoreML INT8 models using the `ultralytics` Python package (with `pip install ultralytics`), or download them from our [GitHub release assets](https://github.com/ultralytics/yolo-ios-app/releases). You should have 5 YOLO11 models in total. Place these in the `YOLO/Models` directory as seen in the Xcode screenshot below.
```python
from ultralytics import YOLO
- # Loop through all YOLOv8 model sizes
+ # Loop through all YOLO11 model sizes
for size in ("n", "s", "m", "l", "x"):
- # Load a YOLOv8 PyTorch model
- model = YOLO(f"yolov8{size}.pt")
+ # Load a YOLO11 PyTorch model
+ model = YOLO(f"yolo11{size}.pt")
# Export the PyTorch model to CoreML INT8 format with NMS layers
model.export(format="coreml", int8=True, nms=True, imgsz=[640, 384])
@@ -89,7 +89,7 @@ Ensure you have the following before you start:
The Ultralytics YOLO iOS App is designed to be intuitive:
- **Real-Time Detection:** Launch the app and aim your camera at objects to detect them instantly.
-- **Multiple AI Models:** Select from a range of Ultralytics YOLOv8 models, from YOLOv8n 'nano' to YOLOv8x 'x-large'.
+- **Multiple AI Models:** Select from a range of Ultralytics YOLO11 models, from YOLO11n 'nano' to YOLO11x 'x-large'.
## 💡 Contribute
diff --git a/YOLO.xcodeproj/project.pbxproj b/YOLO.xcodeproj/project.pbxproj
index 68916e9..b924a39 100644
--- a/YOLO.xcodeproj/project.pbxproj
+++ b/YOLO.xcodeproj/project.pbxproj
@@ -13,16 +13,25 @@
636EFCAF21E62DD300DE43BC /* VideoCapture.swift in Sources */ = {isa = PBXBuildFile; fileRef = 636EFCA221E62DD300DE43BC /* VideoCapture.swift */; };
636EFCB321E62DD300DE43BC /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = 636EFCA721E62DD300DE43BC /* AppDelegate.swift */; };
636EFCB921E62E3900DE43BC /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 636EFCB821E62E3900DE43BC /* Assets.xcassets */; };
- 6381D2182B7817C200ABA4E8 /* yolov8l.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 6381D2132B7817C200ABA4E8 /* yolov8l.mlpackage */; };
- 6381D2192B7817C200ABA4E8 /* yolov8x.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 6381D2142B7817C200ABA4E8 /* yolov8x.mlpackage */; };
- 6381D21A2B7817C200ABA4E8 /* yolov8s.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 6381D2152B7817C200ABA4E8 /* yolov8s.mlpackage */; };
- 6381D21B2B7817C200ABA4E8 /* yolov8m.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 6381D2162B7817C200ABA4E8 /* yolov8m.mlpackage */; };
- 6381D21C2B7817C200ABA4E8 /* yolov8n.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 6381D2172B7817C200ABA4E8 /* yolov8n.mlpackage */; };
63CF371F2514455300E2DEA1 /* LaunchScreen.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 6323C44D22186177008AE681 /* LaunchScreen.storyboard */; };
63CF37202514455300E2DEA1 /* Main.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 6323C44F22186177008AE681 /* Main.storyboard */; };
63CF37212514455300E2DEA1 /* ultralytics_yolo_logotype.png in Resources */ = {isa = PBXBuildFile; fileRef = 6323C45122186177008AE681 /* ultralytics_yolo_logotype.png */; };
+ 733FEE4F2CF2D77600C0D4E9 /* yolo11s.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 733FEE4D2CF2D77600C0D4E9 /* yolo11s.mlpackage */; };
+ 733FEE502CF2D77600C0D4E9 /* yolo11m.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 733FEE4B2CF2D77600C0D4E9 /* yolo11m.mlpackage */; };
+ 733FEE512CF2D77600C0D4E9 /* yolo11n.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 733FEE4C2CF2D77600C0D4E9 /* yolo11n.mlpackage */; };
+ 733FEE522CF2D77600C0D4E9 /* yolo11x.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 733FEE4E2CF2D77600C0D4E9 /* yolo11x.mlpackage */; };
+ 733FEE532CF2D77600C0D4E9 /* yolo11l.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 733FEE4A2CF2D77600C0D4E9 /* yolo11l.mlpackage */; };
+ 733FEE552CF2DB6500C0D4E9 /* Classify.swift in Sources */ = {isa = PBXBuildFile; fileRef = 733FEE542CF2DB6500C0D4E9 /* Classify.swift */; };
+ 733FEE572CF357A900C0D4E9 /* yolo11n-cls.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 733FEE562CF357A900C0D4E9 /* yolo11n-cls.mlpackage */; };
+ 733FEE592CF3589A00C0D4E9 /* Detect.swift in Sources */ = {isa = PBXBuildFile; fileRef = 733FEE582CF3589A00C0D4E9 /* Detect.swift */; };
+ 733FEE5B2CF4BFA400C0D4E9 /* Segment.swift in Sources */ = {isa = PBXBuildFile; fileRef = 733FEE5A2CF4BFA400C0D4E9 /* Segment.swift */; };
+ 733FEE5D2CF5108C00C0D4E9 /* CoreML.swift in Sources */ = {isa = PBXBuildFile; fileRef = 733FEE5C2CF5108C00C0D4E9 /* CoreML.swift */; };
+ 733FEE5F2CF579EE00C0D4E9 /* yolo11n-seg.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 733FEE5E2CF579EE00C0D4E9 /* yolo11n-seg.mlpackage */; };
+ 733FEE632CF57A2200C0D4E9 /* yolo11n-pose.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 733FEE612CF57A2200C0D4E9 /* yolo11n-pose.mlpackage */; };
+ 733FEE652CF6D65A00C0D4E9 /* Pose.swift in Sources */ = {isa = PBXBuildFile; fileRef = 733FEE642CF6D65A00C0D4E9 /* Pose.swift */; };
8EDAA33950796844333D60A7 /* BoundingBoxView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8EDAA633C1F2B50286D16008 /* BoundingBoxView.swift */; };
/* End PBXBuildFile section */
+
/* Begin PBXFileReference section */
6323C44D22186177008AE681 /* LaunchScreen.storyboard */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; path = LaunchScreen.storyboard; sourceTree = ""; };
6323C44F22186177008AE681 /* Main.storyboard */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; path = Main.storyboard; sourceTree = ""; };
@@ -34,12 +43,20 @@
636EFCA221E62DD300DE43BC /* VideoCapture.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = VideoCapture.swift; sourceTree = ""; };
636EFCA721E62DD300DE43BC /* AppDelegate.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = AppDelegate.swift; sourceTree = ""; };
636EFCB821E62E3900DE43BC /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = ""; };
- 6381D2132B7817C200ABA4E8 /* yolov8l.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolov8l.mlpackage; sourceTree = ""; };
- 6381D2142B7817C200ABA4E8 /* yolov8x.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolov8x.mlpackage; sourceTree = ""; };
- 6381D2152B7817C200ABA4E8 /* yolov8s.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolov8s.mlpackage; sourceTree = ""; };
- 6381D2162B7817C200ABA4E8 /* yolov8m.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolov8m.mlpackage; sourceTree = ""; };
- 6381D2172B7817C200ABA4E8 /* yolov8n.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolov8n.mlpackage; sourceTree = ""; };
63B8B0A821E62A890026FBC3 /* .gitignore */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = .gitignore; sourceTree = ""; };
+ 733FEE4A2CF2D77600C0D4E9 /* yolo11l.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolo11l.mlpackage; sourceTree = ""; };
+ 733FEE4B2CF2D77600C0D4E9 /* yolo11m.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolo11m.mlpackage; sourceTree = ""; };
+ 733FEE4C2CF2D77600C0D4E9 /* yolo11n.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolo11n.mlpackage; sourceTree = ""; };
+ 733FEE4D2CF2D77600C0D4E9 /* yolo11s.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolo11s.mlpackage; sourceTree = ""; };
+ 733FEE4E2CF2D77600C0D4E9 /* yolo11x.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolo11x.mlpackage; sourceTree = ""; };
+ 733FEE542CF2DB6500C0D4E9 /* Classify.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Classify.swift; sourceTree = ""; };
+ 733FEE562CF357A900C0D4E9 /* yolo11n-cls.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = "yolo11n-cls.mlpackage"; sourceTree = ""; };
+ 733FEE582CF3589A00C0D4E9 /* Detect.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Detect.swift; sourceTree = ""; };
+ 733FEE5A2CF4BFA400C0D4E9 /* Segment.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Segment.swift; sourceTree = ""; };
+ 733FEE5C2CF5108C00C0D4E9 /* CoreML.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = CoreML.swift; sourceTree = ""; };
+ 733FEE5E2CF579EE00C0D4E9 /* yolo11n-seg.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = "yolo11n-seg.mlpackage"; sourceTree = ""; };
+ 733FEE612CF57A2200C0D4E9 /* yolo11n-pose.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = "yolo11n-pose.mlpackage"; sourceTree = ""; };
+ 733FEE642CF6D65A00C0D4E9 /* Pose.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Pose.swift; sourceTree = ""; };
7BCB411721C3096100BFC4D0 /* YOLO.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = YOLO.app; sourceTree = BUILT_PRODUCTS_DIR; };
8EDAA633C1F2B50286D16008 /* BoundingBoxView.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = BoundingBoxView.swift; sourceTree = ""; };
8EDAAA4507D2D23D7FAB827F /* README.md */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = net.daringfireball.markdown; path = README.md; sourceTree = ""; };
@@ -61,6 +78,11 @@
children = (
636166E9251443B20054FA7E /* ThresholdProvider.swift */,
8EDAA633C1F2B50286D16008 /* BoundingBoxView.swift */,
+ 733FEE5C2CF5108C00C0D4E9 /* CoreML.swift */,
+ 733FEE582CF3589A00C0D4E9 /* Detect.swift */,
+ 733FEE542CF2DB6500C0D4E9 /* Classify.swift */,
+ 733FEE5A2CF4BFA400C0D4E9 /* Segment.swift */,
+ 733FEE642CF6D65A00C0D4E9 /* Pose.swift */,
);
path = Utilities;
sourceTree = "";
@@ -86,11 +108,14 @@
63A946D8271800E20001C3ED /* Models */ = {
isa = PBXGroup;
children = (
- 6381D2132B7817C200ABA4E8 /* yolov8l.mlpackage */,
- 6381D2162B7817C200ABA4E8 /* yolov8m.mlpackage */,
- 6381D2172B7817C200ABA4E8 /* yolov8n.mlpackage */,
- 6381D2152B7817C200ABA4E8 /* yolov8s.mlpackage */,
- 6381D2142B7817C200ABA4E8 /* yolov8x.mlpackage */,
+ 733FEE4A2CF2D77600C0D4E9 /* yolo11l.mlpackage */,
+ 733FEE4B2CF2D77600C0D4E9 /* yolo11m.mlpackage */,
+ 733FEE4C2CF2D77600C0D4E9 /* yolo11n.mlpackage */,
+ 733FEE4D2CF2D77600C0D4E9 /* yolo11s.mlpackage */,
+ 733FEE4E2CF2D77600C0D4E9 /* yolo11x.mlpackage */,
+ 733FEE562CF357A900C0D4E9 /* yolo11n-cls.mlpackage */,
+ 733FEE5E2CF579EE00C0D4E9 /* yolo11n-seg.mlpackage */,
+ 733FEE612CF57A2200C0D4E9 /* yolo11n-pose.mlpackage */,
);
path = Models;
sourceTree = "";
@@ -209,15 +234,23 @@
isa = PBXSourcesBuildPhase;
buildActionMask = 2147483647;
files = (
- 6381D21B2B7817C200ABA4E8 /* yolov8m.mlpackage in Sources */,
- 6381D21C2B7817C200ABA4E8 /* yolov8n.mlpackage in Sources */,
636EFCAF21E62DD300DE43BC /* VideoCapture.swift in Sources */,
+ 733FEE4F2CF2D77600C0D4E9 /* yolo11s.mlpackage in Sources */,
+ 733FEE502CF2D77600C0D4E9 /* yolo11m.mlpackage in Sources */,
+ 733FEE5B2CF4BFA400C0D4E9 /* Segment.swift in Sources */,
+ 733FEE512CF2D77600C0D4E9 /* yolo11n.mlpackage in Sources */,
+ 733FEE522CF2D77600C0D4E9 /* yolo11x.mlpackage in Sources */,
+ 733FEE552CF2DB6500C0D4E9 /* Classify.swift in Sources */,
+ 733FEE5F2CF579EE00C0D4E9 /* yolo11n-seg.mlpackage in Sources */,
+ 733FEE632CF57A2200C0D4E9 /* yolo11n-pose.mlpackage in Sources */,
+ 733FEE572CF357A900C0D4E9 /* yolo11n-cls.mlpackage in Sources */,
+ 733FEE532CF2D77600C0D4E9 /* yolo11l.mlpackage in Sources */,
+ 733FEE592CF3589A00C0D4E9 /* Detect.swift in Sources */,
636166EA251443B20054FA7E /* ThresholdProvider.swift in Sources */,
- 6381D2182B7817C200ABA4E8 /* yolov8l.mlpackage in Sources */,
- 6381D21A2B7817C200ABA4E8 /* yolov8s.mlpackage in Sources */,
- 6381D2192B7817C200ABA4E8 /* yolov8x.mlpackage in Sources */,
636EFCB321E62DD300DE43BC /* AppDelegate.swift in Sources */,
636EFCAA21E62DD300DE43BC /* ViewController.swift in Sources */,
+ 733FEE652CF6D65A00C0D4E9 /* Pose.swift in Sources */,
+ 733FEE5D2CF5108C00C0D4E9 /* CoreML.swift in Sources */,
8EDAA33950796844333D60A7 /* BoundingBoxView.swift in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
@@ -354,12 +387,12 @@
INFOPLIST_FILE = YOLO/Info.plist;
INFOPLIST_KEY_CFBundleDisplayName = "Ultralytics YOLO";
INFOPLIST_KEY_LSApplicationCategoryType = "public.app-category.developer-tools";
- IPHONEOS_DEPLOYMENT_TARGET = 14.0;
+ IPHONEOS_DEPLOYMENT_TARGET = 16.0;
LD_RUNPATH_SEARCH_PATHS = (
"$(inherited)",
"@executable_path/Frameworks",
);
- MARKETING_VERSION = 8.2.0;
+ MARKETING_VERSION = 8.3.0;
PRODUCT_BUNDLE_IDENTIFIER = com.ultralytics.iDetection;
PRODUCT_NAME = "$(TARGET_NAME)";
SUPPORTED_PLATFORMS = "iphoneos iphonesimulator";
@@ -382,12 +415,12 @@
INFOPLIST_FILE = YOLO/Info.plist;
INFOPLIST_KEY_CFBundleDisplayName = "Ultralytics YOLO";
INFOPLIST_KEY_LSApplicationCategoryType = "public.app-category.developer-tools";
- IPHONEOS_DEPLOYMENT_TARGET = 14.0;
+ IPHONEOS_DEPLOYMENT_TARGET = 16.0;
LD_RUNPATH_SEARCH_PATHS = (
"$(inherited)",
"@executable_path/Frameworks",
);
- MARKETING_VERSION = 8.2.0;
+ MARKETING_VERSION = 8.3.0;
PRODUCT_BUNDLE_IDENTIFIER = com.ultralytics.iDetection;
PRODUCT_NAME = "$(TARGET_NAME)";
SUPPORTED_PLATFORMS = "iphoneos iphonesimulator";
diff --git a/YOLO/Info.plist b/YOLO/Info.plist
index a7022ec..308e4be 100644
--- a/YOLO/Info.plist
+++ b/YOLO/Info.plist
@@ -21,7 +21,7 @@
<key>CFBundleShortVersionString</key>
<string>$(MARKETING_VERSION)</string>
<key>CFBundleVersion</key>
- <string>25</string>
+ <string>117</string>
<key>ITSAppUsesNonExemptEncryption</key>
<key>LSRequiresIPhoneOS</key>
diff --git a/YOLO/Main.storyboard b/YOLO/Main.storyboard
index 549bc72..cf599d6 100644
--- a/YOLO/Main.storyboard
+++ b/YOLO/Main.storyboard
@@ -1,9 +1,9 @@
@@ -32,7 +32,7 @@
@@ -41,11 +41,11 @@
@@ -139,7 +139,7 @@
@@ -229,6 +229,18 @@
@@ -239,12 +251,15 @@
@@ -273,7 +288,9 @@
@@ -304,14 +321,15 @@
@@ -331,10 +349,10 @@
diff --git a/YOLO/Utilities/Classify.swift b/YOLO/Utilities/Classify.swift
new file mode 100644
index 0000000..81138f0
--- /dev/null
+++ b/YOLO/Utilities/Classify.swift
@@ -0,0 +1,93 @@
+//
+// Classify.swift
+// YOLO
+//
+
+import Foundation
+import UIKit
+import Vision
+
+extension ViewController {
+ // view
+ func setupClassifyOverlay() {
+
+ classifyOverlay = UILabel(
+ frame: CGRect(x: view.center.x - 100, y: view.center.y - 50, width: 200, height: 100))
+
+ classifyOverlay.backgroundColor = UIColor.black.withAlphaComponent(0.5)
+ classifyOverlay.clipsToBounds = true
+ classifyOverlay.layer.cornerRadius = 8
+ classifyOverlay.numberOfLines = 2
+ classifyOverlay.textAlignment = .left
+ view.addSubview(classifyOverlay)
+ classifyOverlay.isHidden = true
+ }
+
+ func showClassifyUI() {
+ taskSegmentControl.selectedSegmentIndex = 1
+ modelSegmentedControl.selectedSegmentIndex = 0
+ classifyOverlay.isHidden = false
+ }
+
+ func updateClassifyOverlay() {
+
+ classifyOverlay.frame = CGRect(
+ x: view.center.x - 100, y: view.center.y - 50, width: 200, height: 100)
+ }
+ // post process
+
+ func postProcessClassify(request: VNRequest) {
+ if let observation = visionRequest.results as? [VNCoreMLFeatureValueObservation] {
+
+ // Get the MLMultiArray from the observation
+ let multiArray = observation.first?.featureValue.multiArrayValue
+
+ if let multiArray = multiArray {
+ // Initialize an array to store the classes
+ var valuesArray = [Double]()
+
+ // Loop through the MLMultiArray and append its values to the array
+ for i in 0..<multiArray.count {
+ valuesArray.append(multiArray[i].doubleValue)
+ }
+ // Pair each score with its class index and sort by confidence, descending
+ let sortedMap = valuesArray.enumerated().map { (index: $0.offset, value: $0.element) }.sorted(by: { $0.value > $1.value })
+
+ var recognitions: [[String: Any]] = []
+ for (index, value) in sortedMap {
+ let label = self.classifyLabels[index]
+ recognitions.append([
+ "label": label,
+ "confidence": value,
+ "index": index,
+ ])
+ }
+ print(recognitions)
+ }
+ } else if let observations = request.results as? [VNClassificationObservation] {
+
+ var recognitions: [[String: Any]] = []
+
+ // Convert each VNClassificationObservation into the desired format
+ guard let topResult = observations.first else { return }
+ let label = topResult.identifier // Class label
+ let confidence = topResult.confidence // Confidence score (between 0 and 1)
+ let percentageValue = confidence * 100
+ let formattedPercentage = round(percentageValue * 10) / 10
+
+ let resultText = " \(label)\n \(formattedPercentage) %"
+ DispatchQueue.main.async {
+ self.classifyOverlay.text = resultText
+ }
+
+ }
+ }
+}
diff --git a/YOLO/Utilities/CoreML.swift b/YOLO/Utilities/CoreML.swift
new file mode 100644
index 0000000..5cf6c1e
--- /dev/null
+++ b/YOLO/Utilities/CoreML.swift
@@ -0,0 +1,151 @@
+//
+// CoreML.swift
+// YOLO
+//
+// Created by 間嶋大輔 on 2024/11/26.
+// Copyright © 2024 Ultralytics. All rights reserved.
+//
+
+import Foundation
+import UIKit
+import Vision
+
+extension ViewController {
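+ /// Runs the active Vision/CoreML request on a camera frame, caching the frame size on first use.
+ /// `currentBuffer` doubles as a busy flag, so new frames are skipped while inference is in flight.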
+ func predict(sampleBuffer: CMSampleBuffer) {
+ if currentBuffer == nil, let pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) {
+ currentBuffer = pixelBuffer
+ if !frameSizeCaptured {
+ let frameWidth = CGFloat(CVPixelBufferGetWidth(pixelBuffer))
+ let frameHeight = CGFloat(CVPixelBufferGetHeight(pixelBuffer))
+ longSide = max(frameWidth, frameHeight)
+ shortSide = min(frameWidth, frameHeight)
+ frameSizeCaptured = true
+ }
+ /// - Tag: MappingOrientation
+ // The frame is always oriented based on the camera sensor,
+ // so in most cases Vision needs to rotate it for the model to work as expected.
+ let imageOrientation: CGImagePropertyOrientation
+ switch UIDevice.current.orientation {
+ case .portrait:
+ imageOrientation = .up
+ case .portraitUpsideDown:
+ imageOrientation = .down
+ case .landscapeLeft:
+ imageOrientation = .up
+ case .landscapeRight:
+ imageOrientation = .up
+ case .unknown:
+ imageOrientation = .up
+ default:
+ imageOrientation = .up
+ }
+
+ // Invoke a VNRequestHandler with that image
+ let handler = VNImageRequestHandler(
+ cvPixelBuffer: pixelBuffer, orientation: imageOrientation, options: [:])
+ if UIDevice.current.orientation != .faceUp { // stop if placed down on a table
+ t0 = CACurrentMediaTime() // inference start
+ do {
+ try handler.perform([visionRequest])
+ } catch {
+ print(error)
+ }
+ t1 = CACurrentMediaTime() - t0 // inference dt
+ }
+
+ currentBuffer = nil
+ }
+ }
+
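+ /// Vision completion handler: routes results to the post-processor for the current task,
+ /// then updates the smoothed inference-time and FPS labels.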
+ func processObservations(for request: VNRequest, error: Error?) {
+ DispatchQueue.main.async {
+ switch self.task {
+ case .detect:
+ self.postProcessDetect(request: request)
+ case .classify:
+ self.postProcessClassify(request: request)
+ case .segment:
+ self.postProcessSegment(request: request)
+ case .pose:
+ self.postProcessPose(request: request)
+ default:
+ break
+ }
+ // Measure FPS
+ if self.t1 < 10.0 { // valid dt
+ self.t2 = self.t1 * 0.05 + self.t2 * 0.95 // smoothed inference time
+ }
+ self.t4 = (CACurrentMediaTime() - self.t3) * 0.05 + self.t4 * 0.95 // smoothed delivered FPS
+ self.labelFPS.text = String(format: "%.1f FPS - %.1f ms", 1 / self.t4, self.t2 * 1000) // t2 seconds to ms
+ self.t3 = CACurrentMediaTime()
+ }
+ }
+
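+ /// Rebuilds the CoreML model for the selected task and model-size segment, then recreates
+ /// the VNCoreMLRequest (with ThresholdProvider) on a background queue.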
+ func setModel() {
+
+ /// Switch model
+ switch task {
+ case .detect:
+ switch modelSegmentedControl.selectedSegmentIndex {
+ case 0:
+ self.labelName.text = "YOLOv11n"
+ mlModel = try! yolo11n(configuration: .init()).model
+ case 1:
+ self.labelName.text = "YOLOv11s"
+ mlModel = try! yolo11s(configuration: .init()).model
+ case 2:
+ self.labelName.text = "YOLOv11m"
+ mlModel = try! yolo11m(configuration: .init()).model
+ case 3:
+ self.labelName.text = "YOLOv11l"
+ mlModel = try! yolo11l(configuration: .init()).model
+ case 4:
+ self.labelName.text = "YOLOv11x"
+ mlModel = try! yolo11x(configuration: .init()).model
+ default:
+ break
+ }
+ case .classify:
+ switch modelSegmentedControl.selectedSegmentIndex {
+ case 0:
+ self.labelName.text = "YOLO11n"
+ mlModel = try! yolo11n_cls(configuration: .init()).model
+ default: break
+ }
+ case .segment:
+ switch modelSegmentedControl.selectedSegmentIndex {
+ case 0:
+ self.labelName.text = "YOLO11n"
+ mlModel = try! yolo11n_seg(configuration: .init()).model
+ default: break
+ }
+ case .pose:
+ switch modelSegmentedControl.selectedSegmentIndex {
+ case 0:
+ self.labelName.text = "YOLO11n"
+ mlModel = try! yolo11n_pose(configuration: .init()).model
+ default: break
+ }
+ default:
+ break
+ }
+ DispatchQueue.global(qos: .userInitiated).async { [self] in
+
+ /// VNCoreMLModel
+ detector = try! VNCoreMLModel(for: mlModel)
+ detector.featureProvider = ThresholdProvider()
+
+ /// VNCoreMLRequest
+ let request = VNCoreMLRequest(
+ model: detector,
+ completionHandler: { [weak self] request, error in
+ self?.processObservations(for: request, error: error)
+ })
+ request.imageCropAndScaleOption = .scaleFill // .scaleFit, .scaleFill, .centerCrop
+ visionRequest = request
+ t2 = 0.0 // inference dt smoothed
+ t3 = CACurrentMediaTime() // FPS start
+ t4 = 0.0 // FPS dt smoothed
+ }
+ }
+}
diff --git a/YOLO/Utilities/Detect.swift b/YOLO/Utilities/Detect.swift
new file mode 100644
index 0000000..658bfa3
--- /dev/null
+++ b/YOLO/Utilities/Detect.swift
@@ -0,0 +1,216 @@
+//
+// Detect.swift
+// YOLO
+
+import Foundation
+import UIKit
+import Vision
+
+extension ViewController {
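+ /// Converts Vision object-detection observations into DetectionResult values and hands them
+ /// to the bounding-box renderer.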
+ func postProcessDetect(request: VNRequest) {
+ if let results = request.results as? [VNRecognizedObjectObservation] {
+ var predictions = [DetectionResult]()
+ for result in results {
+ let prediction = DetectionResult(
+ rect: result.boundingBox, label: result.labels[0].identifier,
+ confidence: result.labels[0].confidence)
+ predictions.append(prediction)
+ }
+ self.showBoundingBoxes(predictions: predictions)
+ } else {
+ self.showBoundingBoxes(predictions: [])
+ }
+ }
+
+ func showBoundingBoxes(predictions: [DetectionResult]) {
+ var str = ""
+ // date
+ let date = Date()
+ let calendar = Calendar.current
+ let hour = calendar.component(.hour, from: date)
+ let minutes = calendar.component(.minute, from: date)
+ let seconds = calendar.component(.second, from: date)
+ let nanoseconds = calendar.component(.nanosecond, from: date)
+ let sec_day =
+ Double(hour) * 3600.0 + Double(minutes) * 60.0 + Double(seconds) + Double(nanoseconds) / 1E9 // seconds in the day
+
+ self.labelSlider.text =
+ String(predictions.count) + " items (max " + String(Int(slider.value)) + ")"
+ let width = videoPreview.bounds.width // 375 pix
+ let height = videoPreview.bounds.height // 812 pix
+
+ if UIDevice.current.orientation == .portrait {
+
+ // ratio = videoPreview AR divided by sessionPreset AR
+ var ratio: CGFloat = 1.0
+ if videoCapture.captureSession.sessionPreset == .photo {
+ ratio = (height / width) / (4.0 / 3.0) // .photo
+ } else {
+ ratio = (height / width) / (16.0 / 9.0) // .hd4K3840x2160, .hd1920x1080, .hd1280x720 etc.
+ }
+
+ for i in 0..<boundingBoxViews.count {
+ if i < predictions.count && i < Int(slider.value) {
+ let prediction = predictions[i]
+ var rect = prediction.rect // normalized xywh, origin lower left
+ if ratio >= 1 { // iPhone ratio = 1.218
+ let offset = (1 - ratio) * (0.5 - rect.minX)
+ if task == .detect {
+ let transform = CGAffineTransform(scaleX: 1, y: -1).translatedBy(x: offset, y: -1)
+ rect = rect.applying(transform)
+ } else {
+ let transform = CGAffineTransform(translationX: offset, y: 0)
+ rect = rect.applying(transform)
+ }
+ rect.size.width *= ratio
+ } else { // iPad ratio = 0.75
+ let offset = (ratio - 1) * (0.5 - rect.maxY)
+ let transform = CGAffineTransform(scaleX: 1, y: -1).translatedBy(x: 0, y: offset - 1)
+ rect = rect.applying(transform)
+ ratio = (height / width) / (3.0 / 4.0)
+ rect.size.height /= ratio
+ }
+
+ // Scale normalized to pixels [375, 812] [width, height]
+ rect = VNImageRectForNormalizedRect(rect, Int(width), Int(height))
+
+ // The labels array is a list of VNClassificationObservation objects,
+ // with the highest scoring class first in the list.
+ let bestClass = prediction.label
+ let confidence = prediction.confidence
+ // print(confidence, rect) // debug (confidence, xywh) with xywh origin top left (pixels)
+ let label = String(format: "%@ %.1f", bestClass, confidence * 100)
+ let alpha = CGFloat((confidence - 0.2) / (1.0 - 0.2) * 0.9)
+ // Show the bounding box.
+ boundingBoxViews[i].show(
+ frame: rect,
+ label: label,
+ color: colors[bestClass] ?? UIColor.white,
+ alpha: alpha) // alpha 0 (transparent) to 1 (opaque) for conf threshold 0.2 to 1.0)
+
+ if developerMode {
+ // Write
+ if save_detections {
+ str += String(
+ format: "%.3f %.3f %.3f %@ %.2f %.1f %.1f %.1f %.1f\n",
+ sec_day, freeSpace(), UIDevice.current.batteryLevel, bestClass, confidence,
+ rect.origin.x, rect.origin.y, rect.size.width, rect.size.height)
+ }
+ }
+ } else {
+ boundingBoxViews[i].hide()
+ }
+ }
+ } else {
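+ // Landscape: compute aspect-fill scale factors and the crop offsets needed to map
+ // normalized boxes from the camera frame into the preview's coordinate space.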
+ let frameAspectRatio = longSide / shortSide
+ let viewAspectRatio = width / height
+ var scaleX: CGFloat = 1.0
+ var scaleY: CGFloat = 1.0
+ var offsetX: CGFloat = 0.0
+ var offsetY: CGFloat = 0.0
+
+ if frameAspectRatio > viewAspectRatio {
+ scaleY = height / shortSide
+ scaleX = scaleY
+ offsetX = (longSide * scaleX - width) / 2
+ } else {
+ scaleX = width / longSide
+ scaleY = scaleX
+ offsetY = (shortSide * scaleY - height) / 2
+ }
+
+ for i in 0.. [(CGRect, Float, [Float])]
+ {
+ let numAnchors = prediction.shape[2].intValue
+ let featureCount = prediction.shape[1].intValue - 5
+ var boxes = [CGRect]()
+ var scores = [Float]()
+ var features = [[Float]]()
+ let featurePointer = UnsafeMutablePointer(OpaquePointer(prediction.dataPointer))
+ let lock = DispatchQueue(label: "com.example.lock")
+
+ DispatchQueue.concurrentPerform(iterations: numAnchors) { j in
+ let confIndex = 4 * numAnchors + j
+ let confidence = featurePointer[confIndex]
+
+ if confidence > confidenceThreshold {
+ let x = featurePointer[j]
+ let y = featurePointer[numAnchors + j]
+ let width = featurePointer[2 * numAnchors + j]
+ let height = featurePointer[3 * numAnchors + j]
+
+ let boxWidth = CGFloat(width)
+ let boxHeight = CGFloat(height)
+ let boxX = CGFloat(x - width / 2)
+ let boxY = CGFloat(y - height / 2)
+
+ let boundingBox = CGRect(x: boxX, y: boxY, width: boxWidth, height: boxHeight)
+
+ var boxFeatures = [Float](repeating: 0, count: featureCount)
+ for k in 0..= confThreshold
+ && box.contains(CGPoint(x: CGFloat(keypoints[i * 3]), y: CGFloat(keypoints[i * 3 + 1])))
+ {
+ points[i] = (point, conf)
+
+ drawCircle(on: layer, at: point, radius: radius, color: kptColorIndices[i])
+ }
+ }
+
+ if drawSkeleton {
+ for (index, bone) in skeleton.enumerated() {
+ let (startIdx, endIdx) = (bone[0] - 1, bone[1] - 1)
+
+ guard startIdx < points.count, endIdx < points.count else {
+ print("Invalid skeleton indices: \(startIdx), \(endIdx)")
+ continue
+ }
+
+ let startPoint = points[startIdx].0
+ let endPoint = points[endIdx].0
+ let startConf = points[startIdx].1
+ let endConf = points[endIdx].1
+
+ if startConf >= confThreshold && endConf >= confThreshold {
+ drawLine(on: layer, from: startPoint, to: endPoint, color: limbColorIndices[index])
+ }
+ }
+ }
+ }
+
+ func drawCircle(on layer: CALayer, at point: CGPoint, radius: CGFloat, color index: Int) {
+ let circleLayer = CAShapeLayer()
+ circleLayer.path =
+ UIBezierPath(
+ arcCenter: point,
+ radius: radius,
+ startAngle: 0,
+ endAngle: .pi * 2,
+ clockwise: true
+ ).cgPath
+
+ let color = posePalette[index].map { $0 / 255.0 }
+ circleLayer.fillColor =
+ UIColor(red: color[0], green: color[1], blue: color[2], alpha: 1.0).cgColor
+
+ layer.addSublayer(circleLayer)
+ }
+
+ func drawLine(on layer: CALayer, from start: CGPoint, to end: CGPoint, color index: Int) {
+ let lineLayer = CAShapeLayer()
+ let path = UIBezierPath()
+ path.move(to: start)
+ path.addLine(to: end)
+
+ lineLayer.path = path.cgPath
+ lineLayer.lineWidth = 2
+
+ let color = posePalette[index].map { $0 / 255.0 }
+ lineLayer.strokeColor =
+ UIColor(red: color[0], green: color[1], blue: color[2], alpha: 1.0).cgColor
+
+ layer.addSublayer(lineLayer)
+ }
+}
+
+let posePalette: [[CGFloat]] = [
+ [255, 128, 0],
+ [255, 153, 51],
+ [255, 178, 102],
+ [230, 230, 0],
+ [255, 153, 255],
+ [153, 204, 255],
+ [255, 102, 255],
+ [255, 51, 255],
+ [102, 178, 255],
+ [51, 153, 255],
+ [255, 153, 153],
+ [255, 102, 102],
+ [255, 51, 51],
+ [153, 255, 153],
+ [102, 255, 102],
+ [51, 255, 51],
+ [0, 255, 0],
+ [0, 0, 255],
+ [255, 0, 0],
+ [255, 255, 255],
+]
+
+let limbColorIndices = [0, 0, 0, 0, 7, 7, 7, 9, 9, 9, 9, 9, 16, 16, 16, 16, 16, 16, 16]
+let kptColorIndices = [16, 16, 16, 16, 16, 9, 9, 9, 9, 9, 9, 0, 0, 0, 0, 0, 0]
+
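+// COCO 17-keypoint skeleton: each pair is a 1-based (start, end) keypoint index for one limb.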
+let skeleton = [
+ [16, 14],
+ [14, 12],
+ [17, 15],
+ [15, 13],
+ [12, 13],
+ [6, 12],
+ [7, 13],
+ [6, 7],
+ [6, 8],
+ [7, 9],
+ [8, 10],
+ [9, 11],
+ [2, 3],
+ [1, 2],
+ [1, 3],
+ [2, 4],
+ [3, 5],
+ [4, 6],
+ [5, 7],
+]
diff --git a/YOLO/Utilities/Segment.swift b/YOLO/Utilities/Segment.swift
new file mode 100644
index 0000000..fc29cfa
--- /dev/null
+++ b/YOLO/Utilities/Segment.swift
@@ -0,0 +1,379 @@
+//
+// Segment.swift
+// YOLO
+//
+
+import Accelerate
+import Foundation
+import UIKit
+import Vision
+
+extension ViewController {
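+ /// Sizes the segmentation/pose overlay layer to match the camera frame's aspect ratio,
+ /// centers it over the (cropped) video preview, and pre-computes per-class mask colors.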
+ func setupSegmentPoseOverlay() {
+ let width = videoPreview.bounds.width
+ let height = videoPreview.bounds.height
+
+ var ratio: CGFloat = 1.0
+ if videoCapture.captureSession.sessionPreset == .photo {
+ ratio = (4.0 / 3.0)
+ } else {
+ ratio = (16.0 / 9.0)
+ }
+ var offSet = CGFloat.zero
+ var margin = CGFloat.zero
+ if view.bounds.width < view.bounds.height {
+ offSet = height / ratio
+ margin = (offSet - self.videoPreview.bounds.width) / 2
+ self.segmentPoseOverlay.frame = CGRect(
+ x: -margin, y: 0, width: offSet, height: self.videoPreview.bounds.height)
+ } else {
+ offSet = width / ratio
+ margin = (offSet - self.videoPreview.bounds.height) / 2
+ self.segmentPoseOverlay.frame = CGRect(
+ x: 0, y: -margin, width: self.videoPreview.bounds.width, height: offSet)
+
+ }
+ var count = 0
+ for _ in colors {
+ let color = ultralyticsColorsolors[count]
+ count += 1
+ if count > 19 {
+ count = 0
+ }
+ guard let colorForMask = color.toRGBComponents() else { fatalError() }
+ colorsForMasks.append(colorForMask)
+ }
+ }
+
+ func postProcessSegment(request: VNRequest) {
+ if let results = request.results as? [VNCoreMLFeatureValueObservation] {
+ DispatchQueue.main.async { [self] in
+ guard results.count == 2 else { return }
+ let masks = results[0].featureValue.multiArrayValue
+ let pred = results[1].featureValue.multiArrayValue
+ let a = Date()
+
+ let processed = getBoundingBoxesAndMasks(
+ feature: pred!, confidenceThreshold: 0.25, iouThreshold: 0.4)
+ var predictions = [DetectionResult]()
+ for object in processed {
+ let box = object.0
+ let rect = CGRect(
+ x: box.minX / 640, y: box.minY / 640, width: box.width / 640, height: box.height / 640)
+ let bestClass = classes[object.1]
+ let confidence = object.2
+ let prediction = DetectionResult(rect: rect, label: bestClass, confidence: confidence)
+ predictions.append(prediction)
+ }
+ self.showBoundingBoxes(predictions: predictions)
+ self.updateMaskAndBoxes(detectedObjects: processed, maskArray: masks!)
+ }
+ }
+ }
+
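+ /// Decodes the raw segmentation head output (shape 1 x (4 + classes + 32) x anchors) into
+ /// boxes in 640x640 pixel space, class indices, scores, and 32 mask coefficients per detection.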
+ func getBoundingBoxesAndMasks(
+ feature: MLMultiArray, confidenceThreshold: Float, iouThreshold: Float
+ ) -> [(CGRect, Int, Float, MLMultiArray)] {
+ let numAnchors = feature.shape[2].intValue
+ let numFeatures = feature.shape[1].intValue
+ let boxFeatureLength = 4
+ let maskConfidenceLength = 32
+ let numClasses = numFeatures - boxFeatureLength - maskConfidenceLength
+
+ var results = [(CGRect, Int, Float, MLMultiArray)]()
+ let featurePointer = feature.dataPointer.assumingMemoryBound(to: Float.self)
+
+ let resultsQueue = DispatchQueue(label: "resultsQueue", attributes: .concurrent)
+
+ DispatchQueue.concurrentPerform(iterations: numAnchors) { j in
+ let baseOffset = j
+ let x = featurePointer[baseOffset]
+ let y = featurePointer[numAnchors + baseOffset]
+ let width = featurePointer[2 * numAnchors + baseOffset]
+ let height = featurePointer[3 * numAnchors + baseOffset]
+
+ let boxWidth = CGFloat(width)
+ let boxHeight = CGFloat(height)
+ let boxX = CGFloat(x - width / 2)
+ let boxY = CGFloat(y - height / 2)
+
+ let boundingBox = CGRect(x: boxX, y: boxY, width: boxWidth, height: boxHeight)
+
+ var classProbs = [Float](repeating: 0, count: numClasses)
+ classProbs.withUnsafeMutableBufferPointer { classProbsPointer in
+ vDSP_mtrans(
+ featurePointer + 4 * numAnchors + baseOffset, numAnchors, classProbsPointer.baseAddress!,
+ 1, 1, vDSP_Length(numClasses))
+ }
+ var maxClassValue: Float = 0
+ var maxClassIndex: vDSP_Length = 0
+ vDSP_maxvi(classProbs, 1, &maxClassValue, &maxClassIndex, vDSP_Length(numClasses))
+
+ if maxClassValue > confidenceThreshold {
+ let maskProbsPointer = featurePointer + (4 + numClasses) * numAnchors + baseOffset
+ let maskProbs = try! MLMultiArray(
+ shape: [NSNumber(value: maskConfidenceLength)], dataType: .float32)
+ for i in 0.. $1.0.size.width * $1.0.size.height
+ }
+
+ var newLayers: [CALayer] = []
+
+ for (box, classIndex, _, masksIn) in sortedObjects {
+ group.enter()
+ DispatchQueue.global(qos: .userInitiated).async {
+ if let maskImage = self.generateColoredMaskImage(
+ from: masksIn, protos: maskArray, in: self.segmentPoseOverlay.bounds.size,
+ colorIndex: classIndex,
+ boundingBox: box)
+ {
+ let adjustedBox = self.adjustBox(box, toFitIn: self.segmentPoseOverlay.bounds.size)
+
+ let maskImageLayer = CALayer()
+ maskImageLayer.frame = adjustedBox
+ maskImageLayer.contents = maskImage
+ maskImageLayer.opacity = 0.5
+ DispatchQueue.main.async {
+ newLayers.append(maskImageLayer)
+ }
+ }
+ group.leave()
+ }
+ }
+
+ // Wait for all mask-rendering tasks to finish
+ group.notify(queue: .main) { [weak self] in
+ guard let self = self else { return }
+ self.removeAllMaskSubLayers()
+ newLayers.forEach { self.segmentPoseOverlay.addSublayer($0) }
+ print("update complete")
+ print("Time elapsed: \(Date().timeIntervalSince(startTime))")
+ self.isUpdating = false // clear the in-progress flag
+ }
+ }
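+ /// Multiplies a detection's 32 mask coefficients against the prototype masks with vDSP_mmul,
+ /// thresholds the result at 0.5, tints it with the class color, and crops it to the bounding box.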
+ func generateColoredMaskImage(
+ from masksIn: MLMultiArray, protos: MLMultiArray, in size: CGSize, colorIndex: Int,
+ boundingBox: CGRect
+ ) -> CGImage? {
+ let maskWidth = protos.shape[3].intValue
+ let maskHeight = protos.shape[2].intValue
+ let maskChannels = protos.shape[1].intValue
+
+ guard protos.shape.count == 4, protos.shape[0].intValue == 1, masksIn.shape.count == 1,
+ masksIn.shape[0].intValue == maskChannels
+ else {
+ print("Invalid shapes for protos or masksIn")
+ return nil
+ }
+
+ let masksPointer = masksIn.dataPointer.assumingMemoryBound(to: Float.self)
+ let protosPointer = protos.dataPointer.assumingMemoryBound(to: Float.self)
+
+ let masksPointerOutput = UnsafeMutablePointer.allocate(capacity: maskHeight * maskWidth)
+ vDSP_mmul(
+ masksPointer, 1, protosPointer, 1, masksPointerOutput, 1, vDSP_Length(1),
+ vDSP_Length(maskHeight * maskWidth), vDSP_Length(maskChannels))
+
+ let threshold: Float = 0.5
+ let color = colorsForMask[colorIndex]
+ let red = UInt8(color.red)
+ let green = UInt8(color.green)
+ let blue = UInt8(color.blue)
+
+ var maskPixels = [UInt8](repeating: 0, count: maskHeight * maskWidth * 4)
+ for y in 0..<maskHeight {
+ for x in 0..<maskWidth {
+ let index = y * maskWidth + x
+ if masksPointerOutput[index] > threshold {
+ let pixelIndex = index * 4
+ maskPixels[pixelIndex] = red
+ maskPixels[pixelIndex + 1] = green
+ maskPixels[pixelIndex + 2] = blue
+ maskPixels[pixelIndex + 3] = 255
+ }
+ }
+ }
+
+ let maskDataPointer = UnsafeMutablePointer.allocate(capacity: maskPixels.count)
+ maskDataPointer.initialize(from: maskPixels, count: maskPixels.count)
+
+ let bitmapInfo = CGBitmapInfo(rawValue: CGImageAlphaInfo.premultipliedLast.rawValue)
+ let colorSpace = CGColorSpaceCreateDeviceRGB()
+
+ let maskDataProvider = CGDataProvider(
+ dataInfo: nil, data: maskDataPointer, size: maskPixels.count
+ ) { _, data, _ in
+ data.deallocate()
+ }
+
+ guard
+ let maskCGImage = CGImage(
+ width: maskWidth, height: maskHeight, bitsPerComponent: 8, bitsPerPixel: 32,
+ bytesPerRow: maskWidth * 4, space: colorSpace, bitmapInfo: bitmapInfo,
+ provider: maskDataProvider!, decode: nil, shouldInterpolate: true, intent: .defaultIntent)
+ else {
+ masksPointerOutput.deallocate()
+ return nil
+ }
+
+ let maskCIImage = CIImage(cgImage: maskCGImage)
+ let scaledCIImage = maskCIImage.transformed(
+ by: CGAffineTransform(
+ scaleX: size.width / CGFloat(maskWidth), y: size.height / CGFloat(maskHeight)))
+ let invertedY = size.height - (boundingBox.origin.y + boundingBox.height) * size.height / 640.0
+ let cropRect = CGRect(
+ x: boundingBox.origin.x * size.width / 640.0, y: invertedY,
+ width: boundingBox.width * size.width / 640.0,
+ height: boundingBox.height * size.height / 640.0)
+
+ let croppedCIImage = scaledCIImage.cropped(to: cropRect)
+
+ let ciContext = CIContext()
+ guard let cgImage = ciContext.createCGImage(croppedCIImage, from: cropRect) else {
+ masksPointerOutput.deallocate()
+ return nil
+ }
+
+ masksPointerOutput.deallocate()
+
+ return cgImage
+ }
+
+ func removeAllMaskSubLayers() {
+ self.segmentPoseOverlay.sublayers?.forEach { layer in
+ layer.removeFromSuperlayer()
+ }
+ self.segmentPoseOverlay.sublayers = nil
+ }
+
+ func adjustBox(_ box: CGRect, toFitIn containerSize: CGSize) -> CGRect {
+ let xScale = containerSize.width / 640.0
+ let yScale = containerSize.height / 640.0
+ return CGRect(
+ x: box.origin.x * xScale, y: box.origin.y * yScale, width: box.size.width * xScale,
+ height: box.size.height * yScale)
+ }
+}
+
+extension UIColor {
+ func toRGBComponents() -> (red: UInt8, green: UInt8, blue: UInt8)? {
+ var red: CGFloat = 0
+ var green: CGFloat = 0
+ var blue: CGFloat = 0
+ var alpha: CGFloat = 0
+
+ let success = self.getRed(&red, green: &green, blue: &blue, alpha: &alpha)
+
+ if success {
+ let redUInt8 = UInt8(red * 255.0)
+ let greenUInt8 = UInt8(green * 255.0)
+ let blueUInt8 = UInt8(blue * 255.0)
+ return (red: redUInt8, green: greenUInt8, blue: blueUInt8)
+ } else {
+ return nil
+ }
+ }
+}
+
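+// Greedy non-maximum suppression: visit boxes in descending score order and suppress any remaining
+// box whose intersection with a kept box exceeds `threshold` times the smaller box's area.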
+func nonMaxSuppression(boxes: [CGRect], scores: [Float], threshold: Float) -> [Int] {
+ let sortedIndices = scores.enumerated().sorted { $0.element > $1.element }.map { $0.offset }
+ var selectedIndices = [Int]()
+ var activeIndices = [Bool](repeating: true, count: boxes.count)
+
+ for i in 0..<sortedIndices.count {
+ let idx = sortedIndices[i]
+ if activeIndices[idx] {
+ selectedIndices.append(idx)
+ for j in i + 1..<sortedIndices.count {
+ let otherIdx = sortedIndices[j]
+ if activeIndices[otherIdx] {
+ let intersection = boxes[idx].intersection(boxes[otherIdx])
+ if intersection.area > CGFloat(threshold) * min(boxes[idx].area, boxes[otherIdx].area) {
+ activeIndices[otherIdx] = false
+ }
+ }
+ }
+ }
+ }
+ return selectedIndices
+}
+
+extension CGRect {
+ var area: CGFloat {
+ return width * height
+ }
+}
diff --git a/YOLO/ViewController.swift b/YOLO/ViewController.swift
index 840d91d..578dba1 100644
--- a/YOLO/ViewController.swift
+++ b/YOLO/ViewController.swift
@@ -1,7 +1,7 @@
// Ultralytics YOLO 🚀 - AGPL-3.0 License
//
// Main View Controller for Ultralytics YOLO App
-// This file is part of the Ultralytics YOLO app, enabling real-time object detection using YOLOv8 models on iOS devices.
+// This file is part of the Ultralytics YOLO app, enabling real-time object detection using YOLO11 models on iOS devices.
// Licensed under AGPL-3.0. For commercial use, refer to Ultralytics licensing: https://ultralytics.com/license
// Access the source code: https://github.com/ultralytics/yolo-ios-app
//
@@ -17,12 +17,12 @@ import CoreMedia
import UIKit
import Vision
-var mlModel = try! yolov8m(configuration: .init()).model
+var mlModel = try! yolo11m(configuration: .init()).model
class ViewController: UIViewController {
@IBOutlet var videoPreview: UIView!
@IBOutlet var View0: UIView!
- @IBOutlet var segmentedControl: UISegmentedControl!
+ @IBOutlet var modelSegmentedControl: UISegmentedControl!
@IBOutlet var playButtonOutlet: UIBarButtonItem!
@IBOutlet var pauseButtonOutlet: UIBarButtonItem!
@IBOutlet var slider: UISlider!
@@ -42,6 +42,11 @@ class ViewController: UIViewController {
@IBOutlet weak var activityIndicator: UIActivityIndicatorView!
@IBOutlet weak var forcus: UIImageView!
@IBOutlet weak var toolBar: UIToolbar!
+ @IBOutlet weak var taskSegmentControl: UISegmentedControl!
+
+ // views for tasks
+ var classifyOverlay: UILabel!
+ var segmentPoseOverlay: CALayer = CALayer()
let selection = UISelectionFeedbackGenerator()
var detector = try! VNCoreMLModel(for: mlModel)
@@ -55,6 +60,9 @@ class ViewController: UIViewController {
var t3 = CACurrentMediaTime() // FPS start
var t4 = 0.0 // FPS dt smoothed
// var cameraOutput: AVCapturePhotoOutput!
+ var longSide: CGFloat = 3
+ var shortSide: CGFloat = 4
+ var frameSizeCaptured = false
// Developer mode
let developerMode = UserDefaults.standard.bool(forKey: "developer_mode") // developer mode selected in settings
@@ -73,16 +81,90 @@ class ViewController: UIViewController {
return request
}()
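+ // YOLO task types driven by the task segmented control; .obb is declared but not yet wired up
+ // (it currently falls through the default branches below).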
+ enum Task {
+ case detect
+ case classify
+ case segment
+ case pose
+ case obb
+ }
+
+ var task: Task = .detect
+ var confidenceThreshold: Float = 0.25
+ var iouThreshold: Float = 0.4
+
+ var classifyLabels = [String]()
+ var colorsForMasks: [(red: UInt8, green: UInt8, blue: UInt8)] = []
+ var classes: [String] = []
+
override func viewDidLoad() {
super.viewDidLoad()
- slider.value = 30
- setLabels()
- setUpBoundingBoxViews()
setUpOrientationChangeNotification()
startVideo()
+ setupUI()
// setModel()
}
+ func setupUI() {
+ slider.value = 30
+ setLabels()
+ setUpBoundingBoxViews()
+ setupColors()
+ setupClassifyOverlay()
+ setupSegmentPoseOverlay()
+ }
+
+ func switchUIForTask() {
+ switch task {
+ case .detect:
+ classifyOverlay.isHidden = true
+ segmentPoseOverlay.isHidden = true
+ updateSlider(show: true)
+ case .classify:
+ classifyOverlay.isHidden = false
+ hideBoundingBoxes()
+ segmentPoseOverlay.isHidden = true
+ updateSlider(show: false)
+ case .segment:
+ segmentPoseOverlay.isHidden = false
+ hideBoundingBoxes()
+ classifyOverlay.isHidden = true
+ updateSlider(show: true)
+ case .pose:
+ segmentPoseOverlay.isHidden = false
+ hideBoundingBoxes()
+ classifyOverlay.isHidden = true
+ updateSlider(show: true)
+ default:
+ break
+ }
+ }
+
+ func updateSlider(show: Bool) {
+ if show {
+ labelSlider.isHidden = false
+ labelSliderConf.isHidden = false
+ labelSliderIoU.isHidden = false
+ labelSliderConfLandScape.isHidden = false
+ labelSliderIoULandScape.isHidden = false
+ slider.isHidden = false
+ sliderConf.isHidden = false
+ sliderIoU.isHidden = false
+ sliderConfLandScape.isHidden = false
+ sliderIoULandScape.isHidden = false
+
+ } else {
+ labelSlider.isHidden = true
+ labelSliderConf.isHidden = true
+ labelSliderIoU.isHidden = true
+ labelSliderConfLandScape.isHidden = true
+ labelSliderIoULandScape.isHidden = true
+ sliderConfLandScape.isHidden = true
+ sliderIoULandScape.isHidden = true
+
+ }
+ }
+
override func viewWillTransition(
to size: CGSize, with coordinator: any UIViewControllerTransitionCoordinator
) {
@@ -116,7 +198,15 @@ class ViewController: UIViewController {
}
self.videoCapture.previewLayer?.frame = CGRect(
x: 0, y: 0, width: size.width, height: size.height)
+ coordinator.animate(
+ alongsideTransition: { context in
+ },
+ completion: { context in
+ self.setupSegmentPoseOverlay()
+ self.updateClassifyOverlay()
+ }
+ )
}
private func setUpOrientationChangeNotification() {
@@ -127,58 +217,63 @@ class ViewController: UIViewController {
@objc func orientationDidChange() {
videoCapture.updateVideoOrientation()
+ // frameSizeCaptured = false
}
@IBAction func vibrate(_ sender: Any) {
selection.selectionChanged()
}
- @IBAction func indexChanged(_ sender: Any) {
- selection.selectionChanged()
- activityIndicator.startAnimating()
-
- /// Switch model
- switch segmentedControl.selectedSegmentIndex {
+ @IBAction func taskSegmentChanged(_ sender: UISegmentedControl) {
+ switch sender.selectedSegmentIndex {
case 0:
- self.labelName.text = "YOLOv8n"
- mlModel = try! yolov8n(configuration: .init()).model
+ task = .detect
+ modelSegmentedControl.setEnabled(true, forSegmentAt: 1)
+ modelSegmentedControl.setEnabled(true, forSegmentAt: 2)
+ modelSegmentedControl.setEnabled(true, forSegmentAt: 3)
+ modelSegmentedControl.setEnabled(true, forSegmentAt: 4)
case 1:
- self.labelName.text = "YOLOv8s"
- mlModel = try! yolov8s(configuration: .init()).model
+ task = .classify
+ modelSegmentedControl.selectedSegmentIndex = 0
+ updateModelSegmentControl(enableModelIndex: [0, 1, 2, 3, 4], unableModelIndex: [])
+ showClassifyUI()
case 2:
- self.labelName.text = "YOLOv8m"
- mlModel = try! yolov8m(configuration: .init()).model
+ task = .segment
+ modelSegmentedControl.selectedSegmentIndex = 0
+ updateModelSegmentControl(enableModelIndex: [0], unableModelIndex: [1, 2, 3, 4])
case 3:
- self.labelName.text = "YOLOv8l"
- mlModel = try! yolov8l(configuration: .init()).model
- case 4:
- self.labelName.text = "YOLOv8x"
- mlModel = try! yolov8x(configuration: .init()).model
+ task = .pose
+ modelSegmentedControl.selectedSegmentIndex = 0
+ updateModelSegmentControl(enableModelIndex: [0], unableModelIndex: [1, 2, 3, 4])
default:
- break
+ updateModelSegmentControl(enableModelIndex: [], unableModelIndex: [0, 1, 2, 3, 4])
}
+ switchUIForTask()
setModel()
- setUpBoundingBoxViews()
- activityIndicator.stopAnimating()
+ if task == .classify {
+ setupClassifyLabels()
+ } else {
+ setupColors()
+ }
}
- func setModel() {
+ func updateModelSegmentControl(enableModelIndex: [Int], unableModelIndex: [Int]) {
+ for index in enableModelIndex {
+ modelSegmentedControl.setEnabled(true, forSegmentAt: index)
+ }
- /// VNCoreMLModel
- detector = try! VNCoreMLModel(for: mlModel)
- detector.featureProvider = ThresholdProvider()
+ for index in unableModelIndex {
+ modelSegmentedControl.setEnabled(false, forSegmentAt: index)
+ }
+ }
- /// VNCoreMLRequest
- let request = VNCoreMLRequest(
- model: detector,
- completionHandler: { [weak self] request, error in
- self?.processObservations(for: request, error: error)
- })
- request.imageCropAndScaleOption = .scaleFill // .scaleFit, .scaleFill, .centerCrop
- visionRequest = request
- t2 = 0.0 // inference dt smoothed
- t3 = CACurrentMediaTime() // FPS start
- t4 = 0.0 // FPS dt smoothed
+ @IBAction func indexChanged(_ sender: Any) {
+ selection.selectionChanged()
+ activityIndicator.startAnimating()
+ setModel()
+ setUpBoundingBoxViews()
+ setupColors()
+ activityIndicator.stopAnimating()
}
/// Update thresholds from slider values
@@ -222,7 +317,7 @@ class ViewController: UIViewController {
}
func setLabels() {
- self.labelName.text = "YOLOv8m"
+ self.labelName.text = "YOLO11m"
self.labelVersion.text = "Version " + UserDefaults.standard.string(forKey: "app_version")!
}
@@ -279,6 +374,29 @@ class ViewController: UIViewController {
let maxBoundingBoxViews = 100
var boundingBoxViews = [BoundingBoxView]()
var colors: [String: UIColor] = [:]
+ var colorsForMask: [(red: UInt8, green: UInt8, blue: UInt8)] = []
+ let ultralyticsColorsolors: [UIColor] = [
+ UIColor(red: 4 / 255, green: 42 / 255, blue: 255 / 255, alpha: 0.6), // #042AFF
+ UIColor(red: 11 / 255, green: 219 / 255, blue: 235 / 255, alpha: 0.6), // #0BDBEB
+ UIColor(red: 243 / 255, green: 243 / 255, blue: 243 / 255, alpha: 0.6), // #F3F3F3
+ UIColor(red: 0 / 255, green: 223 / 255, blue: 183 / 255, alpha: 0.6), // #00DFB7
+ UIColor(red: 17 / 255, green: 31 / 255, blue: 104 / 255, alpha: 0.6), // #111F68
+ UIColor(red: 255 / 255, green: 111 / 255, blue: 221 / 255, alpha: 0.6), // #FF6FDD
+ UIColor(red: 255 / 255, green: 68 / 255, blue: 79 / 255, alpha: 0.6), // #FF444F
+ UIColor(red: 204 / 255, green: 237 / 255, blue: 0 / 255, alpha: 0.6), // #CCED00
+ UIColor(red: 0 / 255, green: 243 / 255, blue: 68 / 255, alpha: 0.6), // #00F344
+ UIColor(red: 189 / 255, green: 0 / 255, blue: 255 / 255, alpha: 0.6), // #BD00FF
+ UIColor(red: 0 / 255, green: 180 / 255, blue: 255 / 255, alpha: 0.6), // #00B4FF
+ UIColor(red: 221 / 255, green: 0 / 255, blue: 186 / 255, alpha: 0.6), // #DD00BA
+ UIColor(red: 0 / 255, green: 255 / 255, blue: 255 / 255, alpha: 0.6), // #00FFFF
+ UIColor(red: 38 / 255, green: 192 / 255, blue: 0 / 255, alpha: 0.6), // #26C000
+ UIColor(red: 1 / 255, green: 255 / 255, blue: 179 / 255, alpha: 0.6), // #01FFB3
+ UIColor(red: 125 / 255, green: 36 / 255, blue: 255 / 255, alpha: 0.6), // #7D24FF
+ UIColor(red: 123 / 255, green: 0 / 255, blue: 104 / 255, alpha: 0.6), // #7B0068
+ UIColor(red: 255 / 255, green: 27 / 255, blue: 108 / 255, alpha: 0.6), // #FF1B6C
+ UIColor(red: 252 / 255, green: 109 / 255, blue: 47 / 255, alpha: 0.6), // #FC6D2F
+ UIColor(red: 162 / 255, green: 255 / 255, blue: 11 / 255, alpha: 0.6), // #A2FF0B
+ ]
func setUpBoundingBoxViews() {
// Ensure all bounding box views are initialized up to the maximum allowed.
@@ -287,20 +405,45 @@ class ViewController: UIViewController {
}
// Retrieve class labels directly from the CoreML model's class labels, if available.
+ }
+
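+ /// Assigns a color from the fixed Ultralytics palette to each class label (cycling after 20 classes)
+ /// and caches the RGB components used for tinting segmentation masks.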
+ func setupColors() {
guard let classLabels = mlModel.modelDescription.classLabels as? [String] else {
- fatalError("Class labels are missing from the model description")
+ print("Class labels are missing from the model description")
+ return
}
-
+ classes = classLabels
// Assign random colors to the classes.
+ var count = 0
for label in classLabels {
+ let color = ultralyticsColorsolors[count]
+ count += 1
+ if count > 19 {
+ count = 0
+ }
if colors[label] == nil { // if key not in dict
- colors[label] = UIColor(
- red: CGFloat.random(in: 0...1),
- green: CGFloat.random(in: 0...1),
- blue: CGFloat.random(in: 0...1),
- alpha: 0.6)
+ colors[label] = color
}
}
+
+ count = 0
+ for (key, color) in colors {
+ let color = ultralyticsColorsolors[count]
+ count += 1
+ if count > 19 {
+ count = 0
+ }
+ guard let colorForMask = color.toRGBComponents() else { fatalError() }
+ colorsForMask.append(colorForMask)
+ }
+ }
+
+ func setupClassifyLabels() {
+ guard let classLabels = mlModel.modelDescription.classLabels as? [String] else {
+ print("Class labels are missing from the model description")
+ return
+ }
+ classifyLabels = classLabels
}
func startVideo() {
@@ -315,6 +458,7 @@ class ViewController: UIViewController {
self.videoPreview.layer.addSublayer(previewLayer)
self.videoCapture.previewLayer?.frame = self.videoPreview.bounds // resize preview layer
}
+ self.videoPreview.layer.addSublayer(self.segmentPoseOverlay)
// Add the bounding box layers to the UI, on top of the video preview.
for box in self.boundingBoxViews {
@@ -327,64 +471,6 @@ class ViewController: UIViewController {
}
}
- func predict(sampleBuffer: CMSampleBuffer) {
- if currentBuffer == nil, let pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) {
- currentBuffer = pixelBuffer
-
- /// - Tag: MappingOrientation
- // The frame is always oriented based on the camera sensor,
- // so in most cases Vision needs to rotate it for the model to work as expected.
- let imageOrientation: CGImagePropertyOrientation
- switch UIDevice.current.orientation {
- case .portrait:
- imageOrientation = .up
- case .portraitUpsideDown:
- imageOrientation = .down
- case .landscapeLeft:
- imageOrientation = .up
- case .landscapeRight:
- imageOrientation = .up
- case .unknown:
- imageOrientation = .up
- default:
- imageOrientation = .up
- }
-
- // Invoke a VNRequestHandler with that image
- let handler = VNImageRequestHandler(
- cvPixelBuffer: pixelBuffer, orientation: imageOrientation, options: [:])
- if UIDevice.current.orientation != .faceUp { // stop if placed down on a table
- t0 = CACurrentMediaTime() // inference start
- do {
- try handler.perform([visionRequest])
- } catch {
- print(error)
- }
- t1 = CACurrentMediaTime() - t0 // inference dt
- }
-
- currentBuffer = nil
- }
- }
-
- func processObservations(for request: VNRequest, error: Error?) {
- DispatchQueue.main.async {
- if let results = request.results as? [VNRecognizedObjectObservation] {
- self.show(predictions: results)
- } else {
- self.show(predictions: [])
- }
-
- // Measure FPS
- if self.t1 < 10.0 { // valid dt
- self.t2 = self.t1 * 0.05 + self.t2 * 0.95 // smoothed inference time
- }
- self.t4 = (CACurrentMediaTime() - self.t3) * 0.05 + self.t4 * 0.95 // smoothed delivered FPS
- self.labelFPS.text = String(format: "%.1f FPS - %.1f ms", 1 / self.t4, self.t2 * 1000) // t2 seconds to ms
- self.t3 = CACurrentMediaTime()
- }
- }
-
// Save text file
func saveText(text: String, file: String = "saved.txt") {
if let dir = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask).first {
@@ -448,134 +534,6 @@ class ViewController: UIViewController {
}
}
- func show(predictions: [VNRecognizedObjectObservation]) {
- let width = videoPreview.bounds.width // 375 pix
- let height = videoPreview.bounds.height // 812 pix
- var str = ""
-
- // ratio = videoPreview AR divided by sessionPreset AR
- var ratio: CGFloat = 1.0
- if videoCapture.captureSession.sessionPreset == .photo {
- ratio = (height / width) / (4.0 / 3.0) // .photo
- } else {
- ratio = (height / width) / (16.0 / 9.0) // .hd4K3840x2160, .hd1920x1080, .hd1280x720 etc.
- }
-
- // date
- let date = Date()
- let calendar = Calendar.current
- let hour = calendar.component(.hour, from: date)
- let minutes = calendar.component(.minute, from: date)
- let seconds = calendar.component(.second, from: date)
- let nanoseconds = calendar.component(.nanosecond, from: date)
- let sec_day =
- Double(hour) * 3600.0 + Double(minutes) * 60.0 + Double(seconds) + Double(nanoseconds) / 1E9 // seconds in the day
-
- self.labelSlider.text =
- String(predictions.count) + " items (max " + String(Int(slider.value)) + ")"
- for i in 0..<boundingBoxViews.count {
- if i < predictions.count && i < Int(slider.value) {
- let prediction = predictions[i]
- var rect = prediction.boundingBox // normalized xywh, origin lower left
- if ratio >= 1 { // iPhone ratio = 1.218
- let offset = (1 - ratio) * (0.5 - rect.minX)
- let transform = CGAffineTransform(scaleX: 1, y: -1).translatedBy(x: offset, y: -1)
- rect = rect.applying(transform)
- rect.size.width *= ratio
- } else { // iPad ratio = 0.75
- let offset = (ratio - 1) * (0.5 - rect.maxY)
- let transform = CGAffineTransform(scaleX: 1, y: -1).translatedBy(x: 0, y: offset - 1)
- rect = rect.applying(transform)
- ratio = (height / width) / (3.0 / 4.0)
- rect.size.height /= ratio
- }
-
- // Scale normalized to pixels [375, 812] [width, height]
- rect = VNImageRectForNormalizedRect(rect, Int(width), Int(height))
-
- // The labels array is a list of VNClassificationObservation objects,
- // with the highest scoring class first in the list.
- let bestClass = prediction.labels[0].identifier
- let confidence = prediction.labels[0].confidence
- // print(confidence, rect) // debug (confidence, xywh) with xywh origin top left (pixels)
- let label = String(format: "%@ %.1f", bestClass, confidence * 100)
- let alpha = CGFloat((confidence - 0.2) / (1.0 - 0.2) * 0.9)
- // Show the bounding box.
- boundingBoxViews[i].show(
- frame: rect,
- label: label,
- color: colors[bestClass] ?? UIColor.white,
- alpha: alpha) // alpha 0 (transparent) to 1 (opaque) for conf threshold 0.2 to 1.0)
-
- if developerMode {
- // Write
- if save_detections {
- str += String(
- format: "%.3f %.3f %.3f %@ %.2f %.1f %.1f %.1f %.1f\n",
- sec_day, freeSpace(), UIDevice.current.batteryLevel, bestClass, confidence,
- rect.origin.x, rect.origin.y, rect.size.width, rect.size.height)
- }
-
- // Action trigger upon detection
- // if false {
- // if (bestClass == "car") { // "cell phone", "car", "person"
- // self.takePhoto(nil)
- // // self.pauseButton(nil)
- // sleep(2)
- // }
- // }
- }
- } else {
- boundingBoxViews[i].hide()
- }
- }
-
- // Write
- if developerMode {
- if save_detections {
- saveText(text: str, file: "detections.txt") // Write stats for each detection
- }
- if save_frames {
- str = String(
- format: "%.3f %.3f %.3f %.3f %.1f %.1f %.1f\n",
- sec_day, freeSpace(), memoryUsage(), UIDevice.current.batteryLevel,
- self.t1 * 1000, self.t2 * 1000, 1 / self.t4)
- saveText(text: str, file: "frames.txt") // Write stats for each image
- }
- }
-
- // Debug
- // print(str)
- // print(UIDevice.current.identifierForVendor!)
- // saveImage()
- }
-
// Pinch to Zoom Start ---------------------------------------------------------------------------------------------
let minimumZoom: CGFloat = 1.0
let maximumZoom: CGFloat = 10.0