From 245952159f8d05d5431aee693b124e2fd3504ae5 Mon Sep 17 00:00:00 2001 From: john-rocky Date: Thu, 25 Jul 2024 11:43:11 +0900 Subject: [PATCH 01/26] A preview is now displayed when sharing screenshots. --- YOLO.xcodeproj/project.pbxproj | 24 +- .../xcshareddata/xcschemes/YOLO.xcscheme | 78 ++++++ YOLO/Info.plist | 2 +- YOLO/ViewController.swift | 263 +++++++++++------- 4 files changed, 243 insertions(+), 124 deletions(-) create mode 100644 YOLO.xcodeproj/xcshareddata/xcschemes/YOLO.xcscheme diff --git a/YOLO.xcodeproj/project.pbxproj b/YOLO.xcodeproj/project.pbxproj index ddcf965..ee069a4 100644 --- a/YOLO.xcodeproj/project.pbxproj +++ b/YOLO.xcodeproj/project.pbxproj @@ -13,11 +13,6 @@ 636EFCAF21E62DD300DE43BC /* VideoCapture.swift in Sources */ = {isa = PBXBuildFile; fileRef = 636EFCA221E62DD300DE43BC /* VideoCapture.swift */; }; 636EFCB321E62DD300DE43BC /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = 636EFCA721E62DD300DE43BC /* AppDelegate.swift */; }; 636EFCB921E62E3900DE43BC /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 636EFCB821E62E3900DE43BC /* Assets.xcassets */; }; - 6381D2182B7817C200ABA4E8 /* yolov8l.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 6381D2132B7817C200ABA4E8 /* yolov8l.mlpackage */; }; - 6381D2192B7817C200ABA4E8 /* yolov8x.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 6381D2142B7817C200ABA4E8 /* yolov8x.mlpackage */; }; - 6381D21A2B7817C200ABA4E8 /* yolov8s.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 6381D2152B7817C200ABA4E8 /* yolov8s.mlpackage */; }; - 6381D21B2B7817C200ABA4E8 /* yolov8m.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 6381D2162B7817C200ABA4E8 /* yolov8m.mlpackage */; }; - 6381D21C2B7817C200ABA4E8 /* yolov8n.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 6381D2172B7817C200ABA4E8 /* yolov8n.mlpackage */; }; 63CF371F2514455300E2DEA1 /* LaunchScreen.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 6323C44D22186177008AE681 /* LaunchScreen.storyboard */; }; 63CF37202514455300E2DEA1 /* Main.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 6323C44F22186177008AE681 /* Main.storyboard */; }; 63CF37212514455300E2DEA1 /* ultralytics_yolo_logotype.png in Resources */ = {isa = PBXBuildFile; fileRef = 6323C45122186177008AE681 /* ultralytics_yolo_logotype.png */; }; @@ -35,11 +30,6 @@ 636EFCA221E62DD300DE43BC /* VideoCapture.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = VideoCapture.swift; sourceTree = ""; }; 636EFCA721E62DD300DE43BC /* AppDelegate.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = AppDelegate.swift; sourceTree = ""; }; 636EFCB821E62E3900DE43BC /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = ""; }; - 6381D2132B7817C200ABA4E8 /* yolov8l.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolov8l.mlpackage; sourceTree = ""; }; - 6381D2142B7817C200ABA4E8 /* yolov8x.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolov8x.mlpackage; sourceTree = ""; }; - 6381D2152B7817C200ABA4E8 /* yolov8s.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolov8s.mlpackage; sourceTree = ""; }; - 6381D2162B7817C200ABA4E8 /* yolov8m.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolov8m.mlpackage; sourceTree = ""; }; - 
6381D2172B7817C200ABA4E8 /* yolov8n.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolov8n.mlpackage; sourceTree = ""; }; 63B8B0A821E62A890026FBC3 /* .gitignore */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = .gitignore; sourceTree = ""; }; 7BCB411721C3096100BFC4D0 /* YOLO.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = YOLO.app; sourceTree = BUILT_PRODUCTS_DIR; }; 8EDAA633C1F2B50286D16008 /* BoundingBoxView.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = BoundingBoxView.swift; sourceTree = ""; }; @@ -87,11 +77,6 @@ 63A946D8271800E20001C3ED /* Models */ = { isa = PBXGroup; children = ( - 6381D2132B7817C200ABA4E8 /* yolov8l.mlpackage */, - 6381D2162B7817C200ABA4E8 /* yolov8m.mlpackage */, - 6381D2172B7817C200ABA4E8 /* yolov8n.mlpackage */, - 6381D2152B7817C200ABA4E8 /* yolov8s.mlpackage */, - 6381D2142B7817C200ABA4E8 /* yolov8x.mlpackage */, ); path = Models; sourceTree = ""; @@ -210,13 +195,8 @@ isa = PBXSourcesBuildPhase; buildActionMask = 2147483647; files = ( - 6381D21B2B7817C200ABA4E8 /* yolov8m.mlpackage in Sources */, - 6381D21C2B7817C200ABA4E8 /* yolov8n.mlpackage in Sources */, 636EFCAF21E62DD300DE43BC /* VideoCapture.swift in Sources */, 636166EA251443B20054FA7E /* ThresholdProvider.swift in Sources */, - 6381D2182B7817C200ABA4E8 /* yolov8l.mlpackage in Sources */, - 6381D21A2B7817C200ABA4E8 /* yolov8s.mlpackage in Sources */, - 6381D2192B7817C200ABA4E8 /* yolov8x.mlpackage in Sources */, 636EFCB321E62DD300DE43BC /* AppDelegate.swift in Sources */, 636EFCAA21E62DD300DE43BC /* ViewController.swift in Sources */, 8EDAA33950796844333D60A7 /* BoundingBoxView.swift in Sources */, @@ -351,7 +331,7 @@ ASSETCATALOG_COMPILER_INCLUDE_ALL_APPICON_ASSETS = NO; CODE_SIGN_STYLE = Automatic; CURRENT_PROJECT_VERSION = 0; - DEVELOPMENT_TEAM = 3MR4P6CL3X; + DEVELOPMENT_TEAM = ""; INFOPLIST_FILE = YOLO/Info.plist; INFOPLIST_KEY_CFBundleDisplayName = "Ultralytics YOLO"; INFOPLIST_KEY_LSApplicationCategoryType = "public.app-category.developer-tools"; @@ -379,7 +359,7 @@ ASSETCATALOG_COMPILER_INCLUDE_ALL_APPICON_ASSETS = NO; CODE_SIGN_STYLE = Automatic; CURRENT_PROJECT_VERSION = 0; - DEVELOPMENT_TEAM = 3MR4P6CL3X; + DEVELOPMENT_TEAM = ""; INFOPLIST_FILE = YOLO/Info.plist; INFOPLIST_KEY_CFBundleDisplayName = "Ultralytics YOLO"; INFOPLIST_KEY_LSApplicationCategoryType = "public.app-category.developer-tools"; diff --git a/YOLO.xcodeproj/xcshareddata/xcschemes/YOLO.xcscheme b/YOLO.xcodeproj/xcshareddata/xcschemes/YOLO.xcscheme new file mode 100644 index 0000000..3bb677d --- /dev/null +++ b/YOLO.xcodeproj/xcshareddata/xcschemes/YOLO.xcscheme @@ -0,0 +1,78 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/YOLO/Info.plist b/YOLO/Info.plist index c36dbc0..1fb0a90 100644 --- a/YOLO/Info.plist +++ b/YOLO/Info.plist @@ -21,7 +21,7 @@ CFBundleShortVersionString $(MARKETING_VERSION) CFBundleVersion - 24 + 27 ITSAppUsesNonExemptEncryption LSRequiresIPhoneOS diff --git a/YOLO/ViewController.swift b/YOLO/ViewController.swift index 025a3de..672663a 100644 --- a/YOLO/ViewController.swift +++ b/YOLO/ViewController.swift @@ -36,7 +36,8 @@ class ViewController: UIViewController { @IBOutlet weak var labelSliderConf: UILabel! @IBOutlet weak var labelSliderIoU: UILabel! @IBOutlet weak var activityIndicator: UIActivityIndicatorView! - + var screenshotImageView:UIImageView? 
+ let selection = UISelectionFeedbackGenerator() var detector = try! VNCoreMLModel(for: mlModel) var session: AVCaptureSession! @@ -49,12 +50,12 @@ class ViewController: UIViewController { var t3 = CACurrentMediaTime() // FPS start var t4 = 0.0 // FPS dt smoothed // var cameraOutput: AVCapturePhotoOutput! - + // Developer mode let developerMode = UserDefaults.standard.bool(forKey: "developer_mode") // developer mode selected in settings let save_detections = false // write every detection to detections.txt let save_frames = false // write every frame to frames.txt - + lazy var visionRequest: VNCoreMLRequest = { let request = VNCoreMLRequest(model: detector, completionHandler: { [weak self] request, error in @@ -64,7 +65,7 @@ class ViewController: UIViewController { request.imageCropAndScaleOption = .scaleFill // .scaleFit, .scaleFill, .centerCrop return request }() - + override func viewDidLoad() { super.viewDidLoad() slider.value = 30 @@ -73,15 +74,15 @@ class ViewController: UIViewController { startVideo() // setModel() } - + @IBAction func vibrate(_ sender: Any) { selection.selectionChanged() } - + @IBAction func indexChanged(_ sender: Any) { selection.selectionChanged() activityIndicator.startAnimating() - + /// Switch model switch segmentedControl.selectedSegmentIndex { case 0: @@ -106,12 +107,12 @@ class ViewController: UIViewController { setUpBoundingBoxViews() activityIndicator.stopAnimating() } - + func setModel() { /// VNCoreMLModel detector = try! VNCoreMLModel(for: mlModel) detector.featureProvider = ThresholdProvider() - + /// VNCoreMLRequest let request = VNCoreMLRequest(model: detector, completionHandler: { [weak self] request, error in self?.processObservations(for: request, error: error) @@ -122,7 +123,7 @@ class ViewController: UIViewController { t3 = CACurrentMediaTime() // FPS start t4 = 0.0 // FPS dt smoothed } - + /// Update thresholds from slider values @IBAction func sliderChanged(_ sender: Any) { let conf = Double(round(100 * sliderConf.value)) / 100 @@ -131,10 +132,10 @@ class ViewController: UIViewController { self.labelSliderIoU.text = String(iou) + " IoU Threshold" detector.featureProvider = ThresholdProvider(iouThreshold: iou, confidenceThreshold: conf) } - + @IBAction func takePhoto(_ sender: Any?) { let t0 = DispatchTime.now().uptimeNanoseconds - + // 1. captureSession and cameraOutput // session = videoCapture.captureSession // session = AVCaptureSession() // session.sessionPreset = AVCaptureSession.Preset.photo @@ -142,76 +143,67 @@ class ViewController: UIViewController { // cameraOutput.isHighResolutionCaptureEnabled = true // cameraOutput.isDualCameraDualPhotoDeliveryEnabled = true // print("1 Done: ", Double(DispatchTime.now().uptimeNanoseconds - t0) / 1E9) - + // 2. Settings let settings = AVCapturePhotoSettings() // settings.flashMode = .off // settings.isHighResolutionPhotoEnabled = cameraOutput.isHighResolutionCaptureEnabled // settings.isDualCameraDualPhotoDeliveryEnabled = self.videoCapture.cameraOutput.isDualCameraDualPhotoDeliveryEnabled - + // 3. 
Capture Photo usleep(20_000) // short 10 ms delay to allow camera to focus self.videoCapture.cameraOutput.capturePhoto(with: settings, delegate: self as AVCapturePhotoCaptureDelegate) print("3 Done: ", Double(DispatchTime.now().uptimeNanoseconds - t0) / 1E9) } - + @IBAction func logoButton(_ sender: Any) { selection.selectionChanged() if let link = URL(string: "https://www.ultralytics.com") { UIApplication.shared.open(link) } } - + func setLabels() { self.labelName.text = "YOLOv8m" self.labelVersion.text = "Version " + UserDefaults.standard.string(forKey: "app_version")! } - + @IBAction func playButton(_ sender: Any) { selection.selectionChanged() self.videoCapture.start() playButtonOutlet.isEnabled = false pauseButtonOutlet.isEnabled = true } - + @IBAction func pauseButton(_ sender: Any?) { selection.selectionChanged() self.videoCapture.stop() playButtonOutlet.isEnabled = true pauseButtonOutlet.isEnabled = false } - + @IBAction func switchCameraTapped(_ sender: Any) { self.videoCapture.captureSession.beginConfiguration() let currentInput = self.videoCapture.captureSession.inputs.first as? AVCaptureDeviceInput self.videoCapture.captureSession.removeInput(currentInput!) // let newCameraDevice = currentInput?.device == .builtInWideAngleCamera ? getCamera(with: .front) : getCamera(with: .back) - + let device = AVCaptureDevice.default(.builtInWideAngleCamera, for: .video, position: .back)! guard let videoInput1 = try? AVCaptureDeviceInput(device: device) else { return } - + self.videoCapture.captureSession.addInput(videoInput1) self.videoCapture.captureSession.commitConfiguration() } - + // share image @IBAction func shareButton(_ sender: Any) { selection.selectionChanged() - let bounds = UIScreen.main.bounds - //let bounds = self.View0.bounds - - UIGraphicsBeginImageContextWithOptions(bounds.size, true, 0.0) - self.View0.drawHierarchy(in: bounds, afterScreenUpdates: false) - let img = UIGraphicsGetImageFromCurrentImageContext() - UIGraphicsEndImageContext() - let activityViewController = UIActivityViewController(activityItems: [img!], applicationActivities: nil) - activityViewController.popoverPresentationController?.sourceView = self.View0 - self.present(activityViewController, animated: true, completion: nil) - // playButton("") + let settings = AVCapturePhotoSettings() + self.videoCapture.cameraOutput.capturePhoto(with: settings, delegate: self as AVCapturePhotoCaptureDelegate) } - + // share screenshot @IBAction func saveScreenshotButton(_ shouldSave: Bool = true) { // let layer = UIApplication.shared.keyWindow!.layer @@ -220,41 +212,41 @@ class ViewController: UIViewController { // layer.render(in: UIGraphicsGetCurrentContext()!) // let screenshot = UIGraphicsGetImageFromCurrentImageContext() // UIGraphicsEndImageContext() - + // let screenshot = UIApplication.shared.screenShot // UIImageWriteToSavedPhotosAlbum(screenshot!, nil, nil, nil) } - + let maxBoundingBoxViews = 100 var boundingBoxViews = [BoundingBoxView]() var colors: [String: UIColor] = [:] - + func setUpBoundingBoxViews() { // Ensure all bounding box views are initialized up to the maximum allowed. while boundingBoxViews.count < maxBoundingBoxViews { boundingBoxViews.append(BoundingBoxView()) } - + // Retrieve class labels directly from the CoreML model's class labels, if available. guard let classLabels = mlModel.modelDescription.classLabels as? [String] else { fatalError("Class labels are missing from the model description") } - + // Assign random colors to the classes. 
for label in classLabels { if colors[label] == nil { // if key not in dict colors[label] = UIColor(red: CGFloat.random(in: 0...1), - green: CGFloat.random(in: 0...1), - blue: CGFloat.random(in: 0...1), - alpha: 0.6) + green: CGFloat.random(in: 0...1), + blue: CGFloat.random(in: 0...1), + alpha: 0.6) } } } - + func startVideo() { videoCapture = VideoCapture() videoCapture.delegate = self - + videoCapture.setUp(sessionPreset: .photo) { success in // .hd4K3840x2160 or .photo (4032x3024) Warning: 4k may not work on all devices i.e. 2019 iPod if success { @@ -263,22 +255,22 @@ class ViewController: UIViewController { self.videoPreview.layer.addSublayer(previewLayer) self.videoCapture.previewLayer?.frame = self.videoPreview.bounds // resize preview layer } - + // Add the bounding box layers to the UI, on top of the video preview. for box in self.boundingBoxViews { box.addToLayer(self.videoPreview.layer) } - + // Once everything is set up, we can start capturing live video. self.videoCapture.start() } } } - + func predict(sampleBuffer: CMSampleBuffer) { if currentBuffer == nil, let pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) { currentBuffer = pixelBuffer - + /// - Tag: MappingOrientation // The frame is always oriented based on the camera sensor, // so in most cases Vision needs to rotate it for the model to work as expected. @@ -298,7 +290,7 @@ class ViewController: UIViewController { default: imageOrientation = .up } - + // Invoke a VNRequestHandler with that image let handler = VNImageRequestHandler(cvPixelBuffer: pixelBuffer, orientation: imageOrientation, options: [:]) if UIDevice.current.orientation != .faceUp { // stop if placed down on a table @@ -310,11 +302,11 @@ class ViewController: UIViewController { } t1 = CACurrentMediaTime() - t0 // inference dt } - + currentBuffer = nil } } - + func processObservations(for request: VNRequest, error: Error?) { DispatchQueue.main.async { if let results = request.results as? 
[VNRecognizedObjectObservation] { @@ -322,7 +314,7 @@ class ViewController: UIViewController { } else { self.show(predictions: []) } - + // Measure FPS if self.t1 < 10.0 { // valid dt self.t2 = self.t1 * 0.05 + self.t2 * 0.95 // smoothed inference time @@ -332,12 +324,12 @@ class ViewController: UIViewController { self.t3 = CACurrentMediaTime() } } - + // Save text file func saveText(text: String, file: String = "saved.txt") { if let dir = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask).first { let fileURL = dir.appendingPathComponent(file) - + // Writing do { // Append to file if it exists let fileHandle = try FileHandle(forWritingTo: fileURL) @@ -351,12 +343,12 @@ class ViewController: UIViewController { print("no file written") } } - + // Reading // do {let text2 = try String(contentsOf: fileURL, encoding: .utf8)} catch {/* error handling here */} } } - + // Save image file func saveImage() { let dir = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask).first @@ -364,7 +356,7 @@ class ViewController: UIViewController { let image = UIImage(named: "ultralytics_yolo_logotype.png") FileManager.default.createFile(atPath: fileURL.path, contents: image!.jpegData(compressionQuality: 0.5), attributes: nil) } - + // Return hard drive space (GB) func freeSpace() -> Double { let fileURL = URL(fileURLWithPath: NSHomeDirectory() as String) @@ -376,7 +368,7 @@ class ViewController: UIViewController { } return 0 } - + // Return RAM usage (GB) func memoryUsage() -> Double { var taskInfo = mach_task_basic_info() @@ -392,12 +384,12 @@ class ViewController: UIViewController { return 0 } } - + func show(predictions: [VNRecognizedObjectObservation]) { let width = videoPreview.bounds.width // 375 pix let height = videoPreview.bounds.height // 812 pix var str = "" - + // ratio = videoPreview AR divided by sessionPreset AR var ratio: CGFloat = 1.0 if videoCapture.captureSession.sessionPreset == .photo { @@ -405,7 +397,7 @@ class ViewController: UIViewController { } else { ratio = (height / width) / (16.0 / 9.0) // .hd4K3840x2160, .hd1920x1080, .hd1280x720 etc. } - + // date let date = Date() let calendar = Calendar.current @@ -414,35 +406,35 @@ class ViewController: UIViewController { let seconds = calendar.component(.second, from: date) let nanoseconds = calendar.component(.nanosecond, from: date) let sec_day = Double(hour) * 3600.0 + Double(minutes) * 60.0 + Double(seconds) + Double(nanoseconds) / 1E9 // seconds in the day - + self.labelSlider.text = String(predictions.count) + " items (max " + String(Int(slider.value)) + ")" for i in 0..= 1 { // iPhone ratio = 1.218 let offset = (1 - ratio) * (0.5 - rect.minX) let transform = CGAffineTransform(scaleX: 1, y: -1).translatedBy(x: offset, y: -1) @@ -454,30 +446,30 @@ class ViewController: UIViewController { rect = rect.applying(transform) rect.size.height /= ratio } - + // Scale normalized to pixels [375, 812] [width, height] rect = VNImageRectForNormalizedRect(rect, Int(width), Int(height)) - + // The labels array is a list of VNClassificationObservation objects, // with the highest scoring class first in the list. let bestClass = prediction.labels[0].identifier let confidence = prediction.labels[0].confidence // print(confidence, rect) // debug (confidence, xywh) with xywh origin top left (pixels) - + // Show the bounding box. boundingBoxViews[i].show(frame: rect, - label: String(format: "%@ %.1f", bestClass, confidence * 100), - color: colors[bestClass] ?? 
UIColor.white, - alpha: CGFloat((confidence - 0.2) / (1.0 - 0.2) * 0.9)) // alpha 0 (transparent) to 1 (opaque) for conf threshold 0.2 to 1.0) - + label: String(format: "%@ %.1f", bestClass, confidence * 100), + color: colors[bestClass] ?? UIColor.white, + alpha: CGFloat((confidence - 0.2) / (1.0 - 0.2) * 0.9)) // alpha 0 (transparent) to 1 (opaque) for conf threshold 0.2 to 1.0) + if developerMode { // Write if save_detections { str += String(format: "%.3f %.3f %.3f %@ %.2f %.1f %.1f %.1f %.1f\n", - sec_day, freeSpace(), UIDevice.current.batteryLevel, bestClass, confidence, - rect.origin.x, rect.origin.y, rect.size.width, rect.size.height) + sec_day, freeSpace(), UIDevice.current.batteryLevel, bestClass, confidence, + rect.origin.x, rect.origin.y, rect.size.width, rect.size.height) } - + // Action trigger upon detection // if false { // if (bestClass == "car") { // "cell phone", "car", "person" @@ -491,7 +483,7 @@ class ViewController: UIViewController { boundingBoxViews[i].hide() } } - + // Write if developerMode { if save_detections { @@ -499,31 +491,31 @@ class ViewController: UIViewController { } if save_frames { str = String(format: "%.3f %.3f %.3f %.3f %.1f %.1f %.1f\n", - sec_day, freeSpace(), memoryUsage(), UIDevice.current.batteryLevel, - self.t1 * 1000, self.t2 * 1000, 1 / self.t4) + sec_day, freeSpace(), memoryUsage(), UIDevice.current.batteryLevel, + self.t1 * 1000, self.t2 * 1000, 1 / self.t4) saveText(text: str, file: "frames.txt") // Write stats for each image } } - + // Debug // print(str) // print(UIDevice.current.identifierForVendor!) // saveImage() } - + // Pinch to Zoom Start --------------------------------------------------------------------------------------------- let minimumZoom: CGFloat = 1.0 let maximumZoom: CGFloat = 10.0 var lastZoomFactor: CGFloat = 1.0 - + @IBAction func pinch(_ pinch: UIPinchGestureRecognizer) { let device = videoCapture.captureDevice - + // Return zoom value between the minimum and maximum zoom values func minMaxZoom(_ factor: CGFloat) -> CGFloat { return min(min(max(factor, minimumZoom), maximumZoom), device.activeFormat.videoMaxZoomFactor) } - + func update(scale factor: CGFloat) { do { try device.lockForConfiguration() @@ -535,7 +527,7 @@ class ViewController: UIViewController { print("\(error.localizedDescription)") } } - + let newScaleFactor = minMaxZoom(pinch.scale * lastZoomFactor) switch pinch.state { case .began: fallthrough @@ -549,7 +541,44 @@ class ViewController: UIViewController { self.labelZoom.font = UIFont.preferredFont(forTextStyle: .body) default: break } - } // Pinch to Zoom Start ------------------------------------------------------------------------------------------ + } // Pinch to Zoom Start + + func showShareAlert(image: UIImage) { + let alertController = UIAlertController(title: "Do you want to share this image?", message: nil, preferredStyle: .alert) + + let shareAction = UIAlertAction(title: "OK", style: .default) { _ in + self.shareImage(image: image) + } + + let cancelAction = UIAlertAction(title: "Cancel", style: .cancel) { _ in + self.hideScreenshotImageView() + } + + alertController.addAction(shareAction) + alertController.addAction(cancelAction) + + if let popoverController = alertController.popoverPresentationController { + popoverController.sourceView = self.view + popoverController.sourceRect = CGRect(x: self.view.bounds.midX, y: self.view.bounds.maxY - 100, width: 0, height: 0) + popoverController.permittedArrowDirections = [] + } + + present(alertController, animated: true, completion: nil) + } + + 
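+  // Screenshot sharing flow: shareButton() requests a photo capture; the
+  // AVCapturePhotoCaptureDelegate callback composites the captured frame with the
+  // detection overlay via drawHierarchy, animates a preview thumbnail, and calls
+  // showShareAlert(image:). "OK" invokes shareImage(image:) below to present a
+  // UIActivityViewController; "Cancel" (or completion) calls hideScreenshotImageView()
+  // to remove the preview.
+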
func shareImage(image: UIImage) { + let activityViewController = UIActivityViewController(activityItems: [image], applicationActivities: nil) + activityViewController.popoverPresentationController?.sourceView = self.View0 + self.present(activityViewController, animated: true) { + self.hideScreenshotImageView() + } + } + + func hideScreenshotImageView() { + self.screenshotImageView?.removeFromSuperview() + self.screenshotImageView = nil + } + // ------------------------------------------------------------------------------------------ } // ViewController class End extension ViewController: VideoCaptureDelegate { @@ -565,18 +594,50 @@ extension ViewController: AVCapturePhotoCaptureDelegate { print("error occurred : \(error.localizedDescription)") } if let dataImage = photo.fileDataRepresentation() { - print(UIImage(data: dataImage)?.size as Any) let dataProvider = CGDataProvider(data: dataImage as CFData) let cgImageRef: CGImage! = CGImage(jpegDataProviderSource: dataProvider!, decode: nil, shouldInterpolate: true, intent: .defaultIntent) - let image = UIImage(cgImage: cgImageRef, scale: 0.5, orientation: UIImage.Orientation.right) - - // Save to camera roll - UIImageWriteToSavedPhotosAlbum(image, nil, nil, nil); + var orientation = UIImage.Orientation.right + switch UIDevice.current.orientation { + case .landscapeLeft: + orientation = .up + case .landscapeRight: + orientation = .down + default: + break + } + var image = UIImage(cgImage: cgImageRef, scale: 0.5, orientation: orientation) + + let imageView = UIImageView(image: image) + imageView.contentMode = .scaleAspectFill + imageView.frame = videoPreview.frame + let imageLayer = imageView.layer + var sublayers = videoPreview.layer.sublayers ?? [] + let insertIndex = max(sublayers.count - 1, 0) + videoPreview.layer.insertSublayer(imageLayer, above: videoCapture.previewLayer) + + let bounds = UIScreen.main.bounds + UIGraphicsBeginImageContextWithOptions(bounds.size, true, 0.0) + self.View0.drawHierarchy(in: bounds, afterScreenUpdates: true) + let img = UIGraphicsGetImageFromCurrentImageContext() + UIGraphicsEndImageContext() + imageLayer.removeFromSuperlayer() + + let screenshotImageView = UIImageView(image: img) + screenshotImageView.frame = view.bounds + screenshotImageView.contentMode = .scaleAspectFit + view.addSubview(screenshotImageView) + self.screenshotImageView = screenshotImageView + + UIView.animate(withDuration: 0.3, animations: { + screenshotImageView.frame = CGRect(x: 20, y: 100, width: self.view.bounds.width - 40, height: self.view.bounds.height - 200) + }) { _ in + self.showShareAlert(image: img!) + } +// +// // Save to camera roll +// UIImageWriteToSavedPhotosAlbum(img!, nil, nil, nil); } else { print("AVCapturePhotoCaptureDelegate Error") } } } - - - From f57698fde64de64746c681a2009d278261ceb45a Mon Sep 17 00:00:00 2001 From: UltralyticsAssistant Date: Thu, 25 Jul 2024 02:45:07 +0000 Subject: [PATCH 02/26] Auto-format by https://ultralytics.com/actions --- README.md | 45 +++++++++++++++++++++---------------------- YOLO/Models/README.md | 24 +++++++++++------------ 2 files changed, 34 insertions(+), 35 deletions(-) diff --git a/README.md b/README.md index c5374a4..d4f2c23 100644 --- a/README.md +++ b/README.md @@ -46,44 +46,43 @@ Ensure you have the following before you start: 1. **Clone the Repository:** - ```sh - git clone https://github.com/ultralytics/yolo-ios-app.git - ``` + ```sh + git clone https://github.com/ultralytics/yolo-ios-app.git + ``` 2. 
**Open the Project in Xcode:** - Navigate to the cloned directory and open the `YOLO.xcodeproj` file. + Navigate to the cloned directory and open the `YOLO.xcodeproj` file. -
-   [image: XCode load project screenshot]
+   [image: XCode load project screenshot]
- In Xcode, go to the project's target settings and choose your Apple Developer account under the "Signing & Capabilities" tab. + In Xcode, go to the project's target settings and choose your Apple Developer account under the "Signing & Capabilities" tab. 3. **Add YOLOv8 Models to the Project:** - Export CoreML INT8 models using the `ultralytics` Python package (with `pip install ultralytics`), or download them from our [GitHub release assets](https://github.com/ultralytics/yolo-ios-app/releases). You should have 5 YOLOv8 models in total. Place these in the `YOLO/Models` directory as seen in the Xcode screenshot below. + Export CoreML INT8 models using the `ultralytics` Python package (with `pip install ultralytics`), or download them from our [GitHub release assets](https://github.com/ultralytics/yolo-ios-app/releases). You should have 5 YOLOv8 models in total. Place these in the `YOLO/Models` directory as seen in the Xcode screenshot below. - ```python - from ultralytics import YOLO + ```python + from ultralytics import YOLO - # Loop through all YOLOv8 model sizes - for size in ("n", "s", "m", "l", "x"): + # Loop through all YOLOv8 model sizes + for size in ("n", "s", "m", "l", "x"): + # Load a YOLOv8 PyTorch model + model = YOLO(f"yolov8{size}.pt") - # Load a YOLOv8 PyTorch model - model = YOLO(f"yolov8{size}.pt") - - # Export the PyTorch model to CoreML INT8 format with NMS layers - model.export(format="coreml", int8=True, nms=True, imgsz=[640, 384]) - ``` + # Export the PyTorch model to CoreML INT8 format with NMS layers + model.export(format="coreml", int8=True, nms=True, imgsz=[640, 384]) + ``` 4. **Run the Ultralytics YOLO iOS App:** - Connect your iOS device and select it as the run target. Press the Run button to install the app on your device. + Connect your iOS device and select it as the run target. Press the Run button to install the app on your device. -
-   [image: Ultralytics YOLO XCode screenshot]
+   [image: Ultralytics YOLO XCode screenshot]
## 🚀 Usage diff --git a/YOLO/Models/README.md b/YOLO/Models/README.md index e73239c..b70f1e5 100644 --- a/YOLO/Models/README.md +++ b/YOLO/Models/README.md @@ -23,25 +23,25 @@ If you prefer to use specific model versions or need to customize the models, yo 1. **Installation:** First, ensure you have the `ultralytics` package installed. If not, you can install it using pip: - ```sh - pip install ultralytics - ``` + ```sh + pip install ultralytics + ``` 2. **Export Models:** Use the following Python script to export YOLOv8 models to the CoreML format, optimized for INT8 quantization for better performance on iOS devices. The script exports all YOLOv8 model sizes (`n`, `s`, `m`, `l`, `x`) as CoreML models. - ```python - from ultralytics import YOLO + ```python + from ultralytics import YOLO - # Export all YOLOv8 models to CoreML INT8 - for size in ("n", "s", "m", "l", "x"): # all YOLOv8 model sizes - YOLO(f"yolov8{size}.pt").export(format="coreml", int8=True, nms=True, imgsz=[640, 384]) - ``` + # Export all YOLOv8 models to CoreML INT8 + for size in ("n", "s", "m", "l", "x"): # all YOLOv8 model sizes + YOLO(f"yolov8{size}.pt").export(format="coreml", int8=True, nms=True, imgsz=[640, 384]) + ``` 3. **Place Models in Project:** After exporting, locate the CoreML model files and place them in the `YOLO/Models` directory of your project. -
-   [image: Ultralytics YOLO XCode screenshot]
+   [image: Ultralytics YOLO XCode screenshot]
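
Once the models are in place, Xcode auto-generates a Swift class for each `.mlpackage`, named after the file (e.g. `yolov8n`). The sketch below shows roughly how the app consumes one of these models with Vision, mirroring `ViewController.swift`; it assumes the `yolov8n` model has been added to the app target so that its generated class exists.

```swift
import CoreML
import Vision

// Load the auto-generated model class and wrap it for Vision (mirrors ViewController.swift).
let mlModel = try yolov8n(configuration: MLModelConfiguration()).model
let detector = try VNCoreMLModel(for: mlModel)

// Build a request whose results are object detections (the exported models include NMS).
let request = VNCoreMLRequest(model: detector) { request, _ in
  let results = request.results as? [VNRecognizedObjectObservation] ?? []
  for observation in results {
    let best = observation.labels[0]  // highest-scoring class first
    print(best.identifier, best.confidence, observation.boundingBox)
  }
}
request.imageCropAndScaleOption = .scaleFill  // must match how boxes are later rescaled
```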
## Finalizing the Setup From 2ffd15f481396a2128388b59f5bb233d8f2c1e30 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Mon, 12 Aug 2024 04:23:36 +0800 Subject: [PATCH 03/26] Update format.yml --- .github/workflows/format.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/format.yml b/.github/workflows/format.yml index f988ad3..5b39b9b 100644 --- a/.github/workflows/format.yml +++ b/.github/workflows/format.yml @@ -13,7 +13,7 @@ on: jobs: format: - runs-on: ubuntu-latest + runs-on: macos-latest steps: - name: Run Ultralytics Formatting uses: ultralytics/actions@main @@ -23,6 +23,7 @@ jobs: python: true # format Python code and docstrings markdown: true # format Markdown prettier: true # format YAML + swift: true # format Swift spelling: false # check spelling links: false # check broken links summary: true # print PR summary with GPT4 (requires 'openai_api_key' or 'openai_azure_api_key' and 'openai_azure_endpoint') From 83a14e8f2a199fc930650c36c1d6e1bdda8911a8 Mon Sep 17 00:00:00 2001 From: UltralyticsAssistant Date: Sun, 11 Aug 2024 20:24:00 +0000 Subject: [PATCH 04/26] Auto-format by https://ultralytics.com/actions --- YOLO/AppDelegate.swift | 70 +- YOLO/Utilities/BoundingBoxView.swift | 107 +-- YOLO/Utilities/ThresholdProvider.swift | 44 +- YOLO/VideoCapture.swift | 201 ++-- YOLO/ViewController.swift | 1198 ++++++++++++------------ 5 files changed, 833 insertions(+), 787 deletions(-) diff --git a/YOLO/AppDelegate.swift b/YOLO/AppDelegate.swift index fe2f900..d9b8711 100644 --- a/YOLO/AppDelegate.swift +++ b/YOLO/AppDelegate.swift @@ -17,49 +17,53 @@ import UIKit /// The main application delegate, handling global app behavior and configuration. @UIApplicationMain class AppDelegate: UIResponder, UIApplicationDelegate { - var window: UIWindow? + var window: UIWindow? - /// Called when the app finishes launching, used here to set global app settings. - func application(_ application: UIApplication, didFinishLaunchingWithOptions launchOptions: [UIApplication.LaunchOptionsKey: Any]?) -> Bool { - // Disable screen dimming and auto-lock to keep the app active during long operations. - UIApplication.shared.isIdleTimerDisabled = true + /// Called when the app finishes launching, used here to set global app settings. + func application( + _ application: UIApplication, + didFinishLaunchingWithOptions launchOptions: [UIApplication.LaunchOptionsKey: Any]? + ) -> Bool { + // Disable screen dimming and auto-lock to keep the app active during long operations. + UIApplication.shared.isIdleTimerDisabled = true - // Enable battery monitoring to allow the app to adapt its behavior based on battery level. - UIDevice.current.isBatteryMonitoringEnabled = true + // Enable battery monitoring to allow the app to adapt its behavior based on battery level. + UIDevice.current.isBatteryMonitoringEnabled = true - // Store the app version and build version in UserDefaults for easy access elsewhere in the app. - if let appVersion = Bundle.main.infoDictionary?["CFBundleShortVersionString"] as? String, - let buildVersion = Bundle.main.infoDictionary?["CFBundleVersion"] as? String { - UserDefaults.standard.set("\(appVersion) (\(buildVersion))", forKey: "app_version") - } + // Store the app version and build version in UserDefaults for easy access elsewhere in the app. + if let appVersion = Bundle.main.infoDictionary?["CFBundleShortVersionString"] as? String, + let buildVersion = Bundle.main.infoDictionary?["CFBundleVersion"] as? 
String + { + UserDefaults.standard.set("\(appVersion) (\(buildVersion))", forKey: "app_version") + } - // Store the device's UUID in UserDefaults for identification purposes. - if let uuid = UIDevice.current.identifierForVendor?.uuidString { - UserDefaults.standard.set(uuid, forKey: "uuid") - } + // Store the device's UUID in UserDefaults for identification purposes. + if let uuid = UIDevice.current.identifierForVendor?.uuidString { + UserDefaults.standard.set(uuid, forKey: "uuid") + } - // Ensure UserDefaults changes are immediately saved. - UserDefaults.standard.synchronize() + // Ensure UserDefaults changes are immediately saved. + UserDefaults.standard.synchronize() - return true - } + return true + } } /// Extension to CALayer to add functionality for generating screenshots of any layer. extension CALayer { - var screenShot: UIImage? { - // Begin a new image context, using the device's screen scale to ensure high-resolution output. - UIGraphicsBeginImageContextWithOptions(frame.size, false, UIScreen.main.scale) - defer { - UIGraphicsEndImageContext() - } // Ensure the image context is cleaned up correctly. + var screenShot: UIImage? { + // Begin a new image context, using the device's screen scale to ensure high-resolution output. + UIGraphicsBeginImageContextWithOptions(frame.size, false, UIScreen.main.scale) + defer { + UIGraphicsEndImageContext() + } // Ensure the image context is cleaned up correctly. - if let context = UIGraphicsGetCurrentContext() { - // Render the layer into the current context. - render(in: context) - // Attempt to generate an image from the current context. - return UIGraphicsGetImageFromCurrentImageContext() - } - return nil // Return nil if the operation fails. + if let context = UIGraphicsGetCurrentContext() { + // Render the layer into the current context. + render(in: context) + // Attempt to generate an image from the current context. + return UIGraphicsGetImageFromCurrentImageContext() } + return nil // Return nil if the operation fails. + } } diff --git a/YOLO/Utilities/BoundingBoxView.swift b/YOLO/Utilities/BoundingBoxView.swift index b506545..add81aa 100644 --- a/YOLO/Utilities/BoundingBoxView.swift +++ b/YOLO/Utilities/BoundingBoxView.swift @@ -14,66 +14,67 @@ import UIKit /// Manages the visualization of bounding boxes and associated labels for object detection results. class BoundingBoxView { - /// The layer that draws the bounding box around a detected object. - let shapeLayer: CAShapeLayer + /// The layer that draws the bounding box around a detected object. + let shapeLayer: CAShapeLayer - /// The layer that displays the label and confidence score for the detected object. - let textLayer: CATextLayer + /// The layer that displays the label and confidence score for the detected object. + let textLayer: CATextLayer - /// Initializes a new BoundingBoxView with configured shape and text layers. - init() { - shapeLayer = CAShapeLayer() - shapeLayer.fillColor = UIColor.clear.cgColor // No fill to only show the bounding outline - shapeLayer.lineWidth = 4 // Set the stroke line width - shapeLayer.isHidden = true // Initially hidden; shown when a detection occurs + /// Initializes a new BoundingBoxView with configured shape and text layers. 
+ init() { + shapeLayer = CAShapeLayer() + shapeLayer.fillColor = UIColor.clear.cgColor // No fill to only show the bounding outline + shapeLayer.lineWidth = 4 // Set the stroke line width + shapeLayer.isHidden = true // Initially hidden; shown when a detection occurs - textLayer = CATextLayer() - textLayer.isHidden = true // Initially hidden; shown with label when a detection occurs - textLayer.contentsScale = UIScreen.main.scale // Ensure the text is sharp on retina displays - textLayer.fontSize = 14 // Set font size for the label text - textLayer.font = UIFont(name: "Avenir", size: textLayer.fontSize) // Use Avenir font for labels - textLayer.alignmentMode = .center // Center-align the text within the layer - } + textLayer = CATextLayer() + textLayer.isHidden = true // Initially hidden; shown with label when a detection occurs + textLayer.contentsScale = UIScreen.main.scale // Ensure the text is sharp on retina displays + textLayer.fontSize = 14 // Set font size for the label text + textLayer.font = UIFont(name: "Avenir", size: textLayer.fontSize) // Use Avenir font for labels + textLayer.alignmentMode = .center // Center-align the text within the layer + } - /// Adds the bounding box and text layers to a specified parent layer. - /// - Parameter parent: The CALayer to which the bounding box and text layers will be added. - func addToLayer(_ parent: CALayer) { - parent.addSublayer(shapeLayer) - parent.addSublayer(textLayer) - } + /// Adds the bounding box and text layers to a specified parent layer. + /// - Parameter parent: The CALayer to which the bounding box and text layers will be added. + func addToLayer(_ parent: CALayer) { + parent.addSublayer(shapeLayer) + parent.addSublayer(textLayer) + } - /// Updates the bounding box and label to be visible with specified properties. - /// - Parameters: - /// - frame: The CGRect frame defining the bounding box's size and position. - /// - label: The text label to display (e.g., object class and confidence). - /// - color: The color of the bounding box stroke and label background. - /// - alpha: The opacity level for the bounding box stroke and label background. - func show(frame: CGRect, label: String, color: UIColor, alpha: CGFloat) { - CATransaction.setDisableActions(true) // Disable implicit animations + /// Updates the bounding box and label to be visible with specified properties. + /// - Parameters: + /// - frame: The CGRect frame defining the bounding box's size and position. + /// - label: The text label to display (e.g., object class and confidence). + /// - color: The color of the bounding box stroke and label background. + /// - alpha: The opacity level for the bounding box stroke and label background. 
+ func show(frame: CGRect, label: String, color: UIColor, alpha: CGFloat) { + CATransaction.setDisableActions(true) // Disable implicit animations - let path = UIBezierPath(roundedRect: frame, cornerRadius: 6.0) // Rounded rectangle for the bounding box - shapeLayer.path = path.cgPath - shapeLayer.strokeColor = color.withAlphaComponent(alpha).cgColor // Apply color and alpha to the stroke - shapeLayer.isHidden = false // Make the shape layer visible + let path = UIBezierPath(roundedRect: frame, cornerRadius: 6.0) // Rounded rectangle for the bounding box + shapeLayer.path = path.cgPath + shapeLayer.strokeColor = color.withAlphaComponent(alpha).cgColor // Apply color and alpha to the stroke + shapeLayer.isHidden = false // Make the shape layer visible - textLayer.string = label // Set the label text - textLayer.backgroundColor = color.withAlphaComponent(alpha).cgColor // Apply color and alpha to the background - textLayer.isHidden = false // Make the text layer visible - textLayer.foregroundColor = UIColor.white.withAlphaComponent(alpha).cgColor // Set text color + textLayer.string = label // Set the label text + textLayer.backgroundColor = color.withAlphaComponent(alpha).cgColor // Apply color and alpha to the background + textLayer.isHidden = false // Make the text layer visible + textLayer.foregroundColor = UIColor.white.withAlphaComponent(alpha).cgColor // Set text color - // Calculate the text size and position based on the label content - let attributes = [NSAttributedString.Key.font: textLayer.font as Any] - let textRect = label.boundingRect(with: CGSize(width: 400, height: 100), - options: .truncatesLastVisibleLine, - attributes: attributes, context: nil) - let textSize = CGSize(width: textRect.width + 12, height: textRect.height) // Add padding to the text size - let textOrigin = CGPoint(x: frame.origin.x - 2, y: frame.origin.y - textSize.height - 2) // Position above the bounding box - textLayer.frame = CGRect(origin: textOrigin, size: textSize) // Set the text layer frame - } + // Calculate the text size and position based on the label content + let attributes = [NSAttributedString.Key.font: textLayer.font as Any] + let textRect = label.boundingRect( + with: CGSize(width: 400, height: 100), + options: .truncatesLastVisibleLine, + attributes: attributes, context: nil) + let textSize = CGSize(width: textRect.width + 12, height: textRect.height) // Add padding to the text size + let textOrigin = CGPoint(x: frame.origin.x - 2, y: frame.origin.y - textSize.height - 2) // Position above the bounding box + textLayer.frame = CGRect(origin: textOrigin, size: textSize) // Set the text layer frame + } - /// Hides the bounding box and text layers. - func hide() { - shapeLayer.isHidden = true - textLayer.isHidden = true - } + /// Hides the bounding box and text layers. + func hide() { + shapeLayer.isHidden = true + textLayer.isHidden = true + } } diff --git a/YOLO/Utilities/ThresholdProvider.swift b/YOLO/Utilities/ThresholdProvider.swift index 53702d1..22c8d6a 100644 --- a/YOLO/Utilities/ThresholdProvider.swift +++ b/YOLO/Utilities/ThresholdProvider.swift @@ -14,29 +14,29 @@ import CoreML /// Provides custom IoU and confidence thresholds for adjusting model predictions. class ThresholdProvider: MLFeatureProvider { - /// Stores IoU and confidence thresholds as MLFeatureValue objects. - var values: [String: MLFeatureValue] + /// Stores IoU and confidence thresholds as MLFeatureValue objects. + var values: [String: MLFeatureValue] - /// The set of feature names provided by this provider. 
- var featureNames: Set { - return Set(values.keys) - } + /// The set of feature names provided by this provider. + var featureNames: Set { + return Set(values.keys) + } - /// Initializes the provider with specified IoU and confidence thresholds. - /// - Parameters: - /// - iouThreshold: The IoU threshold for determining object overlap. - /// - confidenceThreshold: The minimum confidence for considering a detection valid. - init(iouThreshold: Double = 0.45, confidenceThreshold: Double = 0.25) { - values = [ - "iouThreshold": MLFeatureValue(double: iouThreshold), - "confidenceThreshold": MLFeatureValue(double: confidenceThreshold) - ] - } + /// Initializes the provider with specified IoU and confidence thresholds. + /// - Parameters: + /// - iouThreshold: The IoU threshold for determining object overlap. + /// - confidenceThreshold: The minimum confidence for considering a detection valid. + init(iouThreshold: Double = 0.45, confidenceThreshold: Double = 0.25) { + values = [ + "iouThreshold": MLFeatureValue(double: iouThreshold), + "confidenceThreshold": MLFeatureValue(double: confidenceThreshold), + ] + } - /// Returns the feature value for the given feature name. - /// - Parameter featureName: The name of the feature. - /// - Returns: The MLFeatureValue object corresponding to the feature name. - func featureValue(for featureName: String) -> MLFeatureValue? { - return values[featureName] - } + /// Returns the feature value for the given feature name. + /// - Parameter featureName: The name of the feature. + /// - Returns: The MLFeatureValue object corresponding to the feature name. + func featureValue(for featureName: String) -> MLFeatureValue? { + return values[featureName] + } } diff --git a/YOLO/VideoCapture.swift b/YOLO/VideoCapture.swift index 79aaf99..82e7fe9 100644 --- a/YOLO/VideoCapture.swift +++ b/YOLO/VideoCapture.swift @@ -11,123 +11,134 @@ // the capture session. It also provides methods to start and stop video capture and delivers captured frames // to a delegate implementing the VideoCaptureDelegate protocol. - import AVFoundation import CoreVideo import UIKit // Defines the protocol for handling video frame capture events. public protocol VideoCaptureDelegate: AnyObject { - func videoCapture(_ capture: VideoCapture, didCaptureVideoFrame: CMSampleBuffer) + func videoCapture(_ capture: VideoCapture, didCaptureVideoFrame: CMSampleBuffer) } // Identifies the best available camera device based on user preferences and device capabilities. 
func bestCaptureDevice() -> AVCaptureDevice { - if UserDefaults.standard.bool(forKey: "use_telephoto"), let device = AVCaptureDevice.default(.builtInTelephotoCamera, for: .video, position: .back) { - return device - } else if let device = AVCaptureDevice.default(.builtInDualCamera, for: .video, position: .back) { - return device - } else if let device = AVCaptureDevice.default(.builtInWideAngleCamera, for: .video, position: .back) { - return device - } else { - fatalError("Expected back camera device is not available.") - } + if UserDefaults.standard.bool(forKey: "use_telephoto"), + let device = AVCaptureDevice.default(.builtInTelephotoCamera, for: .video, position: .back) + { + return device + } else if let device = AVCaptureDevice.default(.builtInDualCamera, for: .video, position: .back) { + return device + } else if let device = AVCaptureDevice.default( + .builtInWideAngleCamera, for: .video, position: .back) + { + return device + } else { + fatalError("Expected back camera device is not available.") + } } public class VideoCapture: NSObject { - public var previewLayer: AVCaptureVideoPreviewLayer? - public weak var delegate: VideoCaptureDelegate? - - let captureDevice = bestCaptureDevice() - let captureSession = AVCaptureSession() - let videoOutput = AVCaptureVideoDataOutput() - var cameraOutput = AVCapturePhotoOutput() - let queue = DispatchQueue(label: "camera-queue") - - // Configures the camera and capture session with optional session presets. - public func setUp(sessionPreset: AVCaptureSession.Preset = .hd1280x720, completion: @escaping (Bool) -> Void) { - queue.async { - let success = self.setUpCamera(sessionPreset: sessionPreset) - DispatchQueue.main.async { - completion(success) - } - } + public var previewLayer: AVCaptureVideoPreviewLayer? + public weak var delegate: VideoCaptureDelegate? + + let captureDevice = bestCaptureDevice() + let captureSession = AVCaptureSession() + let videoOutput = AVCaptureVideoDataOutput() + var cameraOutput = AVCapturePhotoOutput() + let queue = DispatchQueue(label: "camera-queue") + + // Configures the camera and capture session with optional session presets. + public func setUp( + sessionPreset: AVCaptureSession.Preset = .hd1280x720, completion: @escaping (Bool) -> Void + ) { + queue.async { + let success = self.setUpCamera(sessionPreset: sessionPreset) + DispatchQueue.main.async { + completion(success) + } } + } + + // Internal method to configure camera inputs, outputs, and session properties. + private func setUpCamera(sessionPreset: AVCaptureSession.Preset) -> Bool { + captureSession.beginConfiguration() + captureSession.sessionPreset = sessionPreset - // Internal method to configure camera inputs, outputs, and session properties. - private func setUpCamera(sessionPreset: AVCaptureSession.Preset) -> Bool { - captureSession.beginConfiguration() - captureSession.sessionPreset = sessionPreset - - guard let videoInput = try? 
AVCaptureDeviceInput(device: captureDevice) else { - return false - } - - if captureSession.canAddInput(videoInput) { - captureSession.addInput(videoInput) - } - - let previewLayer = AVCaptureVideoPreviewLayer(session: captureSession) - previewLayer.videoGravity = .resizeAspectFill - previewLayer.connection?.videoOrientation = .portrait - self.previewLayer = previewLayer - - let settings: [String: Any] = [ - kCVPixelBufferPixelFormatTypeKey as String: NSNumber(value: kCVPixelFormatType_32BGRA) - ] - - videoOutput.videoSettings = settings - videoOutput.alwaysDiscardsLateVideoFrames = true - videoOutput.setSampleBufferDelegate(self, queue: queue) - if captureSession.canAddOutput(videoOutput) { - captureSession.addOutput(videoOutput) - } - - if captureSession.canAddOutput(cameraOutput) { - captureSession.addOutput(cameraOutput) - } - - videoOutput.connection(with: .video)?.videoOrientation = .portrait - - do { - try captureDevice.lockForConfiguration() - captureDevice.focusMode = .continuousAutoFocus - captureDevice.focusPointOfInterest = CGPoint(x: 0.5, y: 0.5) - captureDevice.exposureMode = .continuousAutoExposure - captureDevice.unlockForConfiguration() - } catch { - print("Unable to configure the capture device.") - return false - } - - captureSession.commitConfiguration() - return true + guard let videoInput = try? AVCaptureDeviceInput(device: captureDevice) else { + return false } - // Starts the video capture session. - public func start() { - if !captureSession.isRunning { - DispatchQueue.global(qos: .userInitiated).async { [weak self] in - self?.captureSession.startRunning() - } - } + if captureSession.canAddInput(videoInput) { + captureSession.addInput(videoInput) } - // Stops the video capture session. - public func stop() { - if captureSession.isRunning { - captureSession.stopRunning() - } + let previewLayer = AVCaptureVideoPreviewLayer(session: captureSession) + previewLayer.videoGravity = .resizeAspectFill + previewLayer.connection?.videoOrientation = .portrait + self.previewLayer = previewLayer + + let settings: [String: Any] = [ + kCVPixelBufferPixelFormatTypeKey as String: NSNumber(value: kCVPixelFormatType_32BGRA) + ] + + videoOutput.videoSettings = settings + videoOutput.alwaysDiscardsLateVideoFrames = true + videoOutput.setSampleBufferDelegate(self, queue: queue) + if captureSession.canAddOutput(videoOutput) { + captureSession.addOutput(videoOutput) } -} -// Extension to handle AVCaptureVideoDataOutputSampleBufferDelegate events. -extension VideoCapture: AVCaptureVideoDataOutputSampleBufferDelegate { - public func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) { - delegate?.videoCapture(self, didCaptureVideoFrame: sampleBuffer) + if captureSession.canAddOutput(cameraOutput) { + captureSession.addOutput(cameraOutput) } - public func captureOutput(_ output: AVCaptureOutput, didDrop sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) { - // Optionally handle dropped frames, e.g., due to full buffer. 
+ videoOutput.connection(with: .video)?.videoOrientation = .portrait + + do { + try captureDevice.lockForConfiguration() + captureDevice.focusMode = .continuousAutoFocus + captureDevice.focusPointOfInterest = CGPoint(x: 0.5, y: 0.5) + captureDevice.exposureMode = .continuousAutoExposure + captureDevice.unlockForConfiguration() + } catch { + print("Unable to configure the capture device.") + return false } + + captureSession.commitConfiguration() + return true + } + + // Starts the video capture session. + public func start() { + if !captureSession.isRunning { + DispatchQueue.global(qos: .userInitiated).async { [weak self] in + self?.captureSession.startRunning() + } + } + } + + // Stops the video capture session. + public func stop() { + if captureSession.isRunning { + captureSession.stopRunning() + } + } +} + +// Extension to handle AVCaptureVideoDataOutputSampleBufferDelegate events. +extension VideoCapture: AVCaptureVideoDataOutputSampleBufferDelegate { + public func captureOutput( + _ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, + from connection: AVCaptureConnection + ) { + delegate?.videoCapture(self, didCaptureVideoFrame: sampleBuffer) + } + + public func captureOutput( + _ output: AVCaptureOutput, didDrop sampleBuffer: CMSampleBuffer, + from connection: AVCaptureConnection + ) { + // Optionally handle dropped frames, e.g., due to full buffer. + } } diff --git a/YOLO/ViewController.swift b/YOLO/ViewController.swift index a1355cc..bde25fc 100644 --- a/YOLO/ViewController.swift +++ b/YOLO/ViewController.swift @@ -12,631 +12,661 @@ // the device's camera. import AVFoundation -import CoreMedia import CoreML +import CoreMedia import UIKit import Vision var mlModel = try! yolov8m(configuration: .init()).model class ViewController: UIViewController { - @IBOutlet var videoPreview: UIView! - @IBOutlet var View0: UIView! - @IBOutlet var segmentedControl: UISegmentedControl! - @IBOutlet var playButtonOutlet: UIBarButtonItem! - @IBOutlet var pauseButtonOutlet: UIBarButtonItem! - @IBOutlet var slider: UISlider! - @IBOutlet var sliderConf: UISlider! - @IBOutlet var sliderIoU: UISlider! - @IBOutlet weak var labelName: UILabel! - @IBOutlet weak var labelFPS: UILabel! - @IBOutlet weak var labelZoom: UILabel! - @IBOutlet weak var labelVersion: UILabel! - @IBOutlet weak var labelSlider: UILabel! - @IBOutlet weak var labelSliderConf: UILabel! - @IBOutlet weak var labelSliderIoU: UILabel! - @IBOutlet weak var activityIndicator: UIActivityIndicatorView! - var screenshotImageView:UIImageView? - - let selection = UISelectionFeedbackGenerator() - var detector = try! VNCoreMLModel(for: mlModel) - var session: AVCaptureSession! - var videoCapture: VideoCapture! - var currentBuffer: CVPixelBuffer? - var framesDone = 0 - var t0 = 0.0 // inference start - var t1 = 0.0 // inference dt - var t2 = 0.0 // inference dt smoothed - var t3 = CACurrentMediaTime() // FPS start - var t4 = 0.0 // FPS dt smoothed - // var cameraOutput: AVCapturePhotoOutput! 
- - // Developer mode - let developerMode = UserDefaults.standard.bool(forKey: "developer_mode") // developer mode selected in settings - let save_detections = false // write every detection to detections.txt - let save_frames = false // write every frame to frames.txt - - lazy var visionRequest: VNCoreMLRequest = { - let request = VNCoreMLRequest(model: detector, completionHandler: { - [weak self] request, error in - self?.processObservations(for: request, error: error) - }) - // NOTE: BoundingBoxView object scaling depends on request.imageCropAndScaleOption https://developer.apple.com/documentation/vision/vnimagecropandscaleoption - request.imageCropAndScaleOption = .scaleFill // .scaleFit, .scaleFill, .centerCrop - return request - }() - - override func viewDidLoad() { - super.viewDidLoad() - slider.value = 30 - setLabels() - setUpBoundingBoxViews() - startVideo() - // setModel() + @IBOutlet var videoPreview: UIView! + @IBOutlet var View0: UIView! + @IBOutlet var segmentedControl: UISegmentedControl! + @IBOutlet var playButtonOutlet: UIBarButtonItem! + @IBOutlet var pauseButtonOutlet: UIBarButtonItem! + @IBOutlet var slider: UISlider! + @IBOutlet var sliderConf: UISlider! + @IBOutlet var sliderIoU: UISlider! + @IBOutlet weak var labelName: UILabel! + @IBOutlet weak var labelFPS: UILabel! + @IBOutlet weak var labelZoom: UILabel! + @IBOutlet weak var labelVersion: UILabel! + @IBOutlet weak var labelSlider: UILabel! + @IBOutlet weak var labelSliderConf: UILabel! + @IBOutlet weak var labelSliderIoU: UILabel! + @IBOutlet weak var activityIndicator: UIActivityIndicatorView! + var screenshotImageView: UIImageView? + + let selection = UISelectionFeedbackGenerator() + var detector = try! VNCoreMLModel(for: mlModel) + var session: AVCaptureSession! + var videoCapture: VideoCapture! + var currentBuffer: CVPixelBuffer? + var framesDone = 0 + var t0 = 0.0 // inference start + var t1 = 0.0 // inference dt + var t2 = 0.0 // inference dt smoothed + var t3 = CACurrentMediaTime() // FPS start + var t4 = 0.0 // FPS dt smoothed + // var cameraOutput: AVCapturePhotoOutput! + + // Developer mode + let developerMode = UserDefaults.standard.bool(forKey: "developer_mode") // developer mode selected in settings + let save_detections = false // write every detection to detections.txt + let save_frames = false // write every frame to frames.txt + + lazy var visionRequest: VNCoreMLRequest = { + let request = VNCoreMLRequest( + model: detector, + completionHandler: { + [weak self] request, error in + self?.processObservations(for: request, error: error) + }) + // NOTE: BoundingBoxView object scaling depends on request.imageCropAndScaleOption https://developer.apple.com/documentation/vision/vnimagecropandscaleoption + request.imageCropAndScaleOption = .scaleFill // .scaleFit, .scaleFill, .centerCrop + return request + }() + + override func viewDidLoad() { + super.viewDidLoad() + slider.value = 30 + setLabels() + setUpBoundingBoxViews() + startVideo() + // setModel() + } + + @IBAction func vibrate(_ sender: Any) { + selection.selectionChanged() + } + + @IBAction func indexChanged(_ sender: Any) { + selection.selectionChanged() + activityIndicator.startAnimating() + + /// Switch model + switch segmentedControl.selectedSegmentIndex { + case 0: + self.labelName.text = "YOLOv8n" + mlModel = try! yolov8n(configuration: .init()).model + case 1: + self.labelName.text = "YOLOv8s" + mlModel = try! yolov8s(configuration: .init()).model + case 2: + self.labelName.text = "YOLOv8m" + mlModel = try! 
yolov8m(configuration: .init()).model + case 3: + self.labelName.text = "YOLOv8l" + mlModel = try! yolov8l(configuration: .init()).model + case 4: + self.labelName.text = "YOLOv8x" + mlModel = try! yolov8x(configuration: .init()).model + default: + break } - - @IBAction func vibrate(_ sender: Any) { - selection.selectionChanged() + setModel() + setUpBoundingBoxViews() + activityIndicator.stopAnimating() + } + + func setModel() { + /// VNCoreMLModel + detector = try! VNCoreMLModel(for: mlModel) + detector.featureProvider = ThresholdProvider() + + /// VNCoreMLRequest + let request = VNCoreMLRequest( + model: detector, + completionHandler: { [weak self] request, error in + self?.processObservations(for: request, error: error) + }) + request.imageCropAndScaleOption = .scaleFill // .scaleFit, .scaleFill, .centerCrop + visionRequest = request + t2 = 0.0 // inference dt smoothed + t3 = CACurrentMediaTime() // FPS start + t4 = 0.0 // FPS dt smoothed + } + + /// Update thresholds from slider values + @IBAction func sliderChanged(_ sender: Any) { + let conf = Double(round(100 * sliderConf.value)) / 100 + let iou = Double(round(100 * sliderIoU.value)) / 100 + self.labelSliderConf.text = String(conf) + " Confidence Threshold" + self.labelSliderIoU.text = String(iou) + " IoU Threshold" + detector.featureProvider = ThresholdProvider(iouThreshold: iou, confidenceThreshold: conf) + } + + @IBAction func takePhoto(_ sender: Any?) { + let t0 = DispatchTime.now().uptimeNanoseconds + + // 1. captureSession and cameraOutput + // session = videoCapture.captureSession // session = AVCaptureSession() + // session.sessionPreset = AVCaptureSession.Preset.photo + // cameraOutput = AVCapturePhotoOutput() + // cameraOutput.isHighResolutionCaptureEnabled = true + // cameraOutput.isDualCameraDualPhotoDeliveryEnabled = true + // print("1 Done: ", Double(DispatchTime.now().uptimeNanoseconds - t0) / 1E9) + + // 2. Settings + let settings = AVCapturePhotoSettings() + // settings.flashMode = .off + // settings.isHighResolutionPhotoEnabled = cameraOutput.isHighResolutionCaptureEnabled + // settings.isDualCameraDualPhotoDeliveryEnabled = self.videoCapture.cameraOutput.isDualCameraDualPhotoDeliveryEnabled + + // 3. Capture Photo + usleep(20_000) // short 10 ms delay to allow camera to focus + self.videoCapture.cameraOutput.capturePhoto( + with: settings, delegate: self as AVCapturePhotoCaptureDelegate) + print("3 Done: ", Double(DispatchTime.now().uptimeNanoseconds - t0) / 1E9) + } + + @IBAction func logoButton(_ sender: Any) { + selection.selectionChanged() + if let link = URL(string: "https://www.ultralytics.com") { + UIApplication.shared.open(link) } - - @IBAction func indexChanged(_ sender: Any) { - selection.selectionChanged() - activityIndicator.startAnimating() - - /// Switch model - switch segmentedControl.selectedSegmentIndex { - case 0: - self.labelName.text = "YOLOv8n" - mlModel = try! yolov8n(configuration: .init()).model - case 1: - self.labelName.text = "YOLOv8s" - mlModel = try! yolov8s(configuration: .init()).model - case 2: - self.labelName.text = "YOLOv8m" - mlModel = try! yolov8m(configuration: .init()).model - case 3: - self.labelName.text = "YOLOv8l" - mlModel = try! yolov8l(configuration: .init()).model - case 4: - self.labelName.text = "YOLOv8x" - mlModel = try! 
yolov8x(configuration: .init()).model - default: - break - } - setModel() - setUpBoundingBoxViews() - activityIndicator.stopAnimating() + } + + func setLabels() { + self.labelName.text = "YOLOv8m" + self.labelVersion.text = "Version " + UserDefaults.standard.string(forKey: "app_version")! + } + + @IBAction func playButton(_ sender: Any) { + selection.selectionChanged() + self.videoCapture.start() + playButtonOutlet.isEnabled = false + pauseButtonOutlet.isEnabled = true + } + + @IBAction func pauseButton(_ sender: Any?) { + selection.selectionChanged() + self.videoCapture.stop() + playButtonOutlet.isEnabled = true + pauseButtonOutlet.isEnabled = false + } + + @IBAction func switchCameraTapped(_ sender: Any) { + self.videoCapture.captureSession.beginConfiguration() + let currentInput = self.videoCapture.captureSession.inputs.first as? AVCaptureDeviceInput + self.videoCapture.captureSession.removeInput(currentInput!) + // let newCameraDevice = currentInput?.device == .builtInWideAngleCamera ? getCamera(with: .front) : getCamera(with: .back) + + let device = AVCaptureDevice.default(.builtInWideAngleCamera, for: .video, position: .back)! + guard let videoInput1 = try? AVCaptureDeviceInput(device: device) else { + return } - - func setModel() { - /// VNCoreMLModel - detector = try! VNCoreMLModel(for: mlModel) - detector.featureProvider = ThresholdProvider() - - /// VNCoreMLRequest - let request = VNCoreMLRequest(model: detector, completionHandler: { [weak self] request, error in - self?.processObservations(for: request, error: error) - }) - request.imageCropAndScaleOption = .scaleFill // .scaleFit, .scaleFill, .centerCrop - visionRequest = request - t2 = 0.0 // inference dt smoothed - t3 = CACurrentMediaTime() // FPS start - t4 = 0.0 // FPS dt smoothed + + self.videoCapture.captureSession.addInput(videoInput1) + self.videoCapture.captureSession.commitConfiguration() + } + + // share image + @IBAction func shareButton(_ sender: Any) { + selection.selectionChanged() + let settings = AVCapturePhotoSettings() + self.videoCapture.cameraOutput.capturePhoto( + with: settings, delegate: self as AVCapturePhotoCaptureDelegate) + } + + // share screenshot + @IBAction func saveScreenshotButton(_ shouldSave: Bool = true) { + // let layer = UIApplication.shared.keyWindow!.layer + // let scale = UIScreen.main.scale + // UIGraphicsBeginImageContextWithOptions(layer.frame.size, false, scale); + // layer.render(in: UIGraphicsGetCurrentContext()!) + // let screenshot = UIGraphicsGetImageFromCurrentImageContext() + // UIGraphicsEndImageContext() + + // let screenshot = UIApplication.shared.screenShot + // UIImageWriteToSavedPhotosAlbum(screenshot!, nil, nil, nil) + } + + let maxBoundingBoxViews = 100 + var boundingBoxViews = [BoundingBoxView]() + var colors: [String: UIColor] = [:] + + func setUpBoundingBoxViews() { + // Ensure all bounding box views are initialized up to the maximum allowed. + while boundingBoxViews.count < maxBoundingBoxViews { + boundingBoxViews.append(BoundingBoxView()) } - - /// Update thresholds from slider values - @IBAction func sliderChanged(_ sender: Any) { - let conf = Double(round(100 * sliderConf.value)) / 100 - let iou = Double(round(100 * sliderIoU.value)) / 100 - self.labelSliderConf.text = String(conf) + " Confidence Threshold" - self.labelSliderIoU.text = String(iou) + " IoU Threshold" - detector.featureProvider = ThresholdProvider(iouThreshold: iou, confidenceThreshold: conf) + + // Retrieve class labels directly from the CoreML model's class labels, if available. 
+ guard let classLabels = mlModel.modelDescription.classLabels as? [String] else { + fatalError("Class labels are missing from the model description") } - - @IBAction func takePhoto(_ sender: Any?) { - let t0 = DispatchTime.now().uptimeNanoseconds - - // 1. captureSession and cameraOutput - // session = videoCapture.captureSession // session = AVCaptureSession() - // session.sessionPreset = AVCaptureSession.Preset.photo - // cameraOutput = AVCapturePhotoOutput() - // cameraOutput.isHighResolutionCaptureEnabled = true - // cameraOutput.isDualCameraDualPhotoDeliveryEnabled = true - // print("1 Done: ", Double(DispatchTime.now().uptimeNanoseconds - t0) / 1E9) - - // 2. Settings - let settings = AVCapturePhotoSettings() - // settings.flashMode = .off - // settings.isHighResolutionPhotoEnabled = cameraOutput.isHighResolutionCaptureEnabled - // settings.isDualCameraDualPhotoDeliveryEnabled = self.videoCapture.cameraOutput.isDualCameraDualPhotoDeliveryEnabled - - // 3. Capture Photo - usleep(20_000) // short 10 ms delay to allow camera to focus - self.videoCapture.cameraOutput.capturePhoto(with: settings, delegate: self as AVCapturePhotoCaptureDelegate) - print("3 Done: ", Double(DispatchTime.now().uptimeNanoseconds - t0) / 1E9) + + // Assign random colors to the classes. + for label in classLabels { + if colors[label] == nil { // if key not in dict + colors[label] = UIColor( + red: CGFloat.random(in: 0...1), + green: CGFloat.random(in: 0...1), + blue: CGFloat.random(in: 0...1), + alpha: 0.6) + } } - - @IBAction func logoButton(_ sender: Any) { - selection.selectionChanged() - if let link = URL(string: "https://www.ultralytics.com") { - UIApplication.shared.open(link) + } + + func startVideo() { + videoCapture = VideoCapture() + videoCapture.delegate = self + + videoCapture.setUp(sessionPreset: .photo) { success in + // .hd4K3840x2160 or .photo (4032x3024) Warning: 4k may not work on all devices i.e. 2019 iPod + if success { + // Add the video preview into the UI. + if let previewLayer = self.videoCapture.previewLayer { + self.videoPreview.layer.addSublayer(previewLayer) + self.videoCapture.previewLayer?.frame = self.videoPreview.bounds // resize preview layer } - } - - func setLabels() { - self.labelName.text = "YOLOv8m" - self.labelVersion.text = "Version " + UserDefaults.standard.string(forKey: "app_version")! - } - - @IBAction func playButton(_ sender: Any) { - selection.selectionChanged() + + // Add the bounding box layers to the UI, on top of the video preview. + for box in self.boundingBoxViews { + box.addToLayer(self.videoPreview.layer) + } + + // Once everything is set up, we can start capturing live video. self.videoCapture.start() - playButtonOutlet.isEnabled = false - pauseButtonOutlet.isEnabled = true + } + } + } + + func predict(sampleBuffer: CMSampleBuffer) { + if currentBuffer == nil, let pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) { + currentBuffer = pixelBuffer + + /// - Tag: MappingOrientation + // The frame is always oriented based on the camera sensor, + // so in most cases Vision needs to rotate it for the model to work as expected. 
+ let imageOrientation: CGImagePropertyOrientation + switch UIDevice.current.orientation { + case .portrait: + imageOrientation = .up + case .portraitUpsideDown: + imageOrientation = .down + case .landscapeLeft: + imageOrientation = .left + case .landscapeRight: + imageOrientation = .right + case .unknown: + print("The device orientation is unknown, the predictions may be affected") + fallthrough + default: + imageOrientation = .up + } + + // Invoke a VNRequestHandler with that image + let handler = VNImageRequestHandler( + cvPixelBuffer: pixelBuffer, orientation: imageOrientation, options: [:]) + if UIDevice.current.orientation != .faceUp { // stop if placed down on a table + t0 = CACurrentMediaTime() // inference start + do { + try handler.perform([visionRequest]) + } catch { + print(error) + } + t1 = CACurrentMediaTime() - t0 // inference dt + } + + currentBuffer = nil } - - @IBAction func pauseButton(_ sender: Any?) { - selection.selectionChanged() - self.videoCapture.stop() - playButtonOutlet.isEnabled = true - pauseButtonOutlet.isEnabled = false + } + + func processObservations(for request: VNRequest, error: Error?) { + DispatchQueue.main.async { + if let results = request.results as? [VNRecognizedObjectObservation] { + self.show(predictions: results) + } else { + self.show(predictions: []) + } + + // Measure FPS + if self.t1 < 10.0 { // valid dt + self.t2 = self.t1 * 0.05 + self.t2 * 0.95 // smoothed inference time + } + self.t4 = (CACurrentMediaTime() - self.t3) * 0.05 + self.t4 * 0.95 // smoothed delivered FPS + self.labelFPS.text = String(format: "%.1f FPS - %.1f ms", 1 / self.t4, self.t2 * 1000) // t2 seconds to ms + self.t3 = CACurrentMediaTime() } - - @IBAction func switchCameraTapped(_ sender: Any) { - self.videoCapture.captureSession.beginConfiguration() - let currentInput = self.videoCapture.captureSession.inputs.first as? AVCaptureDeviceInput - self.videoCapture.captureSession.removeInput(currentInput!) - // let newCameraDevice = currentInput?.device == .builtInWideAngleCamera ? getCamera(with: .front) : getCamera(with: .back) - - let device = AVCaptureDevice.default(.builtInWideAngleCamera, for: .video, position: .back)! - guard let videoInput1 = try? AVCaptureDeviceInput(device: device) else { - return + } + + // Save text file + func saveText(text: String, file: String = "saved.txt") { + if let dir = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask).first { + let fileURL = dir.appendingPathComponent(file) + + // Writing + do { // Append to file if it exists + let fileHandle = try FileHandle(forWritingTo: fileURL) + fileHandle.seekToEndOfFile() + fileHandle.write(text.data(using: .utf8)!) 
+ fileHandle.closeFile() + } catch { // Create new file and write + do { + try text.write(to: fileURL, atomically: false, encoding: .utf8) + } catch { + print("no file written") } - - self.videoCapture.captureSession.addInput(videoInput1) - self.videoCapture.captureSession.commitConfiguration() + } + + // Reading + // do {let text2 = try String(contentsOf: fileURL, encoding: .utf8)} catch {/* error handling here */} } - - // share image - @IBAction func shareButton(_ sender: Any) { - selection.selectionChanged() - let settings = AVCapturePhotoSettings() - self.videoCapture.cameraOutput.capturePhoto(with: settings, delegate: self as AVCapturePhotoCaptureDelegate) + } + + // Save image file + func saveImage() { + let dir = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask).first + let fileURL = dir!.appendingPathComponent("saved.jpg") + let image = UIImage(named: "ultralytics_yolo_logotype.png") + FileManager.default.createFile( + atPath: fileURL.path, contents: image!.jpegData(compressionQuality: 0.5), attributes: nil) + } + + // Return hard drive space (GB) + func freeSpace() -> Double { + let fileURL = URL(fileURLWithPath: NSHomeDirectory() as String) + do { + let values = try fileURL.resourceValues(forKeys: [ + .volumeAvailableCapacityForImportantUsageKey + ]) + return Double(values.volumeAvailableCapacityForImportantUsage!) / 1E9 // Bytes to GB + } catch { + print("Error retrieving storage capacity: \(error.localizedDescription)") } - - // share screenshot - @IBAction func saveScreenshotButton(_ shouldSave: Bool = true) { - // let layer = UIApplication.shared.keyWindow!.layer - // let scale = UIScreen.main.scale - // UIGraphicsBeginImageContextWithOptions(layer.frame.size, false, scale); - // layer.render(in: UIGraphicsGetCurrentContext()!) - // let screenshot = UIGraphicsGetImageFromCurrentImageContext() - // UIGraphicsEndImageContext() - - // let screenshot = UIApplication.shared.screenShot - // UIImageWriteToSavedPhotosAlbum(screenshot!, nil, nil, nil) + return 0 + } + + // Return RAM usage (GB) + func memoryUsage() -> Double { + var taskInfo = mach_task_basic_info() + var count = mach_msg_type_number_t(MemoryLayout.size) / 4 + let kerr: kern_return_t = withUnsafeMutablePointer(to: &taskInfo) { + $0.withMemoryRebound(to: integer_t.self, capacity: 1) { + task_info(mach_task_self_, task_flavor_t(MACH_TASK_BASIC_INFO), $0, &count) + } } - - let maxBoundingBoxViews = 100 - var boundingBoxViews = [BoundingBoxView]() - var colors: [String: UIColor] = [:] - - func setUpBoundingBoxViews() { - // Ensure all bounding box views are initialized up to the maximum allowed. - while boundingBoxViews.count < maxBoundingBoxViews { - boundingBoxViews.append(BoundingBoxView()) - } - - // Retrieve class labels directly from the CoreML model's class labels, if available. - guard let classLabels = mlModel.modelDescription.classLabels as? [String] else { - fatalError("Class labels are missing from the model description") - } - - // Assign random colors to the classes. 
- for label in classLabels { - if colors[label] == nil { // if key not in dict - colors[label] = UIColor(red: CGFloat.random(in: 0...1), - green: CGFloat.random(in: 0...1), - blue: CGFloat.random(in: 0...1), - alpha: 0.6) - } - } + if kerr == KERN_SUCCESS { + return Double(taskInfo.resident_size) / 1E9 // Bytes to GB + } else { + return 0 } - - func startVideo() { - videoCapture = VideoCapture() - videoCapture.delegate = self - - videoCapture.setUp(sessionPreset: .photo) { success in - // .hd4K3840x2160 or .photo (4032x3024) Warning: 4k may not work on all devices i.e. 2019 iPod - if success { - // Add the video preview into the UI. - if let previewLayer = self.videoCapture.previewLayer { - self.videoPreview.layer.addSublayer(previewLayer) - self.videoCapture.previewLayer?.frame = self.videoPreview.bounds // resize preview layer - } - - // Add the bounding box layers to the UI, on top of the video preview. - for box in self.boundingBoxViews { - box.addToLayer(self.videoPreview.layer) - } - - // Once everything is set up, we can start capturing live video. - self.videoCapture.start() - } - } + } + + func show(predictions: [VNRecognizedObjectObservation]) { + let width = videoPreview.bounds.width // 375 pix + let height = videoPreview.bounds.height // 812 pix + var str = "" + + // ratio = videoPreview AR divided by sessionPreset AR + var ratio: CGFloat = 1.0 + if videoCapture.captureSession.sessionPreset == .photo { + ratio = (height / width) / (4.0 / 3.0) // .photo + } else { + ratio = (height / width) / (16.0 / 9.0) // .hd4K3840x2160, .hd1920x1080, .hd1280x720 etc. } - - func predict(sampleBuffer: CMSampleBuffer) { - if currentBuffer == nil, let pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) { - currentBuffer = pixelBuffer - - /// - Tag: MappingOrientation - // The frame is always oriented based on the camera sensor, - // so in most cases Vision needs to rotate it for the model to work as expected. 
- let imageOrientation: CGImagePropertyOrientation - switch UIDevice.current.orientation { - case .portrait: - imageOrientation = .up - case .portraitUpsideDown: - imageOrientation = .down - case .landscapeLeft: - imageOrientation = .left - case .landscapeRight: - imageOrientation = .right - case .unknown: - print("The device orientation is unknown, the predictions may be affected") - fallthrough - default: - imageOrientation = .up - } - - // Invoke a VNRequestHandler with that image - let handler = VNImageRequestHandler(cvPixelBuffer: pixelBuffer, orientation: imageOrientation, options: [:]) - if UIDevice.current.orientation != .faceUp { // stop if placed down on a table - t0 = CACurrentMediaTime() // inference start - do { - try handler.perform([visionRequest]) - } catch { - print(error) - } - t1 = CACurrentMediaTime() - t0 // inference dt - } - - currentBuffer = nil + + // date + let date = Date() + let calendar = Calendar.current + let hour = calendar.component(.hour, from: date) + let minutes = calendar.component(.minute, from: date) + let seconds = calendar.component(.second, from: date) + let nanoseconds = calendar.component(.nanosecond, from: date) + let sec_day = + Double(hour) * 3600.0 + Double(minutes) * 60.0 + Double(seconds) + Double(nanoseconds) / 1E9 // seconds in the day + + self.labelSlider.text = + String(predictions.count) + " items (max " + String(Int(slider.value)) + ")" + for i in 0..= 1 { // iPhone ratio = 1.218 + let offset = (1 - ratio) * (0.5 - rect.minX) + let transform = CGAffineTransform(scaleX: 1, y: -1).translatedBy(x: offset, y: -1) + rect = rect.applying(transform) + rect.size.width *= ratio + } else { // iPad ratio = 0.75 + let offset = (ratio - 1) * (0.5 - rect.maxY) + let transform = CGAffineTransform(scaleX: 1, y: -1).translatedBy(x: 0, y: offset - 1) + rect = rect.applying(transform) + rect.size.height /= ratio } - } - - // Save text file - func saveText(text: String, file: String = "saved.txt") { - if let dir = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask).first { - let fileURL = dir.appendingPathComponent(file) - - // Writing - do { // Append to file if it exists - let fileHandle = try FileHandle(forWritingTo: fileURL) - fileHandle.seekToEndOfFile() - fileHandle.write(text.data(using: .utf8)!) - fileHandle.closeFile() - } catch { // Create new file and write - do { - try text.write(to: fileURL, atomically: false, encoding: .utf8) - } catch { - print("no file written") - } - } - - // Reading - // do {let text2 = try String(contentsOf: fileURL, encoding: .utf8)} catch {/* error handling here */} + + // Scale normalized to pixels [375, 812] [width, height] + rect = VNImageRectForNormalizedRect(rect, Int(width), Int(height)) + + // The labels array is a list of VNClassificationObservation objects, + // with the highest scoring class first in the list. + let bestClass = prediction.labels[0].identifier + let confidence = prediction.labels[0].confidence + // print(confidence, rect) // debug (confidence, xywh) with xywh origin top left (pixels) + + // Show the bounding box. + boundingBoxViews[i].show( + frame: rect, + label: String(format: "%@ %.1f", bestClass, confidence * 100), + color: colors[bestClass] ?? 
UIColor.white, + alpha: CGFloat((confidence - 0.2) / (1.0 - 0.2) * 0.9)) // alpha 0 (transparent) to 1 (opaque) for conf threshold 0.2 to 1.0) + + if developerMode { + // Write + if save_detections { + str += String( + format: "%.3f %.3f %.3f %@ %.2f %.1f %.1f %.1f %.1f\n", + sec_day, freeSpace(), UIDevice.current.batteryLevel, bestClass, confidence, + rect.origin.x, rect.origin.y, rect.size.width, rect.size.height) + } + + // Action trigger upon detection + // if false { + // if (bestClass == "car") { // "cell phone", "car", "person" + // self.takePhoto(nil) + // // self.pauseButton(nil) + // sleep(2) + // } + // } } + } else { + boundingBoxViews[i].hide() + } } - - // Save image file - func saveImage() { - let dir = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask).first - let fileURL = dir!.appendingPathComponent("saved.jpg") - let image = UIImage(named: "ultralytics_yolo_logotype.png") - FileManager.default.createFile(atPath: fileURL.path, contents: image!.jpegData(compressionQuality: 0.5), attributes: nil) + + // Write + if developerMode { + if save_detections { + saveText(text: str, file: "detections.txt") // Write stats for each detection + } + if save_frames { + str = String( + format: "%.3f %.3f %.3f %.3f %.1f %.1f %.1f\n", + sec_day, freeSpace(), memoryUsage(), UIDevice.current.batteryLevel, + self.t1 * 1000, self.t2 * 1000, 1 / self.t4) + saveText(text: str, file: "frames.txt") // Write stats for each image + } } - - // Return hard drive space (GB) - func freeSpace() -> Double { - let fileURL = URL(fileURLWithPath: NSHomeDirectory() as String) - do { - let values = try fileURL.resourceValues(forKeys: [.volumeAvailableCapacityForImportantUsageKey]) - return Double(values.volumeAvailableCapacityForImportantUsage!) / 1E9 // Bytes to GB - } catch { - print("Error retrieving storage capacity: \(error.localizedDescription)") - } - return 0 + + // Debug + // print(str) + // print(UIDevice.current.identifierForVendor!) 
+ // saveImage() + } + + // Pinch to Zoom Start --------------------------------------------------------------------------------------------- + let minimumZoom: CGFloat = 1.0 + let maximumZoom: CGFloat = 10.0 + var lastZoomFactor: CGFloat = 1.0 + + @IBAction func pinch(_ pinch: UIPinchGestureRecognizer) { + let device = videoCapture.captureDevice + + // Return zoom value between the minimum and maximum zoom values + func minMaxZoom(_ factor: CGFloat) -> CGFloat { + return min(min(max(factor, minimumZoom), maximumZoom), device.activeFormat.videoMaxZoomFactor) } - - // Return RAM usage (GB) - func memoryUsage() -> Double { - var taskInfo = mach_task_basic_info() - var count = mach_msg_type_number_t(MemoryLayout.size) / 4 - let kerr: kern_return_t = withUnsafeMutablePointer(to: &taskInfo) { - $0.withMemoryRebound(to: integer_t.self, capacity: 1) { - task_info(mach_task_self_, task_flavor_t(MACH_TASK_BASIC_INFO), $0, &count) - } - } - if kerr == KERN_SUCCESS { - return Double(taskInfo.resident_size) / 1E9 // Bytes to GB - } else { - return 0 + + func update(scale factor: CGFloat) { + do { + try device.lockForConfiguration() + defer { + device.unlockForConfiguration() } + device.videoZoomFactor = factor + } catch { + print("\(error.localizedDescription)") + } } - - func show(predictions: [VNRecognizedObjectObservation]) { - let width = videoPreview.bounds.width // 375 pix - let height = videoPreview.bounds.height // 812 pix - var str = "" - - // ratio = videoPreview AR divided by sessionPreset AR - var ratio: CGFloat = 1.0 - if videoCapture.captureSession.sessionPreset == .photo { - ratio = (height / width) / (4.0 / 3.0) // .photo - } else { - ratio = (height / width) / (16.0 / 9.0) // .hd4K3840x2160, .hd1920x1080, .hd1280x720 etc. - } - - // date - let date = Date() - let calendar = Calendar.current - let hour = calendar.component(.hour, from: date) - let minutes = calendar.component(.minute, from: date) - let seconds = calendar.component(.second, from: date) - let nanoseconds = calendar.component(.nanosecond, from: date) - let sec_day = Double(hour) * 3600.0 + Double(minutes) * 60.0 + Double(seconds) + Double(nanoseconds) / 1E9 // seconds in the day - - self.labelSlider.text = String(predictions.count) + " items (max " + String(Int(slider.value)) + ")" - for i in 0..= 1 { // iPhone ratio = 1.218 - let offset = (1 - ratio) * (0.5 - rect.minX) - let transform = CGAffineTransform(scaleX: 1, y: -1).translatedBy(x: offset, y: -1) - rect = rect.applying(transform) - rect.size.width *= ratio - } else { // iPad ratio = 0.75 - let offset = (ratio - 1) * (0.5 - rect.maxY) - let transform = CGAffineTransform(scaleX: 1, y: -1).translatedBy(x: 0, y: offset - 1) - rect = rect.applying(transform) - rect.size.height /= ratio - } - - // Scale normalized to pixels [375, 812] [width, height] - rect = VNImageRectForNormalizedRect(rect, Int(width), Int(height)) - - // The labels array is a list of VNClassificationObservation objects, - // with the highest scoring class first in the list. - let bestClass = prediction.labels[0].identifier - let confidence = prediction.labels[0].confidence - // print(confidence, rect) // debug (confidence, xywh) with xywh origin top left (pixels) - - // Show the bounding box. - boundingBoxViews[i].show(frame: rect, - label: String(format: "%@ %.1f", bestClass, confidence * 100), - color: colors[bestClass] ?? 
UIColor.white, - alpha: CGFloat((confidence - 0.2) / (1.0 - 0.2) * 0.9)) // alpha 0 (transparent) to 1 (opaque) for conf threshold 0.2 to 1.0) - - if developerMode { - // Write - if save_detections { - str += String(format: "%.3f %.3f %.3f %@ %.2f %.1f %.1f %.1f %.1f\n", - sec_day, freeSpace(), UIDevice.current.batteryLevel, bestClass, confidence, - rect.origin.x, rect.origin.y, rect.size.width, rect.size.height) - } - - // Action trigger upon detection - // if false { - // if (bestClass == "car") { // "cell phone", "car", "person" - // self.takePhoto(nil) - // // self.pauseButton(nil) - // sleep(2) - // } - // } - } - } else { - boundingBoxViews[i].hide() - } - } - - // Write - if developerMode { - if save_detections { - saveText(text: str, file: "detections.txt") // Write stats for each detection - } - if save_frames { - str = String(format: "%.3f %.3f %.3f %.3f %.1f %.1f %.1f\n", - sec_day, freeSpace(), memoryUsage(), UIDevice.current.batteryLevel, - self.t1 * 1000, self.t2 * 1000, 1 / self.t4) - saveText(text: str, file: "frames.txt") // Write stats for each image - } - } - - // Debug - // print(str) - // print(UIDevice.current.identifierForVendor!) - // saveImage() + + let newScaleFactor = minMaxZoom(pinch.scale * lastZoomFactor) + switch pinch.state { + case .began, .changed: + update(scale: newScaleFactor) + self.labelZoom.text = String(format: "%.2fx", newScaleFactor) + self.labelZoom.font = UIFont.preferredFont(forTextStyle: .title2) + case .ended: + lastZoomFactor = minMaxZoom(newScaleFactor) + update(scale: lastZoomFactor) + self.labelZoom.font = UIFont.preferredFont(forTextStyle: .body) + default: break } - - // Pinch to Zoom Start --------------------------------------------------------------------------------------------- - let minimumZoom: CGFloat = 1.0 - let maximumZoom: CGFloat = 10.0 - var lastZoomFactor: CGFloat = 1.0 - - @IBAction func pinch(_ pinch: UIPinchGestureRecognizer) { - let device = videoCapture.captureDevice - - // Return zoom value between the minimum and maximum zoom values - func minMaxZoom(_ factor: CGFloat) -> CGFloat { - return min(min(max(factor, minimumZoom), maximumZoom), device.activeFormat.videoMaxZoomFactor) - } - - func update(scale factor: CGFloat) { - do { - try device.lockForConfiguration() - defer { - device.unlockForConfiguration() - } - device.videoZoomFactor = factor - } catch { - print("\(error.localizedDescription)") - } - } - - let newScaleFactor = minMaxZoom(pinch.scale * lastZoomFactor) - switch pinch.state { - case .began: fallthrough - case .changed: - update(scale: newScaleFactor) - self.labelZoom.text = String(format: "%.2fx", newScaleFactor) - self.labelZoom.font = UIFont.preferredFont(forTextStyle: .title2) - case .ended: - lastZoomFactor = minMaxZoom(newScaleFactor) - update(scale: lastZoomFactor) - self.labelZoom.font = UIFont.preferredFont(forTextStyle: .body) - default: break - } - } // Pinch to Zoom Start - - func showShareAlert(image: UIImage) { - let alertController = UIAlertController(title: "Do you want to share this image?", message: nil, preferredStyle: .alert) - - let shareAction = UIAlertAction(title: "OK", style: .default) { _ in - self.shareImage(image: image) - } - - let cancelAction = UIAlertAction(title: "Cancel", style: .cancel) { _ in - self.hideScreenshotImageView() - } - - alertController.addAction(shareAction) - alertController.addAction(cancelAction) - - if let popoverController = alertController.popoverPresentationController { - popoverController.sourceView = self.view - popoverController.sourceRect = 
CGRect(x: self.view.bounds.midX, y: self.view.bounds.maxY - 100, width: 0, height: 0) - popoverController.permittedArrowDirections = [] - } - - present(alertController, animated: true, completion: nil) - } - - func shareImage(image: UIImage) { - let activityViewController = UIActivityViewController(activityItems: [image], applicationActivities: nil) - activityViewController.popoverPresentationController?.sourceView = self.View0 - self.present(activityViewController, animated: true) { - self.hideScreenshotImageView() - } + } // Pinch to Zoom Start + + func showShareAlert(image: UIImage) { + let alertController = UIAlertController( + title: "Do you want to share this image?", message: nil, preferredStyle: .alert) + + let shareAction = UIAlertAction(title: "OK", style: .default) { _ in + self.shareImage(image: image) + } + + let cancelAction = UIAlertAction(title: "Cancel", style: .cancel) { _ in + self.hideScreenshotImageView() } - - func hideScreenshotImageView() { - self.screenshotImageView?.removeFromSuperview() - self.screenshotImageView = nil + + alertController.addAction(shareAction) + alertController.addAction(cancelAction) + + if let popoverController = alertController.popoverPresentationController { + popoverController.sourceView = self.view + popoverController.sourceRect = CGRect( + x: self.view.bounds.midX, y: self.view.bounds.maxY - 100, width: 0, height: 0) + popoverController.permittedArrowDirections = [] } - // ------------------------------------------------------------------------------------------ + + present(alertController, animated: true, completion: nil) + } + + func shareImage(image: UIImage) { + let activityViewController = UIActivityViewController( + activityItems: [image], applicationActivities: nil) + activityViewController.popoverPresentationController?.sourceView = self.View0 + self.present(activityViewController, animated: true) { + self.hideScreenshotImageView() + } + } + + func hideScreenshotImageView() { + self.screenshotImageView?.removeFromSuperview() + self.screenshotImageView = nil + } + // ------------------------------------------------------------------------------------------ } // ViewController class End extension ViewController: VideoCaptureDelegate { - func videoCapture(_ capture: VideoCapture, didCaptureVideoFrame sampleBuffer: CMSampleBuffer) { - predict(sampleBuffer: sampleBuffer) - } + func videoCapture(_ capture: VideoCapture, didCaptureVideoFrame sampleBuffer: CMSampleBuffer) { + predict(sampleBuffer: sampleBuffer) + } } // Programmatically save image extension ViewController: AVCapturePhotoCaptureDelegate { - func photoOutput(_ output: AVCapturePhotoOutput, didFinishProcessingPhoto photo: AVCapturePhoto, error: Error?) { - if let error = error { - print("error occurred : \(error.localizedDescription)") - } - if let dataImage = photo.fileDataRepresentation() { - let dataProvider = CGDataProvider(data: dataImage as CFData) - let cgImageRef: CGImage! = CGImage(jpegDataProviderSource: dataProvider!, decode: nil, shouldInterpolate: true, intent: .defaultIntent) - var orientation = UIImage.Orientation.right - switch UIDevice.current.orientation { - case .landscapeLeft: - orientation = .up - case .landscapeRight: - orientation = .down - default: - break - } - var image = UIImage(cgImage: cgImageRef, scale: 0.5, orientation: orientation) - let imageView = UIImageView(image: image) - imageView.contentMode = .scaleAspectFill - imageView.frame = videoPreview.frame - let imageLayer = imageView.layer - var sublayers = videoPreview.layer.sublayers ?? 
[] - let insertIndex = max(sublayers.count - 1, 0) - videoPreview.layer.insertSublayer(imageLayer, above: videoCapture.previewLayer) - - let bounds = UIScreen.main.bounds - UIGraphicsBeginImageContextWithOptions(bounds.size, true, 0.0) - self.View0.drawHierarchy(in: bounds, afterScreenUpdates: true) - let img = UIGraphicsGetImageFromCurrentImageContext() - UIGraphicsEndImageContext() - imageLayer.removeFromSuperlayer() - - let screenshotImageView = UIImageView(image: img) - screenshotImageView.frame = view.bounds - screenshotImageView.contentMode = .scaleAspectFit - view.addSubview(screenshotImageView) - self.screenshotImageView = screenshotImageView - - UIView.animate(withDuration: 0.3, animations: { - screenshotImageView.frame = CGRect(x: 20, y: 100, width: self.view.bounds.width - 40, height: self.view.bounds.height - 200) - }) { _ in - self.showShareAlert(image: img!) - } -// -// // Save to camera roll -// UIImageWriteToSavedPhotosAlbum(img!, nil, nil, nil); - } else { - print("AVCapturePhotoCaptureDelegate Error") + func photoOutput( + _ output: AVCapturePhotoOutput, didFinishProcessingPhoto photo: AVCapturePhoto, error: Error? + ) { + if let error = error { + print("error occurred : \(error.localizedDescription)") + } + if let dataImage = photo.fileDataRepresentation() { + let dataProvider = CGDataProvider(data: dataImage as CFData) + let cgImageRef: CGImage! = CGImage( + jpegDataProviderSource: dataProvider!, decode: nil, shouldInterpolate: true, + intent: .defaultIntent) + var orientation = UIImage.Orientation.right + switch UIDevice.current.orientation { + case .landscapeLeft: + orientation = .up + case .landscapeRight: + orientation = .down + default: + break + } + var image = UIImage(cgImage: cgImageRef, scale: 0.5, orientation: orientation) + let imageView = UIImageView(image: image) + imageView.contentMode = .scaleAspectFill + imageView.frame = videoPreview.frame + let imageLayer = imageView.layer + var sublayers = videoPreview.layer.sublayers ?? [] + let insertIndex = max(sublayers.count - 1, 0) + videoPreview.layer.insertSublayer(imageLayer, above: videoCapture.previewLayer) + + let bounds = UIScreen.main.bounds + UIGraphicsBeginImageContextWithOptions(bounds.size, true, 0.0) + self.View0.drawHierarchy(in: bounds, afterScreenUpdates: true) + let img = UIGraphicsGetImageFromCurrentImageContext() + UIGraphicsEndImageContext() + imageLayer.removeFromSuperlayer() + + let screenshotImageView = UIImageView(image: img) + screenshotImageView.frame = view.bounds + screenshotImageView.contentMode = .scaleAspectFit + view.addSubview(screenshotImageView) + self.screenshotImageView = screenshotImageView + + UIView.animate( + withDuration: 0.3, + animations: { + screenshotImageView.frame = CGRect( + x: 20, y: 100, width: self.view.bounds.width - 40, height: self.view.bounds.height - 200 + ) } + ) { _ in + self.showShareAlert(image: img!) + } + // + // // Save to camera roll + // UIImageWriteToSavedPhotosAlbum(img!, nil, nil, nil); + } else { + print("AVCapturePhotoCaptureDelegate Error") } + } } From e9a28cf6bb0571ba09fa412ad1a253455c5159f1 Mon Sep 17 00:00:00 2001 From: john-rocky Date: Sun, 18 Aug 2024 15:21:20 +0900 Subject: [PATCH 05/26] add seg. 
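This patch wires YOLOv8 segmentation into the app: the yolov8n/s/m/l/x-seg mlpackages are added to the Models group and the Sources build phase, and two new utilities, Colors.swift and PostProcessSegment.swift, are added for segmentation post-processing. As a rough sketch of the general YOLOv8-seg mask composition only (assuming the usual 32 mask prototypes at 160x160 and a 32-value coefficient vector per detection; the function and parameter names below are illustrative and are not the API of PostProcessSegment.swift):

import Foundation

/// Combines mask prototypes with one detection's coefficients into a flat mask.
/// protos: prototype activations, length maskCount * maskSize * maskSize (CHW order).
/// coefficients: one weight per prototype for a single detection.
/// Returns sigmoid-activated mask values in [0, 1], length maskSize * maskSize.
func composeMask(
  protos: [Float], coefficients: [Float], maskCount: Int = 32, maskSize: Int = 160
) -> [Float] {
  let pixels = maskSize * maskSize
  var mask = [Float](repeating: 0, count: pixels)
  for p in 0..<maskCount {
    let weight = coefficients[p]
    let offset = p * pixels
    for i in 0..<pixels {
      mask[i] += weight * protos[offset + i]
    }
  }
  // Sigmoid turns each pixel into a probability; threshold (e.g. > 0.5) to binarize.
  return mask.map { 1 / (1 + expf(-$0)) }
}

Each detection's mask would then be cropped to its bounding box, resized to the preview, and tinted with a per-class color, which is presumably what Colors.swift supplies.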
--- YOLO.xcodeproj/project.pbxproj | 49 +++- YOLO/Main.storyboard | 15 +- YOLO/Utilities/Colors.swift | 33 +++ YOLO/Utilities/PostProcessSegment.swift | 316 +++++++++++++++++++++++ YOLO/ViewController.swift | 324 +++++++++++++++++------- 5 files changed, 634 insertions(+), 103 deletions(-) create mode 100644 YOLO/Utilities/Colors.swift create mode 100644 YOLO/Utilities/PostProcessSegment.swift diff --git a/YOLO.xcodeproj/project.pbxproj b/YOLO.xcodeproj/project.pbxproj index 7129783..0e0349b 100644 --- a/YOLO.xcodeproj/project.pbxproj +++ b/YOLO.xcodeproj/project.pbxproj @@ -13,11 +13,20 @@ 636EFCAF21E62DD300DE43BC /* VideoCapture.swift in Sources */ = {isa = PBXBuildFile; fileRef = 636EFCA221E62DD300DE43BC /* VideoCapture.swift */; }; 636EFCB321E62DD300DE43BC /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = 636EFCA721E62DD300DE43BC /* AppDelegate.swift */; }; 636EFCB921E62E3900DE43BC /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 636EFCB821E62E3900DE43BC /* Assets.xcassets */; }; + 6381D21B2B7817C200ABA4E8 /* yolov8m.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 6381D2162B7817C200ABA4E8 /* yolov8m.mlpackage */; }; 63CF371F2514455300E2DEA1 /* LaunchScreen.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 6323C44D22186177008AE681 /* LaunchScreen.storyboard */; }; 63CF37202514455300E2DEA1 /* Main.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 6323C44F22186177008AE681 /* Main.storyboard */; }; 63CF37212514455300E2DEA1 /* ultralytics_yolo_logotype.png in Resources */ = {isa = PBXBuildFile; fileRef = 6323C45122186177008AE681 /* ultralytics_yolo_logotype.png */; }; + 733310CD2C71BEB0001D647B /* yolov8x-seg.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 733310C82C71BEAB001D647B /* yolov8x-seg.mlpackage */; }; + 733310CE2C71BEB0001D647B /* yolov8n-seg.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 733310C92C71BEAC001D647B /* yolov8n-seg.mlpackage */; }; + 733310CF2C71BEB0001D647B /* yolov8m-seg.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 733310CA2C71BEAD001D647B /* yolov8m-seg.mlpackage */; }; + 733310D02C71BEB0001D647B /* yolov8s-seg.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 733310CB2C71BEAE001D647B /* yolov8s-seg.mlpackage */; }; + 733310D12C71BEB0001D647B /* yolov8l-seg.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 733310CC2C71BEB0001D647B /* yolov8l-seg.mlpackage */; }; + 733310D42C71C5CE001D647B /* Colors.swift in Sources */ = {isa = PBXBuildFile; fileRef = 733310D22C71C5CE001D647B /* Colors.swift */; }; + 733310D52C71C5CE001D647B /* PostProcessSegment.swift in Sources */ = {isa = PBXBuildFile; fileRef = 733310D32C71C5CE001D647B /* PostProcessSegment.swift */; }; 8EDAA33950796844333D60A7 /* BoundingBoxView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8EDAA633C1F2B50286D16008 /* BoundingBoxView.swift */; }; /* End PBXBuildFile section */ + /* Begin PBXFileReference section */ 6323C44D22186177008AE681 /* LaunchScreen.storyboard */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; path = LaunchScreen.storyboard; sourceTree = ""; }; 6323C44F22186177008AE681 /* Main.storyboard */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; path = Main.storyboard; sourceTree = ""; }; @@ -29,7 +38,19 @@ 636EFCA221E62DD300DE43BC /* VideoCapture.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = VideoCapture.swift; sourceTree = ""; }; 636EFCA721E62DD300DE43BC /* AppDelegate.swift */ = {isa = 
PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = AppDelegate.swift; sourceTree = ""; }; 636EFCB821E62E3900DE43BC /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = ""; }; + 6381D2132B7817C200ABA4E8 /* yolov8l.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolov8l.mlpackage; sourceTree = ""; }; + 6381D2142B7817C200ABA4E8 /* yolov8x.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolov8x.mlpackage; sourceTree = ""; }; + 6381D2152B7817C200ABA4E8 /* yolov8s.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolov8s.mlpackage; sourceTree = ""; }; + 6381D2162B7817C200ABA4E8 /* yolov8m.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolov8m.mlpackage; sourceTree = ""; }; + 6381D2172B7817C200ABA4E8 /* yolov8n.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = yolov8n.mlpackage; sourceTree = ""; }; 63B8B0A821E62A890026FBC3 /* .gitignore */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = .gitignore; sourceTree = ""; }; + 733310C82C71BEAB001D647B /* yolov8x-seg.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = "yolov8x-seg.mlpackage"; sourceTree = ""; }; + 733310C92C71BEAC001D647B /* yolov8n-seg.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = "yolov8n-seg.mlpackage"; sourceTree = ""; }; + 733310CA2C71BEAD001D647B /* yolov8m-seg.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = "yolov8m-seg.mlpackage"; sourceTree = ""; }; + 733310CB2C71BEAE001D647B /* yolov8s-seg.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = "yolov8s-seg.mlpackage"; sourceTree = ""; }; + 733310CC2C71BEB0001D647B /* yolov8l-seg.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = "yolov8l-seg.mlpackage"; sourceTree = ""; }; + 733310D22C71C5CE001D647B /* Colors.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Colors.swift; sourceTree = ""; }; + 733310D32C71C5CE001D647B /* PostProcessSegment.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = PostProcessSegment.swift; sourceTree = ""; }; 7BCB411721C3096100BFC4D0 /* YOLO.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = YOLO.app; sourceTree = BUILT_PRODUCTS_DIR; }; 8EDAA633C1F2B50286D16008 /* BoundingBoxView.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = BoundingBoxView.swift; sourceTree = ""; }; 8EDAAA4507D2D23D7FAB827F /* README.md */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = net.daringfireball.markdown; path = README.md; sourceTree = ""; }; @@ -51,6 +72,8 @@ children = ( 636166E9251443B20054FA7E /* ThresholdProvider.swift */, 8EDAA633C1F2B50286D16008 /* BoundingBoxView.swift */, + 733310D32C71C5CE001D647B /* PostProcessSegment.swift */, + 733310D22C71C5CE001D647B /* Colors.swift */, ); path = Utilities; sourceTree = ""; @@ -76,6 +99,16 @@ 63A946D8271800E20001C3ED /* Models */ = { isa = PBXGroup; children = ( + 6381D2132B7817C200ABA4E8 /* yolov8l.mlpackage */, + 6381D2162B7817C200ABA4E8 /* yolov8m.mlpackage */, + 6381D2172B7817C200ABA4E8 /* yolov8n.mlpackage */, + 
6381D2152B7817C200ABA4E8 /* yolov8s.mlpackage */, + 6381D2142B7817C200ABA4E8 /* yolov8x.mlpackage */, + 733310CC2C71BEB0001D647B /* yolov8l-seg.mlpackage */, + 733310CA2C71BEAD001D647B /* yolov8m-seg.mlpackage */, + 733310C92C71BEAC001D647B /* yolov8n-seg.mlpackage */, + 733310CB2C71BEAE001D647B /* yolov8s-seg.mlpackage */, + 733310C82C71BEAB001D647B /* yolov8x-seg.mlpackage */, ); path = Models; sourceTree = ""; @@ -194,11 +227,23 @@ isa = PBXSourcesBuildPhase; buildActionMask = 2147483647; files = ( + 6381D21B2B7817C200ABA4E8 /* yolov8m.mlpackage in Sources */, + 6381D21B2B7817C200ABA4E8 /* yolov8m.mlpackage in Sources */, + 733310CF2C71BEB0001D647B /* yolov8m-seg.mlpackage in Sources */, + 733310D12C71BEB0001D647B /* yolov8l-seg.mlpackage in Sources */, + 6381D21B2B7817C200ABA4E8 /* yolov8m.mlpackage in Sources */, + 6381D21B2B7817C200ABA4E8 /* yolov8m.mlpackage in Sources */, + 6381D21B2B7817C200ABA4E8 /* yolov8m.mlpackage in Sources */, + 733310D52C71C5CE001D647B /* PostProcessSegment.swift in Sources */, 636EFCAF21E62DD300DE43BC /* VideoCapture.swift in Sources */, + 733310D02C71BEB0001D647B /* yolov8s-seg.mlpackage in Sources */, + 733310D42C71C5CE001D647B /* Colors.swift in Sources */, 636166EA251443B20054FA7E /* ThresholdProvider.swift in Sources */, 636EFCB321E62DD300DE43BC /* AppDelegate.swift in Sources */, 636EFCAA21E62DD300DE43BC /* ViewController.swift in Sources */, + 733310CD2C71BEB0001D647B /* yolov8x-seg.mlpackage in Sources */, 8EDAA33950796844333D60A7 /* BoundingBoxView.swift in Sources */, + 733310CE2C71BEB0001D647B /* yolov8n-seg.mlpackage in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -330,7 +375,7 @@ ASSETCATALOG_COMPILER_INCLUDE_ALL_APPICON_ASSETS = NO; CODE_SIGN_STYLE = Automatic; CURRENT_PROJECT_VERSION = 0; - DEVELOPMENT_TEAM = ""; + DEVELOPMENT_TEAM = 3MR4P6CL3X; INFOPLIST_FILE = YOLO/Info.plist; INFOPLIST_KEY_CFBundleDisplayName = "Ultralytics YOLO"; INFOPLIST_KEY_LSApplicationCategoryType = "public.app-category.developer-tools"; @@ -358,7 +403,7 @@ ASSETCATALOG_COMPILER_INCLUDE_ALL_APPICON_ASSETS = NO; CODE_SIGN_STYLE = Automatic; CURRENT_PROJECT_VERSION = 0; - DEVELOPMENT_TEAM = ""; + DEVELOPMENT_TEAM = 3MR4P6CL3X; INFOPLIST_FILE = YOLO/Info.plist; INFOPLIST_KEY_CFBundleDisplayName = "Ultralytics YOLO"; INFOPLIST_KEY_LSApplicationCategoryType = "public.app-category.developer-tools"; diff --git a/YOLO/Main.storyboard b/YOLO/Main.storyboard index 549bc72..b43e17b 100644 --- a/YOLO/Main.storyboard +++ b/YOLO/Main.storyboard @@ -55,8 +55,8 @@ -