From 2fb049e678ab48a7b950cd15ab2071f6429b364d Mon Sep 17 00:00:00 2001 From: Andrew Marmon Date: Wed, 10 Feb 2021 14:05:39 -0500 Subject: [PATCH 01/34] updated tracker to have 1.0 robustness --- .gitignore | 4 +- Scripts/Andrew01.swift | 116 ++++++++++++++++++ Scripts/Fan12.swift | 4 +- Scripts/main.swift | 2 +- .../BeeTracking/ProbabilisticTracker.swift | 7 +- Sources/BeeTracking/TrackingMetrics.swift | 17 ++- Sources/BeeTracking/Visualizations.swift | 50 ++++++++ 7 files changed, 183 insertions(+), 17 deletions(-) create mode 100644 Scripts/Andrew01.swift diff --git a/.gitignore b/.gitignore index 83e982fd..61b89970 100644 --- a/.gitignore +++ b/.gitignore @@ -7,4 +7,6 @@ xcuserdata/ /.vscode /.idea /OIST_Data/ -/Results/ \ No newline at end of file +/Results/ +*.npy +*.json \ No newline at end of file diff --git a/Scripts/Andrew01.swift b/Scripts/Andrew01.swift new file mode 100644 index 00000000..7f86f807 --- /dev/null +++ b/Scripts/Andrew01.swift @@ -0,0 +1,116 @@ +import ArgumentParser + +import SwiftFusion +import BeeDataset +import BeeTracking +import TensorFlow +import PythonKit +import Foundation + +import PenguinStructures + +/// Andrew01: RAE Tracker +struct Andrew01: ParsableCommand { + + @Option(help: "Run for number of frames") + var trackLength: Int = 80 + + @Option(help: "Size of feature space") + var featureSize: Int = 256 + + @Option(help: "Pretrained weights") + var weightsFile: String? + + // Runs RAE tracker on n number of sequences and outputs relevant images and statistics + // Make sure you have a folder `Results/andrew01` before running + func run() { + let np = Python.import("numpy") + let kHiddenDimension = 512 + + let (imageHeight, imageWidth, imageChannels) = + (40, 70, 1) + + + var rae = DenseRAE( + imageHeight: imageHeight, imageWidth: imageWidth, imageChannels: imageChannels, + hiddenDimension: kHiddenDimension, latentDimension: featureSize + ) + + if let weightsFile = weightsFile { + rae.load(weights: np.load(weightsFile, allow_pickle: true)) + } else { + rae.load(weights: np.load("./oist_rae_weight_\(featureSize).npy", allow_pickle: true)) + } + + // let (imageHeight, imageWidth, imageChannels) = + // (40, 70, 1) + + // let rp = RandomProjection(fromShape: TensorShape([imageHeight, imageWidth, imageChannels]), toFeatureSize: featureSize) + + let trainingDatasetSize = 100 + + let dataDir = URL(fileURLWithPath: "./OIST_Data") + let data = OISTBeeVideo(directory: dataDir, length: trainingDatasetSize)! + let testData = OISTBeeVideo(directory: dataDir, afterIndex: trainingDatasetSize, length: trackLength)! 
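+    // `data` supplies the labeled frames used to fit the appearance models, while
+    // `testData` starts at `afterIndex: trainingDatasetSize`, so the evaluation
+    // sequences below never overlap the training frames.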
+
+    let trackerEvaluation = TrackerEvaluationDataset(testData)
+
+    let evalTracker: Tracker = {frames, start in
+      var tracker = trainProbabilisticTracker(
+        trainingData: data,
+        encoder: rae,
+        frames: frames,
+        boundingBoxSize: (40, 70),
+        withFeatureSize: featureSize,
+        fgRandomFrameCount: trainingDatasetSize,
+        bgRandomFrameCount: trainingDatasetSize
+      )
+      let prediction = tracker.infer(knownStart: Tuple1(start.center), withSampling: true)
+      let track = tracker.frameVariableIDs.map { OrientedBoundingBox(center: prediction[unpack($0)], rows: 40, cols: 70) }
+
+      return track
+    }
+    let plt = Python.import("matplotlib.pyplot")
+    let sequenceCount = 19
+    var results = trackerEvaluation.evaluate(evalTracker, sequenceCount: sequenceCount, deltaAnchor: 175, outputFile: "andrew01")
+
+    for (index, value) in results.sequences.prefix(sequenceCount).enumerated() {
+      var i: Int = 0
+      zip(value.subsequences.first!.frames, zip(value.subsequences.first!.prediction, value.subsequences.first!.groundTruth)).map {
+        let (fig, axes) = plotFrameWithPatches(frame: $0.0, actual: $0.1.0.center, expected: $0.1.1.center, firstGroundTruth: value.subsequences.first!.groundTruth.first!.center)
+        fig.savefig("Results/andrew01/sequence\(index)/andrew01_\(i).png", bbox_inches: "tight")
+        plt.close("all")
+        i = i + 1
+      }
+
+      let (fig, axes) = plt.subplots(1, 2, figsize: Python.tuple([20, 20])).tuple2
+      fig.suptitle("Tracking positions and Subsequence Average Overlap with Accuracy \(String(format: "%.2f", value.subsequences.first!.metrics.accuracy)) and Robustness \(value.subsequences.first!.metrics.robustness).")
+
+      value.subsequences.map {
+        plotPoseDifference(
+          track: $0.prediction.map{$0.center}, withGroundTruth: $0.groundTruth.map{$0.center}, on: axes[0]
+        )
+      }
+      plotOverlap(
+        metrics: value.subsequences.first!.metrics, on: axes[1]
+      )
+      fig.savefig("Results/andrew01/andrew01_subsequence\(index).png", bbox_inches: "tight")
+      print("Accuracy for sequence is \(value.sequenceMetrics.accuracy) with Robustness of \(value.sequenceMetrics.robustness)")
+    }
+
+    print("Accuracy for all sequences is \(results.trackerMetrics.accuracy) with Robustness of \(results.trackerMetrics.robustness)")
+
+  }
+}
+
+/// Returns `t` as a Swift tuple.
+fileprivate func unpack<A, B>(_ t: Tuple2<A, B>) -> (A, B) {
+  return (t.head, t.tail.head)
+}
+/// Returns `t` as a Swift tuple.
+fileprivate func unpack<A>(_ t: Tuple1<A>) -> (A) {
+  return (t.head)
+}
\ No newline at end of file
diff --git a/Scripts/Fan12.swift b/Scripts/Fan12.swift
index cf4649b7..809226f7 100644
--- a/Scripts/Fan12.swift
+++ b/Scripts/Fan12.swift
@@ -12,7 +12,7 @@ struct Fan12: ParsableCommand {
   typealias LikelihoodModel = TrackingLikelihoodModel
 
   @Option(help: "Size of feature space")
-  var featureSize: Int = 5
+  var featureSize: Int = 256
 
   @Flag(help: "Training mode")
   var training: Bool = false
@@ -31,7 +31,7 @@ struct Fan12: ParsableCommand {
   // Just runs an RP tracker and saves image to file
   // Make sure you have a folder `Results/fan12` before running
   func run() {
-    let kHiddenDimension = 100
+    let kHiddenDimension = 512
     let dataDir = URL(fileURLWithPath: "./OIST_Data")
     let trainingDataset = OISTBeeVideo(directory: dataDir, length: 100)!
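Note on the tracker interface: `evalTracker` above is one implementation of the `Tracker` closure type from TrackingMetrics.swift, `(frames, start) -> [OrientedBoundingBox]`, which `TrackerEvaluationDataset.evaluate` consumes. A minimal sketch of an alternative, a hypothetical stationary baseline (not part of this patch) that is handy for sanity-checking the metrics pipeline, since it exercises evaluation without any inference:

// Hypothetical baseline tracker: predicts the starting box for every frame.
let stationaryTracker: Tracker = { frames, start in
  frames.map { _ in start }
}
// Evaluated exactly like `evalTracker` above:
// trackerEvaluation.evaluate(stationaryTracker, sequenceCount: 1,
//                            deltaAnchor: 175, outputFile: "baseline")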
diff --git a/Scripts/main.swift b/Scripts/main.swift index 07281539..9b96f9c1 100644 --- a/Scripts/main.swift +++ b/Scripts/main.swift @@ -17,7 +17,7 @@ import PenguinParallelWithFoundation struct Scripts: ParsableCommand { static var configuration = CommandConfiguration( - subcommands: [Fan01.self, Fan02.self, Fan03.self, Fan04.self, Fan05.self, Fan10.self, Fan12.self, Fan13.self, Fan14.self, + subcommands: [Andrew01.self, Fan01.self, Fan02.self, Fan03.self, Fan04.self, Fan05.self, Fan10.self, Fan12.self, Fan13.self, Fan14.self, Frank01.self, Frank02.self, Frank03.self, Frank04.self]) } diff --git a/Sources/BeeTracking/ProbabilisticTracker.swift b/Sources/BeeTracking/ProbabilisticTracker.swift index 24231d77..472b5793 100644 --- a/Sources/BeeTracking/ProbabilisticTracker.swift +++ b/Sources/BeeTracking/ProbabilisticTracker.swift @@ -149,12 +149,13 @@ public func trainProbabilisticTracker( bgRandomFrameCount: bgRandomFrameCount, useCache: true ) - + //Adjust numberOfTrainingSamples? let batchPositive = encoder.encode(fg) let foregroundModel = MultivariateGaussian(from:batchPositive, regularizer: 1e-3) let batchNegative = encoder.encode(bg) - let backgroundModel = GaussianNB(from: batchNegative, regularizer: 1e-3) + //Try another MultivariateGaussian here + let backgroundModel = MultivariateGaussian(from: batchNegative, regularizer: 1e-3) let tracker = makeProbabilisticTracker( model: encoder, @@ -226,7 +227,7 @@ public func makeProbabilisticTracker< addFixedBetweenFactor: { (values, variables, graph) -> () in let (prior) = unpack(values) let (poseID) = unpack(variables) - graph.store(WeightedPriorFactorPose2SD(poseID, prior, sdX: 8, sdY: 4.6, sdTheta: 0.3)) + graph.store(WeightedPriorFactorPose2SD(poseID, prior, sdX: 8, sdY: 8, sdTheta:0.4)) }) } diff --git a/Sources/BeeTracking/TrackingMetrics.swift b/Sources/BeeTracking/TrackingMetrics.swift index f441c706..3783197c 100644 --- a/Sources/BeeTracking/TrackingMetrics.swift +++ b/Sources/BeeTracking/TrackingMetrics.swift @@ -200,10 +200,6 @@ extension TrackerEvaluationDataset { expectedAverageOverlap: ExpectedAverageOverlap( sequenceEvaluations.flatMap { $0.subsequences }.map { $0.metrics })) - let encoder = JSONEncoder() - let data = try! encoder.encode(result) - FileManager.default.createFile(atPath: "\(outputFile).json", contents: data, attributes: nil) - return result } } @@ -263,16 +259,15 @@ extension TrackerEvaluationSequence { let subsequenceEvaluations = zip(subsequences, subsequencePredictions).map { SubsequenceEvaluationResults( metrics: SubsequenceMetrics(groundTruth: $0.0.groundTruth, prediction: $0.1), - prediction: $0.1) + prediction: $0.1, + groundTruth: $0.0.groundTruth, + frames: $0.0.frames) } + let result = SequenceEvaluationResults( subsequences: subsequenceEvaluations, sequenceMetrics: SequenceMetrics(subsequenceEvaluations.map { $0.metrics })) - let encoder = JSONEncoder() - let data = try! 
encoder.encode(result) - FileManager.default.createFile(atPath: "\(outputFile).json", contents: data, attributes: nil) - return result } } @@ -286,7 +281,7 @@ extension TrackerEvaluationDataset { for track in video.tracks { let sequence = TrackerEvaluationSequence( frames: Array( - video.frames[track.startFrameIndex..<(track.startFrameIndex + track.boxes.count)]), + video.frames[track.startFrameIndex..<(track.boxes.count)]), groundTruth: track.boxes) sequences.append(sequence) } @@ -318,6 +313,8 @@ public struct SequenceEvaluationResults: Codable { public struct SubsequenceEvaluationResults: Codable { public let metrics: SubsequenceMetrics public let prediction: [OrientedBoundingBox] + public let groundTruth: [OrientedBoundingBox] + public let frames: [Tensor] } /// Given `frames` and a `start` region containing an object to track, returns predicted regions diff --git a/Sources/BeeTracking/Visualizations.swift b/Sources/BeeTracking/Visualizations.swift index 4a8b9eb7..cb23eddd 100644 --- a/Sources/BeeTracking/Visualizations.swift +++ b/Sources/BeeTracking/Visualizations.swift @@ -54,6 +54,12 @@ public func plotOverlap( ax.set_title("Overlap") } +/// Plot the tracking metrics +public func plotOverlap(metrics: SubsequenceMetrics, on ax: PythonObject) { + ax.plot(metrics.overlap) + ax.set_title("Overlap") +} + /// plot Comparison image public func plotPatchWithGT(frame: Tensor, actual: Pose2, expected: Pose2) -> (PythonObject, PythonObject) { let plt = Python.import("matplotlib.pyplot") @@ -71,6 +77,50 @@ public func plotPatchWithGT(frame: Tensor, actual: Pose2, expected: Pose2 return (fig, ax) } +public func plotPoseDifference(track: [Pose2], withGroundTruth expected: [Pose2], on ax: PythonObject) { + var thetaDiff = zip(track, expected).map{pow(($0.0.rot.theta - $0.1.rot.theta), 2.0)} + var posDiff = zip(track, expected).map{pow(($0.0.t.x - $0.1.t.x), 2.0) + pow(($0.0.t.y - $0.1.t.y), 2.0)} + ax.plot(thetaDiff, posDiff) + ax.set_title("L2 Theta Difference (X-axis) vs. L2 X, Y Difference Over Time") +} + +/// plot Comparison image +public func plotFrameWithPatches(frame: Tensor, actual: Pose2, expected: Pose2, firstGroundTruth: Pose2) -> (PythonObject, PythonObject) { + let plt = Python.import("matplotlib.pyplot") + let mpl = Python.import("matplotlib") + + let (fig, ax) = plt.subplots(figsize: Python.tuple([8, 4])).tuple2 + ax.imshow(frame.makeNumpyArray() / 255.0) + let actualBoundingBox = OrientedBoundingBox(center: actual, rows: 40, cols: 70) + ax.plot(Python.list(actualBoundingBox.corners.map{$0.x} + [actualBoundingBox.corners.first!.x]), Python.list(actualBoundingBox.corners.map{$0.y} + [actualBoundingBox.corners.first!.y]), "r-") + // ax.plot(Python.tuple(actualBoundingBox.rot.) 
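+  // The RegularPolygon below is a small triangle drawn at the box center and
+  // rotated with the pose, so the patch's heading is visible on the plot.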
+  var supportPatch = mpl.patches.RegularPolygon(
+    Python.tuple([actualBoundingBox.center.t.x, actualBoundingBox.center.t.y]),
+    numVertices:3,
+    radius:10,
+    color:"r",
+    orientation: actualBoundingBox.center.rot.theta - (Double.pi / 2)
+  )
+  ax.add_patch(supportPatch)
+
+  let expectedBoundingBox = OrientedBoundingBox(center: expected, rows: 40, cols: 70)
+  ax.plot(Python.list(expectedBoundingBox.corners.map{$0.x} + [expectedBoundingBox.corners.first!.x]), Python.list(expectedBoundingBox.corners.map{$0.y} + [expectedBoundingBox.corners.first!.y]), "g-")
+
+  supportPatch = mpl.patches.RegularPolygon(
+    Python.tuple([expectedBoundingBox.center.t.x, expectedBoundingBox.center.t.y]),
+    numVertices:3,
+    radius:10,
+    color:"g",
+    orientation: expectedBoundingBox.center.rot.theta - (Double.pi / 2)
+  )
+  ax.add_patch(supportPatch)
+  ax.set_xlim(firstGroundTruth.t.x - 200, firstGroundTruth.t.x + 200)
+  ax.set_ylim(firstGroundTruth.t.y - 200, firstGroundTruth.t.y + 200)
+
+  ax.title.set_text("Prediction (Red) vs. Actual (Green)")
+  return (fig, ax)
+}
+
 /// Calculate the translation error plane (X-Y)
 public func errorPlaneTranslation<
   Encoder: AppearanceModelEncoder,

From 483bfa2f2d0e0dd25918f720c194591a99867ae5 Mon Sep 17 00:00:00 2001
From: Andrew Marmon
Date: Wed, 10 Feb 2021 14:21:34 -0500
Subject: [PATCH 02/34] removed unnecessary comments

---
 Sources/BeeTracking/ProbabilisticTracker.swift | 2 --
 1 file changed, 2 deletions(-)

diff --git a/Sources/BeeTracking/ProbabilisticTracker.swift b/Sources/BeeTracking/ProbabilisticTracker.swift
index 472b5793..c812e1a3 100644
--- a/Sources/BeeTracking/ProbabilisticTracker.swift
+++ b/Sources/BeeTracking/ProbabilisticTracker.swift
@@ -149,12 +149,10 @@ public func trainProbabilisticTracker(
     bgRandomFrameCount: bgRandomFrameCount,
     useCache: true
   )
-  //Adjust numberOfTrainingSamples?
   let batchPositive = encoder.encode(fg)
   let foregroundModel = MultivariateGaussian(from:batchPositive, regularizer: 1e-3)
 
   let batchNegative = encoder.encode(bg)
-  //Try another MultivariateGaussian here
   let backgroundModel = MultivariateGaussian(from: batchNegative, regularizer: 1e-3)
 
   let tracker = makeProbabilisticTracker(

From 1d65667b292f55a1362055af5010aa9d22acebfd Mon Sep 17 00:00:00 2001
From: Andrew Marmon
Date: Wed, 10 Feb 2021 15:05:29 -0500
Subject: [PATCH 03/34] removed call to Python.list and validated functionality.

---
 Sources/BeeTracking/Visualizations.swift | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Sources/BeeTracking/Visualizations.swift b/Sources/BeeTracking/Visualizations.swift
index cb23eddd..334e2ab6 100644
--- a/Sources/BeeTracking/Visualizations.swift
+++ b/Sources/BeeTracking/Visualizations.swift
@@ -92,7 +92,7 @@ public func plotFrameWithPatches(frame: Tensor<Float>, actual: Pose2, expected:
   let (fig, ax) = plt.subplots(figsize: Python.tuple([8, 4])).tuple2
   ax.imshow(frame.makeNumpyArray() / 255.0)
   let actualBoundingBox = OrientedBoundingBox(center: actual, rows: 40, cols: 70)
-  ax.plot(Python.list(actualBoundingBox.corners.map{$0.x} + [actualBoundingBox.corners.first!.x]), Python.list(actualBoundingBox.corners.map{$0.y} + [actualBoundingBox.corners.first!.y]), "r-")
+  ax.plot(actualBoundingBox.corners.map{$0.x} + [actualBoundingBox.corners.first!.x], actualBoundingBox.corners.map{$0.y} + [actualBoundingBox.corners.first!.y], "r-")
   // ax.plot(Python.tuple(actualBoundingBox.rot.)
var supportPatch = mpl.patches.RegularPolygon( Python.tuple([actualBoundingBox.center.t.x, actualBoundingBox.center.t.y]), From 65da84daf016ffed209b7eec3695dc95a088efa6 Mon Sep 17 00:00:00 2001 From: icourten3 Date: Fri, 27 Aug 2021 20:49:48 +0200 Subject: [PATCH 04/34] updating all the Source files --- Sources/BeeDataset/Visualization.swift | 142 ++-- .../AppearanceRAE+Serialization.swift | 68 ++ Sources/BeeTracking/NNClassifier.swift | 605 ++++++++++++++++++ .../BeeTracking/OISTBeeVideo+Batches.swift | 5 +- .../BeeTracking/ProbabilisticTracker.swift | 5 +- .../BeeTracking/ProbabilisticTracker2.swift | 72 +++ Sources/BeeTracking/TrackingFactorGraph.swift | 60 +- Sources/BeeTracking/TrackingMetrics.swift | 19 +- Sources/BeeTracking/Visualizations.swift | 138 +++- .../Inference/FactorBoilerplate.swift | 26 +- .../LatentAppearanceTrackingFactor.swift | 16 +- .../ProbablisticTrackingFactor.swift | 46 ++ .../Optimizers/GradientDescent.swift | 10 + Sources/SwiftFusion/Optimizers/LM.swift | 7 + .../Optimizers/OptimizerProtocol.swift | 5 + .../Probability/MultivariateGaussian.swift | 4 +- 16 files changed, 1131 insertions(+), 97 deletions(-) create mode 100644 Sources/BeeTracking/NNClassifier.swift create mode 100644 Sources/BeeTracking/ProbabilisticTracker2.swift create mode 100644 Sources/SwiftFusion/Optimizers/OptimizerProtocol.swift diff --git a/Sources/BeeDataset/Visualization.swift b/Sources/BeeDataset/Visualization.swift index 6558c08b..78975fb9 100644 --- a/Sources/BeeDataset/Visualization.swift +++ b/Sources/BeeDataset/Visualization.swift @@ -1,76 +1,76 @@ -// Copyright 2020 The SwiftFusion Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. +// // Copyright 2020 The SwiftFusion Authors. All Rights Reserved. +// // +// // Licensed under the Apache License, Version 2.0 (the "License"); +// // you may not use this file except in compliance with the License. +// // You may obtain a copy of the License at +// // +// // http://www.apache.org/licenses/LICENSE-2.0 +// // +// // Unless required by applicable law or agreed to in writing, software +// // distributed under the License is distributed on an "AS IS" BASIS, +// // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// // See the License for the specific language governing permissions and +// // limitations under the License. -import SwiftFusion -import TensorFlow -import Plotly -import ModelSupport -import Foundation +// import SwiftFusion +// import TensorFlow +// // import Plotly +// import ModelSupport +// import Foundation -/// Creates a Plotly figure that displays `frame`, with optional `boxes` overlaid on -/// them. -public func plot( - _ frame: Tensor, boxes: [(name: String, OrientedBoundingBox)] = [], - margin: Double = 30, scale: Double = 1 -) -> Plotly.Figure { - let rows = Double(frame.shape[0]) - let cols = Double(frame.shape[1]) +// /// Creates a Plotly figure that displays `frame`, with optional `boxes` overlaid on +// /// them. 
+// public func plot( +// _ frame: Tensor, boxes: [(name: String, OrientedBoundingBox)] = [], +// margin: Double = 30, scale: Double = 1 +// ) -> Plotly.Figure { +// let rows = Double(frame.shape[0]) +// let cols = Double(frame.shape[1]) - // Axis settings: - // - no grid - // - range is the image size - // - scale is anchored, to preserve image aspect ratio - // - y axis reversed so that everything is in "(u, v)" coordinates - let xAx = Layout.XAxis(range: [0, InfoArray(cols)], showGrid: false) - let yAx = Layout.YAxis( - autoRange: .reversed, range: [0, InfoArray(rows)], scaleAnchor: .xAxis(xAx), showGrid: false) +// // Axis settings: +// // - no grid +// // - range is the image size +// // - scale is anchored, to preserve image aspect ratio +// // - y axis reversed so that everything is in "(u, v)" coordinates +// let xAx = Layout.XAxis(range: [0, InfoArray(cols)], showGrid: false) +// let yAx = Layout.YAxis( +// autoRange: .reversed, range: [0, InfoArray(rows)], scaleAnchor: .xAxis(xAx), showGrid: false) - let tmpPath = URL(fileURLWithPath: "tmpForPlotlyDisplay.png") - ModelSupport.Image(Tensor(frame)).save(to: tmpPath) - let imageData = try! "data:image/png;base64," + Data(contentsOf: tmpPath).base64EncodedString() +// let tmpPath = URL(fileURLWithPath: "tmpForPlotlyDisplay.png") +// ModelSupport.Image(Tensor(frame)).save(to: tmpPath) +// let imageData = try! "data:image/png;base64," + Data(contentsOf: tmpPath).base64EncodedString() - return Figure( - data: [ - // Dummy data because Plotly is confused when there is no data. - Scatter( - x: [0, cols], y: [0, rows], - mode: .markers, marker: Shared.GradientMarker(opacity: 0), - xAxis: xAx, yAxis: yAx - ) - ] + boxes.map { box in - Scatter( - name: box.name, - x: box.1.corners.map { $0.x }, - y: box.1.corners.map { $0.y }, - xAxis: xAx, - yAxis: yAx - ) - }, - layout: Layout( - width: cols * scale + 2 * margin, - height: rows * scale + 2 * margin, - margin: Layout.Margin(l: margin, r: margin, t: margin, b: margin), - images: [ - Layout.Image( - visible: true, - source: imageData, - layer: .below, - xSize: cols, ySize: rows, - sizing: .stretch, - x: 0, y: 0, xReference: .xAxis(xAx), yReference: .yAxis(yAx) - ) - ] - ) - ) -} \ No newline at end of file +// return Figure( +// data: [ +// // Dummy data because Plotly is confused when there is no data. +// Scatter( +// x: [0, cols], y: [0, rows], +// mode: .markers, marker: Shared.GradientMarker(opacity: 0), +// xAxis: xAx, yAxis: yAx +// ) +// ] + boxes.map { box in +// Scatter( +// name: box.name, +// x: box.1.corners.map { $0.x }, +// y: box.1.corners.map { $0.y }, +// xAxis: xAx, +// yAxis: yAx +// ) +// }, +// layout: Layout( +// width: cols * scale + 2 * margin, +// height: rows * scale + 2 * margin, +// margin: Layout.Margin(l: margin, r: margin, t: margin, b: margin), +// images: [ +// Layout.Image( +// visible: true, +// source: imageData, +// layer: .below, +// xSize: cols, ySize: rows, +// sizing: .stretch, +// x: 0, y: 0, xReference: .xAxis(xAx), yReference: .yAxis(yAx) +// ) +// ] +// ) +// ) +// } \ No newline at end of file diff --git a/Sources/BeeTracking/AppearanceRAE+Serialization.swift b/Sources/BeeTracking/AppearanceRAE+Serialization.swift index 6c0d5f05..d8227b8b 100644 --- a/Sources/BeeTracking/AppearanceRAE+Serialization.swift +++ b/Sources/BeeTracking/AppearanceRAE+Serialization.swift @@ -8,6 +8,8 @@ extension Dense where Scalar: NumpyScalarCompatible { mutating func load(weights: PythonObject) { let weight = Tensor(numpy: weights[0])! 
let bias = Tensor(numpy: weights[1])! + print(self.weight.shape) + print(weight.shape) precondition( self.weight.shape == weight.shape, "expected weight matrix \(self.weight.shape) but got \(weight.shape)") @@ -71,4 +73,70 @@ extension DenseRAE { self.decoder_conv1.numpyWeights ].reduce([], +) } + +} + + +extension NNClassifier { + /// Loads model weights from the numpy arrays in `weights`. + public mutating func load(weights: PythonObject) { + self.encoder_conv1.load(weights: weights[0..<2]) + self.encoder1.load(weights: weights[2..<4]) + self.encoder2.load(weights: weights[4..<6]) + self.encoder3.load(weights: weights[6..<8]) + } + + /// The model weights as numpy arrays. + public var numpyWeights: PythonObject { + [ + self.encoder_conv1.numpyWeights, + self.encoder1.numpyWeights, + self.encoder2.numpyWeights, + self.encoder3.numpyWeights + ].reduce([], +) + } +} + + +extension SmallerNNClassifier { + /// Loads model weights from the numpy arrays in `weights`. + public mutating func load(weights: PythonObject) { + self.encoder_conv1.load(weights: weights[0..<2]) + self.encoder1.load(weights: weights[2..<4]) + self.encoder2.load(weights: weights[4..<6]) + } + + /// The model weights as numpy arrays. + public var numpyWeights: PythonObject { + [ + self.encoder_conv1.numpyWeights, + self.encoder1.numpyWeights, + self.encoder2.numpyWeights, + ].reduce([], +) + } +} + + + +extension LargerNNClassifier { + /// Loads model weights from the numpy arrays in `weights`. + public mutating func load(weights: PythonObject) { + self.encoder_conv1.load(weights: weights[0..<2]) + self.encoder1.load(weights: weights[2..<4]) + self.encoder2.load(weights: weights[4..<6]) + self.encoder3.load(weights: weights[6..<8]) + self.encoder4.load(weights: weights[8..<10]) + + } + + /// The model weights as numpy arrays. + public var numpyWeights: PythonObject { + [ + self.encoder_conv1.numpyWeights, + self.encoder1.numpyWeights, + self.encoder2.numpyWeights, + self.encoder3.numpyWeights, + self.encoder4.numpyWeights + ].reduce([], +) + } } diff --git a/Sources/BeeTracking/NNClassifier.swift b/Sources/BeeTracking/NNClassifier.swift new file mode 100644 index 00000000..c508099c --- /dev/null +++ b/Sources/BeeTracking/NNClassifier.swift @@ -0,0 +1,605 @@ +// Copyright 2020 The SwiftFusion Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import SwiftFusion +import TensorFlow +import PythonKit +import BeeDataset + +// MARK: - The Regularized Autoencoder model +/// A Regularized Autoencoder (RAE) [1] that encodes the appearance of an image patch. +/// + +public struct BeeBatch { + let patch: Tensor + let label: Tensor +} +/// Conform `IrisBatch` to `Collatable` so that we can load it into a `TrainingEpoch`. 
+extension BeeBatch: Collatable { + public init(collating samples: BatchSamples) + where BatchSamples.Element == Self { + patch = Tensor(stacking: samples.map{$0.patch}) + label = Tensor(stacking: samples.map{$0.label}) + } +} + + +/// [1] https://openreview.net/forum?id=S1g7tpEYDS +public struct NNClassifier: Layer{ + /// The height of the input image in pixels. + @noDerivative public let imageHeight: Int + + /// The width of the input image in pixels. + @noDerivative public let imageWidth: Int + + /// The number of channels in the input image. + @noDerivative public let imageChannels: Int + + /// The number of activations in the hidden layer. + @noDerivative public let hiddenDimension: Int + + /// The number of activations in the appearance code. + @noDerivative public let latentDimension: Int + + /// First conv to downside the image + public var encoder_conv1: Conv2D + + /// Max pooling of factor 2 + var encoder_pool1: MaxPool2D + + /// First FCN encoding layer goes from image to hidden dimension + public var encoder1: Dense + + /// Second goes from dense features to latent code + public var encoder2: Dense + + /// Third goes from latent to 1 + public var encoder3: Dense + + // /// Decode from latent to dense hidden layer with same dimsnions as before + // var decoder1: Dense + + // /// Finally, reconstruct grayscale (or RGB) image + // var decoder2: Dense + + // var decoder_upsample1: UpSampling2D + + // var decoder_conv1: Conv2D + + /// Creates an instance for images with size `[imageHeight, imageWidth, imageChannels]`, with + /// hidden and latent dimensions given by `hiddenDimension` and `latentDimension`. + public init( + imageHeight: Int, imageWidth: Int, imageChannels: Int, + hiddenDimension: Int, latentDimension: Int + ) { + self.imageHeight = imageHeight + self.imageWidth = imageWidth + self.imageChannels = imageChannels + self.hiddenDimension = hiddenDimension + self.latentDimension = latentDimension + + encoder_conv1 = Conv2D(filterShape: (3, 3, imageChannels, imageChannels), padding: .same, activation: relu) + + encoder_pool1 = MaxPool2D(poolSize: (2, 2), strides: (2, 2), padding: .same) + + encoder1 = Dense( + inputSize: imageHeight * imageWidth * imageChannels / 4, + outputSize: hiddenDimension, + activation: relu) + + encoder2 = Dense( + inputSize: hiddenDimension, + outputSize: latentDimension, + activation: relu) + + encoder3 = Dense( + inputSize: latentDimension, + outputSize: 2) + + } + + /// Initialize given an image batch + public typealias HyperParameters = (hiddenDimension: Int, latentDimension: Int) + // public init(from imageBatch: Tensor, given parameters: HyperParameters? = nil) { + public init(patches patches: Tensor, labels labels: Tensor, given parameters: HyperParameters? = nil) { + print("init from image batch") + // let shape = imageBatch.shape + // precondition(imageBatch.rank == 4, "Wrong image shape \(shape)") + // let (_, H_, W_, C_) = (shape[0], shape[1], shape[2], shape[3]) + // print("sizes", H_, W_, C_) + // training data shape [600, 40, 70, 1] + let (H_, W_, C_) = (patches.shape[1], patches.shape[2], 1) + let (h,d) = parameters ?? (100,10) + var model = NNClassifier(imageHeight: H_, imageWidth: W_, imageChannels: C_, + hiddenDimension: h, latentDimension: d) + + let optimizer = Adam(for: model) + optimizer.learningRate = 1e-3 + + let lossFunc = NNClassifierLoss() + // Issues I came across: TrainingEpochs function was scrambling the order + // Then the map function was too slow during training. 
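+    // `TrainingEpochs` below reshuffles `trainingData` at every epoch, and each
+    // `batchSamples.collated` call stacks the samples into one `BeeBatch` via the
+    // `Collatable` conformance defined above.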
+ + // Thread-local variable that model layers read to know their mode + Context.local.learningPhase = .training + + // print("Shape of imagebatch", imageBatch.shape) + // print("Shape of imagebatch", imageBatch.unstacked().count) + let trainingData : [BeeBatch] = (zip(patches.unstacked(), labels.unstacked()).map{BeeBatch(patch: $0.0, label: $0.1)}) + + let epochs = TrainingEpochs(samples: trainingData, batchSize: 200) // this is an array + // + var trainLossResults: [Double] = [] + let epochCount = 600 + for (epochIndex, epoch) in epochs.prefix(epochCount).enumerated() { + // print("epoch") + // print(epochIndex) + var epochLoss: Double = 0 + var batchCount: Int = 0 + // epoch is a Slices object, see below + // print("encoder 1", model.encoder1) + // print("encoder 2", model.encoder2) + // print("encoder 3", model.encoder3) + for batchSamples in epoch { + // print(".") + let batch = batchSamples.collated + // let batch = Tensor(stacking: batchSamples.map { $0.frame!.patch(at: $0.obb) }) + // let type = [Int32](batchSamples.map { $0.type == TrackingLikelihoodModel.PatchType.bg ? 0 : 1}) + // print("..") + let (loss, grad) = valueWithGradient(at: model) { lossFunc($0, batch) } + // print("...") + optimizer.update(&model, along: grad) + // print("....") + // print("encoder 1", model.encoder1) + // print("encoder 2", model.encoder2) + // print("encoder 3", model.encoder3) + epochLoss += loss.scalarized() + batchCount += 1 + } + epochLoss /= Double(batchCount) + trainLossResults.append(epochLoss) + if epochIndex % 5 == 0 { + print("\nEpoch \(epochIndex):", terminator:"") + } + print(" \(epochLoss),", terminator: "") + } + + self = model + } + + /// Differentiable encoder + @differentiable(wrt: imageBatch) + public func classify(_ imageBatch: Tensor) -> Tensor { + let batchSize = imageBatch.shape[0] + let expectedShape: TensorShape = [batchSize, imageHeight, imageWidth, imageChannels] + precondition( + imageBatch.shape == expectedShape, + "input shape is \(imageBatch.shape), but expected \(expectedShape)") + return imageBatch + .sequenced(through: encoder_conv1, encoder_pool1).reshaped(to: [batchSize, imageHeight * imageWidth * imageChannels / 4]) + .sequenced(through: encoder1, encoder2, encoder3) + } + + /// Standard: add syntactic sugar to apply model as a function call. 
+ @differentiable + public func callAsFunction(_ imageBatch: Tensor) -> Tensor { + let output = classify(imageBatch) + return output + } +} + +/// [1] https://openreview.net/forum?id=S1g7tpEYDS +public struct SmallerNNClassifier: Layer{ + @noDerivative public let imageHeight: Int + @noDerivative public let imageWidth: Int + @noDerivative public let imageChannels: Int + @noDerivative public let latentDimension: Int + public var encoder_conv1: Conv2D + var encoder_pool1: MaxPool2D + public var encoder1: Dense + public var encoder2: Dense + + public init( + imageHeight: Int, imageWidth: Int, imageChannels: Int, latentDimension: Int + ) { + self.imageHeight = imageHeight + self.imageWidth = imageWidth + self.imageChannels = imageChannels + self.latentDimension = latentDimension + + encoder_conv1 = Conv2D(filterShape: (3, 3, imageChannels, imageChannels), padding: .same, activation: relu) + + encoder_pool1 = MaxPool2D(poolSize: (2, 2), strides: (2, 2), padding: .same) + + encoder1 = Dense( + inputSize: imageHeight * imageWidth * imageChannels / 4, + outputSize: latentDimension, + activation: relu) + + encoder2 = Dense( + inputSize: latentDimension, + outputSize: 2) + + } + + /// Initialize given an image batch + public init(patches patches: Tensor, labels labels: Tensor, given latentDimension: Int? = nil) { + print("init from image batch") + let (H_, W_, C_) = (patches.shape[1], patches.shape[2], 1) + let d = latentDimension ?? 10 + var model = SmallerNNClassifier(imageHeight: H_, imageWidth: W_, imageChannels: C_, latentDimension: d) + + let optimizer = Adam(for: model) + optimizer.learningRate = 1e-3 + + let lossFunc = NNClassifierLoss() + Context.local.learningPhase = .training + let trainingData : [BeeBatch] = (zip(patches.unstacked(), labels.unstacked()).map{BeeBatch(patch: $0.0, label: $0.1)}) + let epochs = TrainingEpochs(samples: trainingData, batchSize: 200) // this is an array + var trainLossResults: [Double] = [] + let epochCount = 300 + for (epochIndex, epoch) in epochs.prefix(epochCount).enumerated() { + var epochLoss: Double = 0 + var batchCount: Int = 0 + for batchSamples in epoch { + let batch = batchSamples.collated + let (loss, grad) = valueWithGradient(at: model) { lossFunc($0, batch) } + optimizer.update(&model, along: grad) + epochLoss += loss.scalarized() + batchCount += 1 + } + epochLoss /= Double(batchCount) + trainLossResults.append(epochLoss) + // if epochIndex % 50 == 0 { + print("Epoch \(epochIndex): Loss: \(epochLoss)") + // } + } + + self = model + } + + /// Differentiable encoder + @differentiable(wrt: imageBatch) + public func classify(_ imageBatch: Tensor) -> Tensor { + let batchSize = imageBatch.shape[0] + let expectedShape: TensorShape = [batchSize, imageHeight, imageWidth, imageChannels] + precondition( + imageBatch.shape == expectedShape, + "input shape is \(imageBatch.shape), but expected \(expectedShape)") + return imageBatch + .sequenced(through: encoder_conv1, encoder_pool1).reshaped(to: [batchSize, imageHeight * imageWidth * imageChannels / 4]) + .sequenced(through: encoder1, encoder2) + } + + /// Standard: add syntactic sugar to apply model as a function call. 
+ @differentiable + public func callAsFunction(_ imageBatch: Tensor) -> Tensor { + let output = classify(imageBatch) + return output + } +} + +public struct LargerNNClassifier: Layer{ + @noDerivative public let imageHeight: Int + @noDerivative public let imageWidth: Int + @noDerivative public let imageChannels: Int + @noDerivative public let hiddenDimension: Int + @noDerivative public let latentDimension: Int + public var encoder_conv1: Conv2D + var encoder_pool1: MaxPool2D + public var encoder1: Dense + public var encoder2: Dense + public var encoder3: Dense + public var encoder4: Dense + public init( + imageHeight: Int, imageWidth: Int, imageChannels: Int, + hiddenDimension: Int, latentDimension: Int + ) { + self.imageHeight = imageHeight + self.imageWidth = imageWidth + self.imageChannels = imageChannels + self.hiddenDimension = hiddenDimension + self.latentDimension = latentDimension + + encoder_conv1 = Conv2D(filterShape: (3, 3, imageChannels, imageChannels), padding: .same, activation: relu) + + encoder_pool1 = MaxPool2D(poolSize: (2, 2), strides: (2, 2), padding: .same) + + encoder1 = Dense( + inputSize: imageHeight * imageWidth * imageChannels / 4, + outputSize: hiddenDimension, + activation: relu) + + encoder2 = Dense( + inputSize: hiddenDimension, + outputSize: hiddenDimension, + activation: relu) + + encoder3 = Dense( + inputSize: hiddenDimension, + outputSize: latentDimension, + activation: relu) + + encoder4 = Dense( + inputSize: latentDimension, + outputSize: 2) + + } + + /// Initialize given an image batch + public typealias HyperParameters = (hiddenDimension: Int, latentDimension: Int) + // public init(from imageBatch: Tensor, given parameters: HyperParameters? = nil) { + public init(patches patches: Tensor, labels labels: Tensor, given parameters: HyperParameters? = nil) { + print("init from image batch") + let (H_, W_, C_) = (patches.shape[1], patches.shape[2], 1) + let (h,d) = parameters ?? 
(100,10) + var model = LargerNNClassifier(imageHeight: H_, imageWidth: W_, imageChannels: C_, + hiddenDimension: h, latentDimension: d) + let optimizer = Adam(for: model) + optimizer.learningRate = 1e-3 + let lossFunc = NNClassifierLoss() + Context.local.learningPhase = .training + let trainingData : [BeeBatch] = (zip(patches.unstacked(), labels.unstacked()).map{BeeBatch(patch: $0.0, label: $0.1)}) + let epochs = TrainingEpochs(samples: trainingData, batchSize: 200) // this is an array + // + var trainLossResults: [Double] = [] + let epochCount = 600 + for (epochIndex, epoch) in epochs.prefix(epochCount).enumerated() { + var epochLoss: Double = 0 + var batchCount: Int = 0 + for batchSamples in epoch { + let batch = batchSamples.collated + let (loss, grad) = valueWithGradient(at: model) { lossFunc($0, batch) } + optimizer.update(&model, along: grad) + epochLoss += loss.scalarized() + batchCount += 1 + } + epochLoss /= Double(batchCount) + trainLossResults.append(epochLoss) + if epochIndex % 5 == 0 { + print("\nEpoch \(epochIndex):", terminator:"") + } + print(" \(epochLoss),", terminator: "") + } + + // if NSFileManager.fileExistsAtPath(path) { + // print("File exists") + // } else { + // print("File does not exist") + // } + // np.save("epochloss\()", Tensor(trainLossResults).makeNumpyArray()) + + self = model + } + + /// Differentiable encoder + @differentiable(wrt: imageBatch) + public func classify(_ imageBatch: Tensor) -> Tensor { + let batchSize = imageBatch.shape[0] + let expectedShape: TensorShape = [batchSize, imageHeight, imageWidth, imageChannels] + precondition( + imageBatch.shape == expectedShape, + "input shape is \(imageBatch.shape), but expected \(expectedShape)") + return imageBatch + .sequenced(through: encoder_conv1, encoder_pool1).reshaped(to: [batchSize, imageHeight * imageWidth * imageChannels / 4]) + .sequenced(through: encoder1, encoder2, encoder3, encoder4) + } + + /// Standard: add syntactic sugar to apply model as a function call. + @differentiable + public func callAsFunction(_ imageBatch: Tensor) -> Tensor { + let output = classify(imageBatch) + return output + } +} + + +/// The loss function for the `DenseRAE`. +public struct NNClassifierLoss { + + /// Return the loss of `model` on `imageBatch`. + /// + /// Parameter printLoss: Whether to print the loss and its components. 
+ @differentiable + public func callAsFunction( + _ model: NNClassifier, _ imageBatch: BeeBatch, printLoss: Bool = false + ) -> Tensor { + let batchSize = imageBatch.patch.shape[0] + let output = model(imageBatch.patch) + let totalLoss = softmaxCrossEntropy(logits: output, labels: imageBatch.label) + return totalLoss + } + + @differentiable + public func callAsFunction( + _ model: LargerNNClassifier, _ imageBatch: BeeBatch, printLoss: Bool = false + ) -> Tensor { + let batchSize = imageBatch.patch.shape[0] + let output = model(imageBatch.patch) + let totalLoss = softmaxCrossEntropy(logits: output, labels: imageBatch.label) + return totalLoss + } + + + @differentiable + public func callAsFunction( + _ model: SmallerNNClassifier, _ imageBatch: BeeBatch, printLoss: Bool = false + ) -> Tensor { + let batchSize = imageBatch.patch.shape[0] + let output = model(imageBatch.patch) + let totalLoss = softmaxCrossEntropy(logits: output, labels: imageBatch.label) + return totalLoss + } +} + +extension NNClassifier: Classifier {} +extension SmallerNNClassifier : Classifier {} +extension LargerNNClassifier: Classifier {} + + + +public struct PretrainedNNClassifier : Classifier{ + public var inner: NNClassifier + + /// The constructor that only does loading of the pretrained weights. + public init(from imageBatch: Tensor, given: HyperParameters?) { + let shape = imageBatch.shape + precondition(imageBatch.rank == 4, "Wrong image shape \(shape)") + let (_, H_, W_, C_) = (shape[0], shape[1], shape[2], shape[3]) + if let params = given { + var encoder = NNClassifier( + imageHeight: H_, imageWidth: W_, imageChannels: 1, + hiddenDimension: params.hiddenDimension, latentDimension: params.latentDimension + ) + + let np = Python.import("numpy") + + encoder.load(weights: np.load(params.weightFile, allow_pickle: true)) + inner = encoder + } else { + inner = NNClassifier( + imageHeight: H_, imageWidth: W_, imageChannels: 1, + hiddenDimension: 1, latentDimension: 1 + ) + fatalError("Must provide hyperparameters to pretrained network") + } + } + + /// Constructor that does training of the network + public init(patches patches: Tensor, labels labels: Tensor, given: HyperParameters?) { + inner = NNClassifier( + patches: patches, labels: labels, given: (given != nil) ? (hiddenDimension: given!.hiddenDimension, latentDimension: given!.latentDimension) : nil + ) + } + + /// Save the weight to file + public func save(to path: String) { + let np = Python.import("numpy") + np.save(path, np.array(inner.numpyWeights, dtype: Python.object)) + } + + @differentiable + public func classify(_ imageBatch: Tensor) -> Tensor { + inner.classify(imageBatch) + } + + /// Initialize given an image batch + public typealias HyperParameters = (hiddenDimension: Int, latentDimension: Int, weightFile: String) +} + + + + +public struct PretrainedSmallerNNClassifier : Classifier{ + public var inner: SmallerNNClassifier + + /// The constructor that only does loading of the pretrained weights. + public init(from imageBatch: Tensor, given: HyperParameters?) 
{ + let shape = imageBatch.shape + precondition(imageBatch.rank == 4, "Wrong image shape \(shape)") + let (_, H_, W_, C_) = (shape[0], shape[1], shape[2], shape[3]) + if let params = given { + var encoder = SmallerNNClassifier( + imageHeight: H_, imageWidth: W_, imageChannels: 1, latentDimension: params.latentDimension + ) + + let np = Python.import("numpy") + + encoder.load(weights: np.load(params.weightFile, allow_pickle: true)) + inner = encoder + } else { + inner = SmallerNNClassifier( + imageHeight: H_, imageWidth: W_, imageChannels: 1, latentDimension: 1 + ) + fatalError("Must provide hyperparameters to pretrained network") + } + } + + /// Constructor that does training of the network + public init(patches patches: Tensor, labels labels: Tensor, given: HyperParameters?) { + inner = SmallerNNClassifier( + patches: patches, labels: labels, given: (given != nil) ? (given!.latentDimension) : nil + ) + } + + /// Save the weight to file + public func save(to path: String) { + let np = Python.import("numpy") + np.save(path, np.array(inner.numpyWeights, dtype: Python.object)) + } + + @differentiable + public func classify(_ imageBatch: Tensor) -> Tensor { + inner.classify(imageBatch) + } + + + /// Initialize given an image batch + public typealias HyperParameters = (latentDimension: Int, weightFile: String) +} + + + + + + +public struct PretrainedLargerNNClassifier : Classifier{ + public var inner: LargerNNClassifier + + /// The constructor that only does loading of the pretrained weights. + public init(from imageBatch: Tensor, given: HyperParameters?) { + let shape = imageBatch.shape + precondition(imageBatch.rank == 4, "Wrong image shape \(shape)") + let (_, H_, W_, C_) = (shape[0], shape[1], shape[2], shape[3]) + if let params = given { + var encoder = LargerNNClassifier( + imageHeight: H_, imageWidth: W_, imageChannels: 1, + hiddenDimension: params.hiddenDimension, latentDimension: params.latentDimension + ) + + let np = Python.import("numpy") + + encoder.load(weights: np.load(params.weightFile, allow_pickle: true)) + inner = encoder + } else { + inner = LargerNNClassifier( + imageHeight: H_, imageWidth: W_, imageChannels: 1, + hiddenDimension: 1, latentDimension: 1 + ) + fatalError("Must provide hyperparameters to pretrained network") + } + } + + /// Constructor that does training of the network + public init(patches patches: Tensor, labels labels: Tensor, given: HyperParameters?) { + inner = LargerNNClassifier( + patches: patches, labels: labels, given: (given != nil) ? 
(hiddenDimension: given!.hiddenDimension, latentDimension: given!.latentDimension) : nil + ) + } + + /// Save the weight to file + public func save(to path: String) { + let np = Python.import("numpy") + np.save(path, np.array(inner.numpyWeights, dtype: Python.object)) + } + + @differentiable + public func classify(_ imageBatch: Tensor) -> Tensor { + inner.classify(imageBatch) + } + + + /// Initialize given an image batch + public typealias HyperParameters = (hiddenDimension: Int, latentDimension: Int, weightFile: String) +} \ No newline at end of file diff --git a/Sources/BeeTracking/OISTBeeVideo+Batches.swift b/Sources/BeeTracking/OISTBeeVideo+Batches.swift index 399eced7..f9bd7e19 100644 --- a/Sources/BeeTracking/OISTBeeVideo+Batches.swift +++ b/Sources/BeeTracking/OISTBeeVideo+Batches.swift @@ -83,14 +83,16 @@ extension OISTBeeVideo { patchSize: (Int, Int), batchSize: Int = 200 ) -> [(frame: Tensor?, obb: OrientedBoundingBox)] { + print("hello0") /// Anything not completely overlapping labels var deterministicEntropy = ARC4RandomNumberGenerator(seed: 42) let frames = self.randomFrames(self.frames.count, using: &deterministicEntropy) - + print("hello0.5") // We need `batchSize / frames.count` patches from each frame, plus the remainder of the // integer division. var patchesPerFrame = Array(repeating: batchSize / frames.count, count: frames.count) patchesPerFrame[0] += batchSize % frames.count + print("hello1") /// Samples bounding boxes randomly from each frame /// returns array of (ref to frame, oriented bounding box) @@ -103,6 +105,7 @@ extension OISTBeeVideo { rows: patchSize.0, cols: patchSize.1)) } } + print("hello2") return obbs } diff --git a/Sources/BeeTracking/ProbabilisticTracker.swift b/Sources/BeeTracking/ProbabilisticTracker.swift index c812e1a3..05b55c98 100644 --- a/Sources/BeeTracking/ProbabilisticTracker.swift +++ b/Sources/BeeTracking/ProbabilisticTracker.swift @@ -150,9 +150,12 @@ public func trainProbabilisticTracker( useCache: true ) let batchPositive = encoder.encode(fg) + // let foregroundModel = GaussianNB(from:batchPositive, regularizer: 1e-3) let foregroundModel = MultivariateGaussian(from:batchPositive, regularizer: 1e-3) + let batchNegative = encoder.encode(bg) + // let backgroundModel = GaussianNB(from: batchNegative, regularizer: 1e-3) let backgroundModel = MultivariateGaussian(from: batchNegative, regularizer: 1e-3) let tracker = makeProbabilisticTracker( @@ -206,7 +209,7 @@ public func makeProbabilisticTracker< appearanceModelSize: targetSize, foregroundModel: foregroundModel, backgroundModel: backgroundModel, - maxPossibleNegativity: 1e4 + maxPossibleNegativity: 1e7 ) ) } diff --git a/Sources/BeeTracking/ProbabilisticTracker2.swift b/Sources/BeeTracking/ProbabilisticTracker2.swift new file mode 100644 index 00000000..dc2921e3 --- /dev/null +++ b/Sources/BeeTracking/ProbabilisticTracker2.swift @@ -0,0 +1,72 @@ +import BeeDataset +import PenguinStructures +import SwiftFusion +import TensorFlow +import PythonKit +import Foundation + +/// Returns a tracking configuration for a tracker using an random projection. +/// +/// Parameter model: The random projection model to use. +/// Parameter frames: The frames of the video where we want to run tracking. +/// Parameter targetSize: The size of the target in the frames. 
+public func makeProbabilisticTracker2<
+  MyClassifier: Classifier
+>(
+  model: MyClassifier,
+  frames: [Tensor<Float>],
+  targetSize: (Int, Int)
+) -> TrackingConfiguration<Tuple1<TypedID<Pose2>>> {
+  var variableTemplate = VariableAssignments()
+  var frameVariableIDs = [Tuple1<TypedID<Pose2>>]()
+  for _ in 0..<frames.count {
+    frameVariableIDs.append(Tuple1(variableTemplate.store(Pose2())))
+  }
+
+  let addPrior = { (variables: Tuple1<TypedID<Pose2>>, values: Tuple1<Pose2>, graph: inout FactorGraph) -> () in
+    let (poseID) = unpack(variables)
+    let (pose) = unpack(values)
+    graph.store(WeightedPriorFactorPose2(poseID, pose, weight: 1e-2, rotWeight: 2e2))
+  }
+
+  let addTrackingFactor = { (variables: Tuple1<TypedID<Pose2>>, frame: Tensor<Float>, graph: inout FactorGraph) -> () in
+    let (poseID) = unpack(variables)
+    graph.store(
+      ProbablisticTrackingFactor2(poseID,
+        measurement: frame,
+        classifier: model,
+        patchSize: targetSize,
+        appearanceModelSize: targetSize
+      )
+    )
+  }
+
+  return TrackingConfiguration(
+    frames: frames,
+    variableTemplate: variableTemplate,
+    frameVariableIDs: frameVariableIDs,
+    addPriorFactor: addPrior,
+    addTrackingFactor: addTrackingFactor,
+    addBetweenFactor: { (variables1, variables2, graph) -> () in
+      let (poseID1) = unpack(variables1)
+      let (poseID2) = unpack(variables2)
+      graph.store(WeightedBetweenFactorPose2(poseID1, poseID2, Pose2(), weight: 1e-2, rotWeight: 2e2))
+    },
+    addFixedBetweenFactor: { (values, variables, graph) -> () in
+      let (prior) = unpack(values)
+      let (poseID) = unpack(variables)
+      graph.store(WeightedPriorFactorPose2SD(poseID, prior, sdX: 8, sdY: 8, sdTheta:0.4))
+    })
+}
+
+/// Returns `t` as a Swift tuple.
+fileprivate func unpack<A, B>(_ t: Tuple2<A, B>) -> (A, B) {
+  return (t.head, t.tail.head)
+}
+/// Returns `t` as a Swift tuple.
+fileprivate func unpack<A>(_ t: Tuple1<A>) -> (A) {
+  return (t.head)
+}
\ No newline at end of file
diff --git a/Sources/BeeTracking/TrackingFactorGraph.swift b/Sources/BeeTracking/TrackingFactorGraph.swift
index 5cfeb9e1..ff5e3d2c 100644
--- a/Sources/BeeTracking/TrackingFactorGraph.swift
+++ b/Sources/BeeTracking/TrackingFactorGraph.swift
@@ -134,7 +134,8 @@ public struct TrackingConfiguration<FrameVariables: VariableTuple> {
   ) -> ()
 
   /// The optimizer to use during inference.
-  public var optimizer = LM()
+  // public var optimizer = LM()
+  public var optimizer = GradientDescent(learningRate: 1e-5)
 
   /// Creates an instance.
   ///
@@ -172,9 +173,10 @@ public struct TrackingConfiguration<FrameVariables: VariableTuple> {
     self.addBetweenFactor = addBetweenFactor
     self.addFixedBetweenFactor = addFixedBetweenFactor!
 
-    self.optimizer.precision = 1e-1
-    self.optimizer.max_iteration = 100
-    self.optimizer.cgls_precision = 1e-5
+    // For LM
+    // self.optimizer.precision = 1e-1
+    // self.optimizer.max_iteration = 100
+    // self.optimizer.cgls_precision = 1e-5
   }
 
   /// Returns a `FactorGraph` for the tracking problem on the frames at `frameIndices`.
@@ -194,38 +196,77 @@ public struct TrackingConfiguration<FrameVariables: VariableTuple> {
       // First get pose IDs: pose is assumed to be first variable in the frameVariableID tuple
       let currentPoseID = (frameVariableIDs[i + 1] as! Tuple1<TypedID<Pose2>>).head
       let previousPoseID = (frameVariableIDs[i] as! Tuple1<TypedID<Pose2>>).head
-
       // Remember best pose
       var bestPose = x[currentPoseID]
-
       // Sample from motion model and take best pose
       var bestError = g.error(at: x)
-      for _ in 0..<2000 {
+
+      var posex = [Double]()
+      var posey = [Double]()
+      var posetheta = [Double]()
+      var error = [Double]()
+      var besterror = [Double]()
+      // time x , time y , time theta , time error
+      for _ in 0..<10000 { //2000
        x[currentPoseID] = x[previousPoseID]
        x[currentPoseID].perturbWith(stddev: Vector3(0.3, 8, 4.6))
        let candidateError = g.error(at: x)
+        ///
+        // print("x", x)
+        // print("theta", x[currentPoseID].rot.theta, "vector", x[currentPoseID].t.x, x[currentPoseID].t.y)
+        // print("g.error(at: x)", g.error(at: x))
+        // print("frame", i)
+
+        ///
        if candidateError < bestError {
          bestError = candidateError
          bestPose = x[currentPoseID]
        }
+
+        // APPEND CURRENT ERROR
+        posex.append(x[currentPoseID].t.x)
+        posey.append(x[currentPoseID].t.y)
+        posetheta.append(x[currentPoseID].rot.theta)
+        error.append(candidateError)
+        besterror.append(bestError)
      }
      x[currentPoseID] = bestPose
+      let np = Python.import("numpy")
+      let posex_np = Tensor(posex).makeNumpyArray()
+      let posey_np = Tensor(posey).makeNumpyArray()
+      let posetheta_np = Tensor(posetheta).makeNumpyArray()
+      let error_np = Tensor(error).makeNumpyArray()
+      let besterror_np = Tensor(besterror).makeNumpyArray()
+
+      let folderName = "sampling"
+      if !FileManager.default.fileExists(atPath: folderName) {
+        do {
+          try FileManager.default.createDirectory(atPath: folderName, withIntermediateDirectories: true, attributes: nil)
+        } catch {
+          print(error.localizedDescription)
+        }
+      }
+
+      np.save("./sampling/sampling_frame_\(i)_posex.npy", posex_np)
+      np.save("./sampling/sampling_frame_\(i)_posey.npy", posey_np)
+      np.save("./sampling/sampling_frame_\(i)_posetheta.npy", posetheta_np)
+      np.save("./sampling/sampling_frame_\(i)_error.npy", error_np)
+      np.save("./sampling/sampling_frame_\(i)_besterror.npy", besterror_np)
    }
+
  /// Extend the track
  mutating func extendTrack(x: inout VariableAssignments, fromFrame i:Int, withSampling samplingFlag: Bool = false ) {
    let currentVarID = frameVariableIDs[i + 1]
    let previousVarID = frameVariableIDs[i]
-
    // Create a tracking factor graph on just the `i+1`-th variable.
    var g = graph(on: (i + 1)..<(i + 2))

    // The `i`-th variable is already initialized well, so add a prior factor that it stays
    // near its current position.
    addFixedBetweenFactor(x[previousVarID], currentVarID, &g)
-
    // Initialize
    if (samplingFlag) {
      // Try to initialize pose of the `i+1`-th variable by sampling
@@ -256,7 +297,6 @@ public struct TrackingConfiguration<FrameVariables: VariableTuple> {
    }

    // TODO: We could also do a final optimization on all the variables jointly here.
- return x } } diff --git a/Sources/BeeTracking/TrackingMetrics.swift b/Sources/BeeTracking/TrackingMetrics.swift index 3783197c..d582ee02 100644 --- a/Sources/BeeTracking/TrackingMetrics.swift +++ b/Sources/BeeTracking/TrackingMetrics.swift @@ -189,11 +189,14 @@ extension TrackerEvaluationDataset { deltaAnchor: Int, outputFile: String ) -> TrackerEvaluationResults { + // print("yooo") let sequenceEvaluations = sequences.prefix(sequenceCount).enumerated().map { (i, sequence) -> SequenceEvaluationResults in print("Evaluating sequence \(i + 1) of \(sequenceCount)") return sequence.evaluate(tracker, deltaAnchor: deltaAnchor, outputFile: "\(outputFile)-sequence\(i)") } + // print("yooo2") + let result = TrackerEvaluationResults( sequences: sequenceEvaluations, trackerMetrics: TrackerMetrics(sequenceEvaluations.map { $0.sequenceMetrics }), @@ -249,13 +252,26 @@ extension TrackerEvaluationSequence { else { continue } + // print("a") let subsequence = subsequences[i] print("Evaluating subsequence \(i + 1) of \(subsequences.count)") + // print("gggg") + // print(buf.baseAddress) + // if i print subsequence.frames it infinite loops + // print(subsequence) + // print(subsequence.groundTruth[0]) + // print(tracker(subsequence.frames, subsequence.groundTruth[0])) (buf.baseAddress! + i).initialize(to: tracker(subsequence.frames, subsequence.groundTruth[0])) + // print("d") + } } + // print("b") + actualCount = subsequences.count } + // print("c") + let subsequenceEvaluations = zip(subsequences, subsequencePredictions).map { SubsequenceEvaluationResults( metrics: SubsequenceMetrics(groundTruth: $0.0.groundTruth, prediction: $0.1), @@ -263,10 +279,11 @@ extension TrackerEvaluationSequence { groundTruth: $0.0.groundTruth, frames: $0.0.frames) } - + // print("e") let result = SequenceEvaluationResults( subsequences: subsequenceEvaluations, sequenceMetrics: SequenceMetrics(subsequenceEvaluations.map { $0.metrics })) + // print("f") return result } diff --git a/Sources/BeeTracking/Visualizations.swift b/Sources/BeeTracking/Visualizations.swift index 334e2ab6..8326cdfe 100644 --- a/Sources/BeeTracking/Visualizations.swift +++ b/Sources/BeeTracking/Visualizations.swift @@ -60,6 +60,7 @@ public func plotOverlap(metrics: SubsequenceMetrics, on ax: PythonObject) { ax.set_title("Overlap") } + /// plot Comparison image public func plotPatchWithGT(frame: Tensor, actual: Pose2, expected: Pose2) -> (PythonObject, PythonObject) { let plt = Python.import("matplotlib.pyplot") @@ -84,15 +85,23 @@ public func plotPoseDifference(track: [Pose2], withGroundTruth expected: [Pose2] ax.set_title("L2 Theta Difference (X-axis) vs. 
L2 X, Y Difference Over Time") } -/// plot Comparison image public func plotFrameWithPatches(frame: Tensor, actual: Pose2, expected: Pose2, firstGroundTruth: Pose2) -> (PythonObject, PythonObject) { let plt = Python.import("matplotlib.pyplot") let mpl = Python.import("matplotlib") - + // print("plottingFrameWithPatches") + // print("actual Pose", actual, expected) + // print("eh") let (fig, ax) = plt.subplots(figsize: Python.tuple([8, 4])).tuple2 - ax.imshow(frame.makeNumpyArray() / 255.0) + // print("printing the frame shape") + // print(frame) + // print(frame.shape) + let np = Python.import("numpy") + let fr = np.squeeze(frame.makeNumpyArray()) + ax.imshow(fr / 255.0, cmap: "gray") + // print("eh2") let actualBoundingBox = OrientedBoundingBox(center: actual, rows: 40, cols: 70) ax.plot(actualBoundingBox.corners.map{$0.x} + [actualBoundingBox.corners.first!.x], actualBoundingBox.corners.map{$0.y} + [actualBoundingBox.corners.first!.y], "r-") + // print("eh3") // ax.plot(Python.tuple(actualBoundingBox.rot.) var supportPatch = mpl.patches.RegularPolygon( Python.tuple([actualBoundingBox.center.t.x, actualBoundingBox.center.t.y]), @@ -103,24 +112,139 @@ public func plotFrameWithPatches(frame: Tensor, actual: Pose2, expected: ) ax.add_patch(supportPatch) - let expectedBoundingBox = OrientedBoundingBox(center: expected, rows: 40, cols: 70) - ax.plot(Python.list(expectedBoundingBox.corners.map{$0.x} + [expectedBoundingBox.corners.first!.x]), Python.list(expectedBoundingBox.corners.map{$0.y} + [expectedBoundingBox.corners.first!.y]), "g-") + let expectedBoundingBox = OrientedBoundingBox(center: expected, rows: 40, cols: 70) + ax.plot(Python.list(expectedBoundingBox.corners.map{$0.x} + [expectedBoundingBox.corners.first!.x]), Python.list(expectedBoundingBox.corners.map{$0.y} + [expectedBoundingBox.corners.first!.y]), "b-") + // print("eh5") supportPatch = mpl.patches.RegularPolygon( Python.tuple([expectedBoundingBox.center.t.x, expectedBoundingBox.center.t.y]), numVertices:3, radius:10, - color:"g", + color:"b", orientation: expectedBoundingBox.center.rot.theta - (Double.pi / 2) ) + // print("eh6") ax.add_patch(supportPatch) ax.set_xlim(firstGroundTruth.t.x - 200, firstGroundTruth.t.x + 200) ax.set_ylim(firstGroundTruth.t.y - 200, firstGroundTruth.t.y + 200) - + // print("eh7") ax.title.set_text("Prediction (Red) vs. 
Actual (Green)") return (fig, ax) } + +/// plot Comparison image +public func plotFrameWithPatches2(frame: Tensor, actual_box1: OrientedBoundingBox, actual_box2: OrientedBoundingBox, expected: Pose2, firstGroundTruth: Pose2) -> (PythonObject, PythonObject) { + let plt = Python.import("matplotlib.pyplot") + let mpl = Python.import("matplotlib") + let (fig, ax) = plt.subplots(1, 2, figsize: Python.tuple([8, 4])).tuple2 + let np = Python.import("numpy") + let fr = np.squeeze(frame.makeNumpyArray()) + ax[0].imshow(fr / 255.0, cmap: "gray") + ax[1].imshow(fr / 255.0, cmap: "gray") + ax[0].set_axis_off() + ax[1].set_axis_off() + let actualBoundingBox = OrientedBoundingBox(center: actual_box1.center, rows: actual_box1.rows, cols: actual_box1.cols) + ax[0].plot(actualBoundingBox.corners.map{$0.x} + [actualBoundingBox.corners.first!.x], actualBoundingBox.corners.map{$0.y} + [actualBoundingBox.corners.first!.y], "r-") + var supportPatch = mpl.patches.RegularPolygon( + Python.tuple([actualBoundingBox.center.t.x, actualBoundingBox.center.t.y]), + numVertices:3, + radius:10, + color:"r", + orientation: actualBoundingBox.center.rot.theta - (Double.pi / 2) + ) + ax[0].add_patch(supportPatch) + ax[0].add_patch(supportPatch) + ax[0].set_xlim(firstGroundTruth.t.x - 200, firstGroundTruth.t.x + 200) + ax[0].set_ylim(firstGroundTruth.t.y - 200, firstGroundTruth.t.y + 200) + ax[0].title.set_text("RAE 256") + + let actualBoundingBox2 = OrientedBoundingBox(center: actual_box2.center, rows: actual_box2.rows, cols: actual_box2.cols) + ax[1].plot(actualBoundingBox2.corners.map{$0.x} + [actualBoundingBox2.corners.first!.x], actualBoundingBox2.corners.map{$0.y} + [actualBoundingBox2.corners.first!.y], "r-") + + ax[1].set_xlim(firstGroundTruth.t.x - 200, firstGroundTruth.t.x + 200) + ax[1].set_ylim(firstGroundTruth.t.y - 200, firstGroundTruth.t.y + 200) + ax[1].title.set_text("SiamMask") + + return (fig, ax) +} + + + +/// plot Optimization beginning, end, +public func plotFrameWithPatches3(frame: Tensor, start: Pose2, end: Pose2, expected: Pose2, firstGroundTruth: Pose2, errors: [Double]) -> (PythonObject, PythonObject) { + let plt = Python.import("matplotlib.pyplot") + let mpl = Python.import("matplotlib") + // print("plottingFrameWithPatches") + // print("actual Pose", actual, expected) + // print("eh") + let (fig, axs) = plt.subplots(1,2,figsize: Python.tuple([8, 4])).tuple2 + // print("printing the frame shape") + // print(frame) + // print(frame.shape) + let ax = axs[0] + let np = Python.import("numpy") + let fr = np.squeeze(frame.makeNumpyArray()) + ax.imshow(fr / 255.0, cmap: "gray") + // print("eh2") + let startBoundingBox = OrientedBoundingBox(center: start, rows: 40, cols: 70) + ax.plot(startBoundingBox.corners.map{$0.x} + [startBoundingBox.corners.first!.x], startBoundingBox.corners.map{$0.y} + [startBoundingBox.corners.first!.y], "g-") + // print("eh3") + // ax.plot(Python.tuple(startBoundingBox.rot.) 
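+    // The closed-polyline pattern used here (append the first corner again so
+    // matplotlib closes the rectangle) is repeated for each box in this
+    // function: start above, expected and end below. A sketch of a helper that
+    // could factor it out; hypothetical, not part of this patch, assuming only
+    // the `corners` property and the matplotlib axes object already in use:
+    // func plotClosedBox(_ box: OrientedBoundingBox, on ax: PythonObject, style: String) {
+    //   ax.plot(Python.list(box.corners.map { $0.x } + [box.corners.first!.x]),
+    //           Python.list(box.corners.map { $0.y } + [box.corners.first!.y]),
+    //           style)
+    // }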
+ + let expectedBoundingBox = OrientedBoundingBox(center: expected, rows: 40, cols: 70) + ax.plot(Python.list(expectedBoundingBox.corners.map{$0.x} + [expectedBoundingBox.corners.first!.x]), Python.list(expectedBoundingBox.corners.map{$0.y} + [expectedBoundingBox.corners.first!.y]), "b-") + // print("eh5") + var supportPatch = mpl.patches.RegularPolygon( + Python.tuple([expectedBoundingBox.center.t.x, expectedBoundingBox.center.t.y]), + numVertices:3, + radius:10, + color:"b", + orientation: expectedBoundingBox.center.rot.theta - (Double.pi / 2) + ) + // print("eh6") + ax.add_patch(supportPatch) + supportPatch = mpl.patches.RegularPolygon( + Python.tuple([startBoundingBox.center.t.x, startBoundingBox.center.t.y]), + numVertices:3, + radius:10, + color:"g", + orientation: startBoundingBox.center.rot.theta - (Double.pi / 2) + ) + ax.add_patch(supportPatch) + + + let endBoundingBox = OrientedBoundingBox(center: end, rows: 40, cols: 70) + ax.plot(endBoundingBox.corners.map{$0.x} + [endBoundingBox.corners.first!.x], endBoundingBox.corners.map{$0.y} + [endBoundingBox.corners.first!.y], "r-") + // print("eh3") + // ax.plot(Python.tuple(endBoundingBox.rot.) + supportPatch = mpl.patches.RegularPolygon( + Python.tuple([endBoundingBox.center.t.x, endBoundingBox.center.t.y]), + numVertices:3, + radius:10, + color:"r", + orientation: endBoundingBox.center.rot.theta - (Double.pi / 2) + ) + ax.add_patch(supportPatch) + + + + ax.set_xlim(firstGroundTruth.t.x - 200, firstGroundTruth.t.x + 200) + ax.set_ylim(firstGroundTruth.t.y - 200, firstGroundTruth.t.y + 200) + // print("eh7") + ax.title.set_text("Start (Green), End (Red), vs. Label (Blue)") + + let ax1 = axs[1] + ax1.plot(np.arange(0,errors.count), errors) + ax1.title.set_text("Error value") + + // var spec = mpl.gridspec.GridSpec(ncols: 2, nrows: 1, width_ratios: [2, 1]) + + + return (fig, ax) +} + + /// Calculate the translation error plane (X-Y) public func errorPlaneTranslation< Encoder: AppearanceModelEncoder, diff --git a/Sources/SwiftFusion/Inference/FactorBoilerplate.swift b/Sources/SwiftFusion/Inference/FactorBoilerplate.swift index 51db2e74..e2abf769 100644 --- a/Sources/SwiftFusion/Inference/FactorBoilerplate.swift +++ b/Sources/SwiftFusion/Inference/FactorBoilerplate.swift @@ -133,6 +133,29 @@ extension LinearizableFactor1 { } +/// A factor, with 2 variable(s), in a factor graph. Uses Vector1 +public protocol LinearizableFactor1a: LinearizableFactor, LinearizableFactor1_ + where Variables == Tuple1, LinearizableComponent == Self {} + +extension LinearizableFactor1a { + /// The variable vertex for this factor's 0-th variable. + public var input0ID: TypedID { return edges.head } + + + // Implements the error as the scalar value of the 1D Vector. + public func error(at x: Variables) -> Double { + return (errorVector(at: x) as! Vector1).x + } + + // Forwarding implementation. + @differentiable + public func errorVector(at x: Variables) -> ErrorVector { + return errorVector(x.head) + } +} + + + // Artifact of Swift weakness. /// Do not use this. Use `Factor2` instead. @@ -244,7 +267,8 @@ extension LinearizableFactor2 { // Implements the error as half the squared norm of the error vector. public func error(at x: Variables) -> Double { - return 0.5 * errorVector(at: x).squaredNorm + return errorVector(at: x).squaredNorm + // return 0.5 * errorVector(at: x).squaredNorm } // Forwarding implementation. 
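For reference: the new `LinearizableFactor1a` above only asks a conforming factor for a `Vector1` error vector, and its `error(at:)` returns that scalar directly (note also the `LinearizableFactor2` change in this hunk, which drops the 0.5 factor from the squared-norm error). A minimal conformance sketch, mirroring the shape of `ProbablisticTrackingFactor2` added later in this patch; the factor name and the x-coordinate prior are invented for illustration:

import SwiftFusion
import PenguinStructures

/// Toy 1-D factor: penalizes a Pose2's x-coordinate for straying from `target`.
public struct ScalarXPrior: LinearizableFactor1a {
  public typealias V0 = Pose2
  public let edges: Variables.Indices
  public let target: Double

  public init(_ poseId: TypedID<Pose2>, target: Double) {
    self.edges = Tuple1(poseId)
    self.target = target
  }

  @differentiable
  public func errorVector(_ pose: Pose2) -> Vector1 {
    Vector1(pose.t.x - target)  // error(at:) returns this scalar as-is
  }
}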
diff --git a/Sources/SwiftFusion/Inference/LatentAppearanceTrackingFactor.swift b/Sources/SwiftFusion/Inference/LatentAppearanceTrackingFactor.swift index f98b799b..fdf8a89a 100644 --- a/Sources/SwiftFusion/Inference/LatentAppearanceTrackingFactor.swift +++ b/Sources/SwiftFusion/Inference/LatentAppearanceTrackingFactor.swift @@ -2,14 +2,24 @@ import PenguinParallel import PenguinStructures import TensorFlow -public protocol AppearanceModelEncoder { - associatedtype HyperParameters - init(from imageBatch: Tensor, given: HyperParameters?) +// Same as Encoder. To be used only for an end-to-end classifier +public protocol Classifier { + @differentiable + func classify(_ imageBatch: Tensor) -> Tensor +} +// To be used as an encoder. +public protocol Encoder { @differentiable func encode(_ imageBatch: Tensor) -> Tensor } + +public protocol AppearanceModelEncoder : Encoder { + associatedtype HyperParameters + init(from imageBatch: Tensor, given: HyperParameters?) +} + public extension AppearanceModelEncoder { /// Extension allows to have a default nil parameter init(from imageBatch: Tensor) { diff --git a/Sources/SwiftFusion/Inference/ProbablisticTrackingFactor.swift b/Sources/SwiftFusion/Inference/ProbablisticTrackingFactor.swift index 585948e4..01eca8d2 100644 --- a/Sources/SwiftFusion/Inference/ProbablisticTrackingFactor.swift +++ b/Sources/SwiftFusion/Inference/ProbablisticTrackingFactor.swift @@ -88,3 +88,49 @@ public struct ProbablisticTrackingFactor< return Vector1(result) } } + + +public struct ProbablisticTrackingFactor2< + MyClassifier: Classifier + >: LinearizableFactor1a { + public typealias V0 = Pose2 + + public let edges: Variables.Indices + + public let measurement: ArrayImage + + public let classifier: MyClassifier + + public var patchSize: (Int, Int) + + public var appearanceModelSize: (Int, Int) + + + public init( + _ poseId: TypedID, + measurement: Tensor, + classifier: MyClassifier, + patchSize: (Int, Int), + appearanceModelSize: (Int, Int) + ) { + self.edges = Tuple1(poseId) + self.measurement = ArrayImage(measurement) + self.classifier = classifier + self.patchSize = patchSize + self.appearanceModelSize = appearanceModelSize + } + + @differentiable + public func errorVector(_ pose: Pose2) -> Vector1 { + // print("errorVector") + let region = OrientedBoundingBox(center: pose, rows: patchSize.0, cols: patchSize.1) + let patch = Tensor(measurement.patch(at: region, outputSize: appearanceModelSize).tensor) + let output = classifier.classify(patch.expandingShape(at: 0)).squeezingShape(at: 0) + + let sm = softmax(output) + let loglikelihood = -log(sm[1]) + log(sm[0]) + + var result = loglikelihood.scalarized() + return Vector1(result) + } +} \ No newline at end of file diff --git a/Sources/SwiftFusion/Optimizers/GradientDescent.swift b/Sources/SwiftFusion/Optimizers/GradientDescent.swift index 17f3353b..e011d7e2 100644 --- a/Sources/SwiftFusion/Optimizers/GradientDescent.swift +++ b/Sources/SwiftFusion/Optimizers/GradientDescent.swift @@ -29,3 +29,13 @@ public struct GradientDescent { values.move(along: -learningRate * objective.errorGradient(at: values)) } } + +extension GradientDescent : Optimizer { + public mutating func optimize(graph: FactorGraph, initial: inout VariableAssignments) { + // for _ in 0..<100 { + // self.update(&initial, objective: graph) + // } + print("gd doing nothing") + // self.update(&initial, objective: graph) + } +} \ No newline at end of file diff --git a/Sources/SwiftFusion/Optimizers/LM.swift b/Sources/SwiftFusion/Optimizers/LM.swift index 
76a66193..591f7473 100644 --- a/Sources/SwiftFusion/Optimizers/LM.swift +++ b/Sources/SwiftFusion/Optimizers/LM.swift @@ -189,3 +189,10 @@ public struct LM { } } } + +extension LM: Optimizer { + public mutating func optimize(graph: FactorGraph, initial: inout VariableAssignments) { + try? self.optimize(graph: graph, initial: &initial, hook: nil) + } +} + diff --git a/Sources/SwiftFusion/Optimizers/OptimizerProtocol.swift b/Sources/SwiftFusion/Optimizers/OptimizerProtocol.swift new file mode 100644 index 00000000..ce02dace --- /dev/null +++ b/Sources/SwiftFusion/Optimizers/OptimizerProtocol.swift @@ -0,0 +1,5 @@ + +public protocol Optimizer { + mutating func optimize(graph: FactorGraph, initial: inout VariableAssignments) -> () + +} \ No newline at end of file diff --git a/Sources/SwiftFusion/Probability/MultivariateGaussian.swift b/Sources/SwiftFusion/Probability/MultivariateGaussian.swift index 7e96aa4f..894d59e6 100644 --- a/Sources/SwiftFusion/Probability/MultivariateGaussian.swift +++ b/Sources/SwiftFusion/Probability/MultivariateGaussian.swift @@ -6,7 +6,7 @@ // // http://www.apache.org/licenses/LICENSE-2.0 // -// Unless required by applicable law or agreed to in writing, software +// Unless required by applicable law or agreed to in writing, softwarew // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and @@ -78,7 +78,7 @@ public struct MultivariateGaussian: GenerativeDensity { return t.scalarized() / 2.0 } - + /// Calculated normalized probability @differentiable public func probability(_ sample: T) -> Double { // - ToDo: Precalculate constant From d3753609e00445b5a72715577bdf4f006079a7d1 Mon Sep 17 00:00:00 2001 From: icourten3 Date: Fri, 27 Aug 2021 21:28:18 +0200 Subject: [PATCH 05/34] committing the script files --- Scripts/Andrew01.swift | 8 +- Scripts/Andrew05.swift | 120 ++++++++++++++++ Scripts/Brando01.swift | 136 ++++++++++++++++++ Scripts/Brando02.swift | 82 +++++++++++ Scripts/Brando03.swift | 266 +++++++++++++++++++++++++++++++++++ Scripts/Brando04.swift | 147 +++++++++++++++++++ Scripts/Brando05.swift | 128 +++++++++++++++++ Scripts/Brando06.swift | 168 ++++++++++++++++++++++ Scripts/Brando07.swift | 197 ++++++++++++++++++++++++++ Scripts/Brando08.swift | 54 +++++++ Scripts/Brando09.swift | 138 ++++++++++++++++++ Scripts/Brando10.swift | 84 +++++++++++ Scripts/Brando11.swift | 42 ++++++ Scripts/Brando12.swift | 179 +++++++++++++++++++++++ Scripts/Brando13.swift | 198 ++++++++++++++++++++++++++ Scripts/Brandounittest.swift | 0 Scripts/Fan03.swift | 14 +- Scripts/Fan05.swift | 3 +- Scripts/Fan12.swift | 2 +- Scripts/main.swift | 2 +- 20 files changed, 1962 insertions(+), 6 deletions(-) create mode 100644 Scripts/Andrew05.swift create mode 100644 Scripts/Brando01.swift create mode 100644 Scripts/Brando02.swift create mode 100644 Scripts/Brando03.swift create mode 100644 Scripts/Brando04.swift create mode 100644 Scripts/Brando05.swift create mode 100644 Scripts/Brando06.swift create mode 100644 Scripts/Brando07.swift create mode 100644 Scripts/Brando08.swift create mode 100644 Scripts/Brando09.swift create mode 100644 Scripts/Brando10.swift create mode 100644 Scripts/Brando11.swift create mode 100644 Scripts/Brando12.swift create mode 100644 Scripts/Brando13.swift create mode 100644 Scripts/Brandounittest.swift diff --git a/Scripts/Andrew01.swift b/Scripts/Andrew01.swift index 93219ad9..2119ce05 100644 --- 
a/Scripts/Andrew01.swift +++ b/Scripts/Andrew01.swift @@ -16,6 +16,7 @@ struct Andrew01: ParsableCommand { @Option(help: "Size of feature space") var featureSize: Int = 256 + // used to be 256 @Option(help: "Pretrained weights") var weightsFile: String? @@ -25,6 +26,7 @@ struct Andrew01: ParsableCommand { func run() { let np = Python.import("numpy") let kHiddenDimension = 512 + // used to be 512 let (imageHeight, imageWidth, imageChannels) = (40, 70, 1) @@ -40,6 +42,7 @@ struct Andrew01: ParsableCommand { } else { rae.load(weights: np.load("./oist_rae_weight_\(featureSize).npy", allow_pickle: true)) } + print("s") // let (imageHeight, imageWidth, imageChannels) = // (40, 70, 1) @@ -53,7 +56,7 @@ struct Andrew01: ParsableCommand { let testData = OISTBeeVideo(directory: dataDir, afterIndex: trainingDatasetSize, length: trackLength)! let trackerEvaluation = TrackerEvaluationDataset(testData) - + print("s1") let evalTracker: Tracker = {frames, start in var tracker = trainProbabilisticTracker( trainingData: data, @@ -69,8 +72,9 @@ struct Andrew01: ParsableCommand { return track } + print("s2") let plt = Python.import("matplotlib.pyplot") - let sequenceCount = 19 + let sequenceCount = 1 var results = trackerEvaluation.evaluate(evalTracker, sequenceCount: sequenceCount, deltaAnchor: 175, outputFile: "andrew01") for (index, value) in results.sequences.prefix(sequenceCount).enumerated() { diff --git a/Scripts/Andrew05.swift b/Scripts/Andrew05.swift new file mode 100644 index 00000000..cedd68b5 --- /dev/null +++ b/Scripts/Andrew05.swift @@ -0,0 +1,120 @@ +import ArgumentParser + +import SwiftFusion +import BeeDataset +import BeeTracking +import TensorFlow +import PythonKit +import Foundation + +import PenguinStructures + +/// Andrew01: RAE Tracker +struct Andrew05: ParsableCommand { + + @Option(help: "Run for number of frames") + var trackLength: Int = 80 + + @Option(help: "Size of feature space") + var featureSize: Int = 256 + + @Option(help: "Pretrained weights") + var weightsFile: String? + + + // Comparison SiamMask and RAE + func run() { + let np = Python.import("numpy") + let plt = Python.import("matplotlib.pyplot") + let pickle = Python.import("pickle") + + let trainingDatasetSize = 100 + + let dataDir = URL(fileURLWithPath: "./OIST_Data") + let data = OISTBeeVideo(directory: dataDir, length: trainingDatasetSize)! + let testData = OISTBeeVideo(directory: dataDir, afterIndex: trainingDatasetSize, length: trackLength)! + let testData2 = OISTBeeVideo(directory: dataDir, afterIndex: trainingDatasetSize, length: trackLength)! + + let trackerEvaluation = TrackerEvaluationDataset(testData) + + var i = 0 + let evalTrackerSiam: Tracker = {frames, start in + let decoder = JSONDecoder() + let trackPath = "Results/brando03/prediction_siammask_sequence_\(i).json" + let decodedTrack = try! decoder.decode([OrientedBoundingBox].self, from: Data(contentsOf: URL(fileURLWithPath: trackPath))) + i = i + 1 + return decodedTrack + } + let sequenceCount = 19 + + var results_siam = trackerEvaluation.evaluate(evalTrackerSiam, sequenceCount: sequenceCount, deltaAnchor: 175, outputFile: "andrew02") + i = 0 + let trackerEvaluation2 = TrackerEvaluationDataset(testData2) + let evalTrackerRae: Tracker = {frames, start in + let decoder = JSONDecoder() + let trackPath = "rae_256_updated_preds/prediction_rae_256_sequence_\(i).json" + let decodedTrack = try! 
decoder.decode([OrientedBoundingBox].self, from: Data(contentsOf: URL(fileURLWithPath: trackPath))) + i = i + 1 + return decodedTrack + } + var results_rae = trackerEvaluation2.evaluate(evalTrackerRae, sequenceCount: sequenceCount, deltaAnchor: 175, outputFile: "andrew01") + + + + + // for (index, value) in results_rae.sequences.prefix(sequenceCount).enumerated() { + for j in 0...sequenceCount-1 { + let value_rae = results_rae.sequences.prefix(sequenceCount)[j] + let index = j + let value_siam = results_siam.sequences.prefix(sequenceCount)[j] + let value = value_rae + var i: Int = 0 + // zip(value.subsequences.first!.frames, zip(value.subsequences.first!.prediction, value.subsequences.first!.groundTruth)).map { + // let (fig, axes) = plotFrameWithPatches(frame: $0.0, actual: $0.1.0.center, expected: $0.1.1.center, firstGroundTruth: value.subsequences.first!.groundTruth.first!.center) + // fig.savefig("Results/brando03/sequence\(index)/brando03\(i).png", bbox_inches: "tight") + // plt.close("all") + // i = i + 1 + // } + zip(value_rae.subsequences.first!.frames, zip(zip(value_rae.subsequences.first!.prediction,value_siam.subsequences.first!.prediction), value_rae.subsequences.first!.groundTruth)).map { + // let (fig, axes) = plotFrameWithPatches(frame: $0.0, actual: $0.1.0.center, expected: $0.1.1.center, firstGroundTruth: value.subsequences.first!.groundTruth.first!.center) + let (fig, _) = plotFrameWithPatches2(frame: $0.0, actual_box1: $0.1.0.0, actual_box2: $0.1.0.1, expected: $0.1.1.center, firstGroundTruth: value_rae.subsequences.first!.groundTruth.first!.center) + fig.savefig("Results/andrew01/sequence\(index)/comparison_\(i).png", bbox_inches: "tight") + plt.close("all") + i = i + 1 + } + + // let (fig, axes) = plt.subplots(1, 2, figsize: Python.tuple([20, 20])).tuple2 + // fig.suptitle("Tracking positions and Subsequence Average Overlap with Accuracy \(String(format: "%.2f", value_rae.subsequences.first!.metrics.accuracy)) and Robustness \(value_rae.subsequences.first!.metrics.robustness).") + // print("First Ground Truth") + // value_rae.subsequences.map { + // print($0.prediction.first!) + // $0.prediction.map{print("\(round($0.center.t.x)) \(round($0.center.t.y)) \($0.center.rot.theta) \(40) \(70)")} + + // plotPoseDifference( + // track: $0.prediction.map{$0.center}, withGroundTruth: $0.groundTruth.map{$0.center}, on: axes[0] + // ) + // } + // plotOverlap( + // metrics: value_rae.subsequences.first!.metrics, on: axes[1] + // ) + // fig.savefig("Results/andrew01/andrew01_subsequence\(index).png", bbox_inches: "tight") + print("Accuracy for sequence is \(value_rae.sequenceMetrics.accuracy) with Robustness of \(value_rae.sequenceMetrics.robustness)") + } + + // print("Accuracy for all sequences is \(results.trackerMetrics.accuracy) with Robustness of \(results.trackerMetrics.robustness)") + // let f = Python.open("Results/EAO/rae_em_\(featureSize).data", "wb") + // pickle.dump(results.expectedAverageOverlap.curve, f) + + + } + +} + +/// Returns `t` as a Swift tuple. +fileprivate func unpack(_ t: Tuple2) -> (A, B) { + return (t.head, t.tail.head) +} +/// Returns `t` as a Swift tuple. 
+fileprivate func unpack(_ t: Tuple1) -> (A) { + return (t.head) +} \ No newline at end of file diff --git a/Scripts/Brando01.swift b/Scripts/Brando01.swift new file mode 100644 index 00000000..a4c3cb03 --- /dev/null +++ b/Scripts/Brando01.swift @@ -0,0 +1,136 @@ +import ArgumentParser + +import SwiftFusion +import BeeDataset +import BeeTracking +import TensorFlow +import PythonKit +import Foundation + +import PenguinStructures + +/// Brando01 Tracker OpenCV +struct Brando01: ParsableCommand { + // @Option(help: "Run on track number x") + // var trackId: Int = 0 + + // @Option(help: "Run for number of frames") + // var trackLength: Int = 80 + + // @Option(help: "Size of feature space") + // var featureSize: Int = 5 + + // @Option(help: "Pretrained weights") + // var weightsFile: String? + + // Runs RAE tracker on n number of sequences and outputs relevant images and statistics + // Make sure you have a folder `Results/andrew01` before running + func run() { + + let dataDir = URL(fileURLWithPath: "./OIST_Data") + let data = OISTBeeVideo(directory: dataDir, length: 100)! + let testData = OISTBeeVideo(directory: dataDir, afterIndex: 100, length: 80)! + print("number of frames in training data:", data.labels.count) + print("number of frames in testing data", testData.labels.count, "\n\n") + + + let trackerEvaluation = TrackerEvaluationDataset(testData) + + let np = Python.import("numpy") + let cv2 = Python.import("cv2") + // print(Python.version) + // print("hello") + + let evalTracker: Tracker = {frames, start in + + let tracker = cv2.TrackerMIL_create() + // var tracker = cv2.Tracker_create("MIL") + // print(frames.first!.makeNumpyArray()) + // BB = (width-35,height-35,70,70) + //leads to an error when BBox area is more than 40*70? + var BB = Python.tuple([Int(start.center.t.x)-20, Int(start.center.t.y)-35, 40, 70]) + var smallframe = np.array(frames.first!.makeNumpyArray()) + print("hello2") + // cv2.circle(smallframe, Python.tuple([Int(start.center.t.x),Int(start.center.t.y)]), 10, Python.tuple([255,255,255]), 5) + let leftpt = Python.tuple([Int(start.center.t.x)-35, Int(start.center.t.y)-35]) + let rgtpt = Python.tuple([Int(start.center.t.x)+35, Int(start.center.t.y)+35]) + cv2.rectangle(smallframe, leftpt, rgtpt, Python.tuple([0,150,0]), 5) + print("hello3") + cv2.imwrite("./image_new.png", smallframe) + // tracker.init(frames.first!.makeNumpyArray(), BB) + tracker[dynamicMember: "init"](frames.first!.makeNumpyArray(), BB) + var results = [PythonObject]() + for (index, frame) in frames.enumerated() { + var a = tracker[dynamicMember: "update"](frame.makeNumpyArray()).tuple2 + let track_success = a.0 + let newBB = a.1 + if Bool(track_success)! { + results.append(newBB) + } + // newBB + // let smallframe = frame.makeNumpyArray() + // cv2.rectangle(smallframe, leftpt, rgtpt, Python.tuple([0,150,0]), 5) + // cv2.imshow("SiamMask", smallframe) + + + } + print("printing python BB") + results.map{print($0)} + // print("hello") + // print(type(of: results)) + // print(results) + var track = [OrientedBoundingBox]() + for result in results { + let pythonBB = result.tuple4 + let rows = Int(pythonBB.2)! + let cols = Int(pythonBB.3)! + let rot = Rot2(0) + let vect = Vector2(Double(pythonBB.0)!+20, Double(pythonBB.1)!+35) + // let vect = Vector2(Double(pythonBB.0)! + Double(rows)/2, Double(pythonBB.1)! 
+ Double(cols)/2) + let center = Pose2(rot, vect) + let swiftBB = OrientedBoundingBox(center: center, rows: rows, cols: cols) + track.append(swiftBB) + } + return track + } + + + + + + let plt = Python.import("matplotlib.pyplot") + let sequenceCount = 1 + var results = trackerEvaluation.evaluate(evalTracker, sequenceCount: 1, deltaAnchor: 175, outputFile: "brando01") + // print(results) + for (index, value) in results.sequences.prefix(1).enumerated() { + var i: Int = 0 + zip(value.subsequences.first!.frames, zip(value.subsequences.first!.prediction, value.subsequences.first!.groundTruth)).map { + let (fig, axes) = plotFrameWithPatches(frame: $0.0, actual: $0.1.0.center, expected: $0.1.1.center, firstGroundTruth: value.subsequences.first!.groundTruth.first!.center) + fig.savefig("Results/brando01/sequence\(index)/brando01\(i).png", bbox_inches: "tight") + plt.close("all") + i = i + 1 + } + + + let (fig, axes) = plt.subplots(1, 2, figsize: Python.tuple([20, 20])).tuple2 + fig.suptitle("Tracking positions and Subsequence Average Overlap with Accuracy \(String(format: "%.2f", value.subsequences.first!.metrics.accuracy)) and Robustness \(value.subsequences.first!.metrics.robustness).") + + value.subsequences.map { + plotPoseDifference( + track: $0.prediction.map{$0.center}, withGroundTruth: $0.groundTruth.map{$0.center}, on: axes[0] + ) + } + plotOverlap( + metrics: value.subsequences.first!.metrics, on: axes[1] + ) + fig.savefig("Results/brando01/brando01_subsequence\(index).png", bbox_inches: "tight") + print("Accuracy for sequence is \(value.sequenceMetrics.accuracy) with Robustness of \(value.sequenceMetrics.robustness)") + } + + print("Accuracy for all sequences is \(results.trackerMetrics.accuracy) with Robustness of \(results.trackerMetrics.robustness)") + + + + } + +} \ No newline at end of file diff --git a/Scripts/Brando02.swift b/Scripts/Brando02.swift new file mode 100644 index 00000000..7b271da2 --- /dev/null +++ b/Scripts/Brando02.swift @@ -0,0 +1,82 @@ +import ArgumentParser + +import SwiftFusion +import BeeDataset +import BeeTracking +import TensorFlow +import PythonKit +import Foundation + +import PenguinStructures + +/// Brando01 OpenCV tracker +struct Brando02: ParsableCommand { + func run() { + + let np = Python.import("numpy") + let cv2 = Python.import("cv2") + let os = Python.import("os") + // let imutils = Python.import("utils") + print(Python.version) + print(Python.tuple([1,3,4])) + let image_names = os.listdir("../OIST_Data/downsampled") + let track_names = os.listdir("../OIST_Data/tracks") + image_names.sort() + track_names.sort() + // let tracker = cv2.TrackerCSRT_create() + let track = track_names[10] + let frame = cv2.imread("../OIST_Data/downsampled/" + image_names[0]) + let centers = Python.list() + let fs = Python.open("../OIST_Data/tracks/" + track, "r") + let lines = fs.readlines() + print(type(of: lines)) + var i = 0 + for line in lines { + if i == 0 { + i += 1 + continue + } + i += 1 + // print(type(of: line)) + let lineSwift = String(line) + // print(type(of: lineSwift)) + + let lineSwift2 = lineSwift ?? "" + // print(lineSwift2) + let nums = lineSwift2.components(separatedBy: " ") + // print(nums) + let height = Float(nums[1]) + let width = Float(nums[0]) + centers.append(Python.tuple([Python.float(width),Python.float(height)])) + } + // print(centers) + + + let width1 = Float(centers[0][0]) + let height1 = Float(centers[0][1]) + let width = width1 ?? 0 + let height = height1 ?? 
0 + let BB = Python.tuple([Int(width-35),Int(height-35),70,70]) + let tracker = cv2.TrackerMIL_create() + // print(frames.first!.makeNumpyArray()) + // BB = (width-35,height-35,70,70) + print(type(of: tracker)) + tracker[dynamicMember: "init"](frame, BB) + var results = [PythonObject]() + for image_name in image_names { + let framei = cv2.imread("../OIST_Data/downsampled/" + image_name) + var a = tracker[dynamicMember: "update"](framei).tuple2 + let track_success = a.0 + let newBB = a.1 + if Bool(track_success)! { + results.append(newBB) + } + // if Bool(track_success) { + // results.append(BB) + // } + } + + + } + +} \ No newline at end of file diff --git a/Scripts/Brando03.swift b/Scripts/Brando03.swift new file mode 100644 index 00000000..7f7f9a1e --- /dev/null +++ b/Scripts/Brando03.swift @@ -0,0 +1,266 @@ +import ArgumentParser + +import SwiftFusion +import BeeDataset +import BeeTracking +import TensorFlow +import PythonKit +import Foundation + +import PenguinStructures + +/// Brando01 SiamMask +struct Brando03: ParsableCommand { + // @Option(help: "Run on track number x") + // var trackId: Int = 0 + + // @Option(help: "Run for number of frames") + // var trackLength: Int = 80 + + // @Option(help: "Size of feature space") + // var featureSize: Int = 5 + + // @Option(help: "Pretrained weights") + // var weightsFile: String? + + // Runs RAE tracker on n number of sequences and outputs relevant images and statistics + // Make sure you have a folder `Results/andrew01` before running + func run() { + + let dataDir = URL(fileURLWithPath: "./OIST_Data") + // let data = OISTBeeVideo(directory: dataDir, length: 100)! + let testData = OISTBeeVideo(directory: dataDir, afterIndex: 100, length: 80)! + // print("number of frames in training data:", data.labels.count) + print("number of frames in testing data", testData.labels.count, "\n\n") + + + let trackerEvaluation = TrackerEvaluationDataset(testData) + // let shpl = Python.import("shapely") + let os = Python.import("os") + + // print(os.environ) + // let plt = Python.import("matplotlib") + let torch = Python.import("torch") + + let np = Python.import("numpy") + let smtools = Python.import("SiamMask.tools") + let smutils = Python.import("SiamMask.utils") + let cfhelper = Python.import("SiamMask.utils.config_helper") + let ldhelper = Python.import("SiamMask.utils.load_helper") + let smtest = Python.import("SiamMask.tools.test") + + + let cv2 = Python.import("cv2") + + let argparse = Python.import("argparse") + let parser = argparse.ArgumentParser() + + parser.add_argument("--resume") + parser.add_argument("--config") + parser.add_argument("--base_path") + // parser.add_argument("--cpu") + // let args = parser.parse_args(["--resume", "../SiamMask/experiments/siammask_sharp/SiamMask_VOT.pth", "--config", "../SiamMask/experiments/siammask_sharp/config_vot.json", "--base_path", "./OIST_Data/downsampled"]) + // let args = parser.parse_args(["--resume", "../SiamMask/checkpoint_e20.pth", "--config", "../SiamMask/experiments/siammask_sharp/config_vot.json", "--base_path", "./OIST_Data/downsampled"]) + let args = parser.parse_args(["--resume", "../SiamMask/model_sharp/checkpoint_e20.pth", "--config", "../SiamMask/experiments/siammask_sharp/config_vot.json", "--base_path", "./OIST_Data/downsampled"]) + + print("ARGUMENTS", args) + + + // let imutils = Python.import("utils") + print(Python.version) + print("hello") + let evalTracker: Tracker = { frames, start in + + //SIAM MASK TRACKER IS HERE + let device = torch.device("cpu") + 
torch.backends.cudnn.benchmark = true + + // # Setup Model + let cfg = cfhelper.load_config(args) + let custom = Python.import("SiamMask.experiments.siammask_sharp.custom") + // // from custom import Custom + var siammask = custom.Custom(anchors: cfg["anchors"]) + // if args.resume: + // assert isfile(args.resume), 'Please download {} first.'.format(args.resume) + siammask = ldhelper.load_pretrain(siammask, args.resume) + + siammask.eval().to(device) + + // # Parse Image file + // img_files = sorted(glob.glob(join(args.base_path, '*.jp*'))) + // ims = [cv2.imread(imf) for imf in img_files] + + // # Select ROI + // cv2.namedWindow("SiamMask", cv2.WND_PROP_FULLSCREEN) + // # cv2.setWindowProperty("SiamMask", cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN) + // try: + // init_rect = cv2.selectROI('SiamMask', ims[0], False, False) + let init_rect = Python.tuple([Int(start.center.t.x)-20, Int(start.center.t.y)-20, 40, 70]) + let tup = init_rect.tuple4 + let x = tup.0 + let y = tup.1 + let w = tup.2 + let h = tup.3 + // x, y, w, h = init_rect + // // except: + // // exit() + + // var toc = 0 + var state: PythonObject = 0 + var results = [PythonObject]() + + for (f, im) in frames.enumerated() { + // for f, im in enumerate(ims): + // let tic = cv2.getTickCount() + let im_np = im.makeNumpyArray() + let im_3d = np.squeeze(np.stack(Python.tuple([im_np, im_np, im_np]), axis: 2)) + // print("image shape", im_3d.shape) + // cv2.imshow("SiamMask", im_3d) + if f == 0 { // init + let target_pos = np.array([x + w / 2, y + h / 2]) + let target_sz = np.array([w, h]) + state = smtest.siamese_init(im_3d, target_pos, target_sz, siammask, cfg["hp"], device: device) //# init tracker + results.append(Python.tuple([Int(x + w / 2)!, Int(y + h / 2)!])) + } else if f > 0 { //# tracking + state = smtest.siamese_track(state, im_3d, mask_enable: true, refine_enable: true, device: device) //# track + let location = state["ploygon"].flatten() + + // cv2.polylines(im_3d, [np.int0(location).reshape(Python.tuple([-1, 1, 2]))], true, Python.tuple([0,255,0]), 3) + // cv2.circle(im_3d, Python.tuple([centx, centy]), 10, Python.tuple([0,255,255]), 5) + // cv2.imwrite("SiamMask"+String(f)+".png", im_3d) + // let mask = state["mask"] > state["p"].seg_thr + results.append(location) + // im[:, :, 2] = (mask > 0) * 255 + (mask == 0) * im[:, :, 2] + + + } + + } + // results.map{print($0)} + // print("hello") + // print(type(of: results)) + // print(results) + var track = [OrientedBoundingBox]() + for (i, result) in results.enumerated() { + if i > 0 { + let location = result + let centx = Int((location[0]+location[2]+location[4]+location[6])/4)! + let centy = Int((location[1]+location[3]+location[5]+location[7])/4)! + let dx1 = location[0]-location[2] + let dy1 = location[1]-location[3] + let dx2 = location[0]-location[6] + let dy2 = location[1]-location[7] + let dist1 = sqrt(pow(Double(dx1)!, 2) + pow(Double(dy1)!, 2)) + let dist2 = (pow(Double(dx2)!, 2) + pow(Double(dy2)!, 2)).squareRoot() + let locx: Int + let locy: Int + let rows: Int + let cols: Int + if dist1 < dist2 { + locx = Int((location[0]+location[2])/2)! + locy = Int((location[1]+location[3])/2)! + rows = Int(dist1) + cols = Int(dist2) + } else { + locx = Int((location[0]+location[6])/2)! + locy = Int((location[1]+location[7])/2)! 
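+          // (Equivalently, the sign fix-ups applied to `theta` after this
+          //  branch reconstruct atan2(Double(locy - centy), Double(locx - centx))
+          //  from the |dy|/|dx| arctangent, up to a 2*pi wrap when both
+          //  offsets are negative.)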
+ rows = Int(dist2) + cols = Int(dist1) + } + let dx = Double(abs(locx - centx)) + let dy = Double(abs(locy - centy)) + var theta = Double.pi/2 + print("polygon", result) + print("center", centx, centy) + print("dx and dy", dx, dy) + print("theta initial", theta) + if dx != 0 { + theta = atan(dy/dx) + } + // if locx >= centx && locy >= centy{} + + if locx >= centx && locy < centy{ + theta = -theta + } else if locx < centx && locy >= centy{ + theta = .pi - theta + } else if locx < centx && locy < centy{ + theta = .pi + theta + } + print("theta final", theta) + + let rot = Rot2(theta) + let vect = Vector2(Double(centx), Double(centy)) + // let vect = Vector2(Double(pythonBB.0)! + Double(rows)/2, Double(pythonBB.1)! + Double(cols)/2) + print("rotation", rot, "\n\n") + let center = Pose2(rot, vect) + let swiftBB = OrientedBoundingBox(center: center, rows: rows, cols: cols) + track.append(swiftBB) + } else { + let swiftBB = start + track.append(swiftBB) + } + } + // print(track) + return track + } + + let plt = Python.import("matplotlib.pyplot") + let sequenceCount = 20 + var eval_results = trackerEvaluation.evaluate(evalTracker, sequenceCount: sequenceCount, deltaAnchor: 175, outputFile: "brando03") + // print(results) + print("done evaluating") + var total_overlap = eval_results.sequences.prefix(sequenceCount)[0].subsequences.first!.metrics.overlap + // total_overlap += eval_results.sequences.prefix(sequenceCount)[1].subsequences.first!.metrics.overlap + + for (index, value) in eval_results.sequences.prefix(sequenceCount).enumerated() { + // var i: Int = 0 + // zip(value.subsequences.first!.frames, zip(value.subsequences.first!.prediction, value.subsequences.first!.groundTruth)).map { + // let (fig, axes) = plotFrameWithPatches(frame: $0.0, actual: $0.1.0.center, expected: $0.1.1.center, firstGroundTruth: value.subsequences.first!.groundTruth.first!.center) + // fig.savefig("Results/brando03/sequence\(index)/brando03\(i).png", bbox_inches: "tight") + // plt.close("all") + // i = i + 1 + // } + print("done,", index) + let (fig, axes) = plt.subplots(1, 2, figsize: Python.tuple([20, 20])).tuple2 + fig.suptitle("Tracking positions and Subsequence Average Overlap with Accuracy \(String(format: "%.2f", value.subsequences.first!.metrics.accuracy)) and Robustness \(value.subsequences.first!.metrics.robustness).") + value.subsequences.map { + //zip($0.prediction, $0.groundTruth).enumerated().map{($0.0, $0.1.0.center, $0.1.1.center)}) + let encoder = JSONEncoder() + let data = try! 
encoder.encode($0.prediction) + FileManager.default.createFile(atPath: "Results/brando03/prediction_siammask_sequence_\(index).json", contents: data, attributes: nil) + } + value.subsequences.map { + plotPoseDifference( + track: $0.prediction.map{$0.center}, withGroundTruth: $0.groundTruth.map{$0.center}, on: axes[0] + ) + } + plotOverlap( + metrics: value.subsequences.first!.metrics, on: axes[1] + ) + + fig.savefig("Results/brando03/brando03_subsequence\(index).png", bbox_inches: "tight") + print("Accuracy for sequence is \(value.sequenceMetrics.accuracy) with Robustness of \(value.sequenceMetrics.robustness)") + } + print("Accuracy for all sequences is \(eval_results.trackerMetrics.accuracy) with Robustness of \(eval_results.trackerMetrics.robustness)") + + let pickle = Python.import("pickle"); + let f = Python.open("Results/EAO/siammask.data", "wb") + pickle.dump(eval_results.expectedAverageOverlap.curve, f) + + + // var average_overlap = [Double]() + // for (i, val) in total_overlap.enumerated() { + // average_overlap.append(val/Double(sequenceCount)) + // } + // let (fig, ax) = plt.subplots().tuple2 + // ax.plot(average_overlap) + // ax.set_title("Overlap") + // fig.savefig("average_overlap.png") + + + + + + } + +} \ No newline at end of file diff --git a/Scripts/Brando04.swift b/Scripts/Brando04.swift new file mode 100644 index 00000000..de616362 --- /dev/null +++ b/Scripts/Brando04.swift @@ -0,0 +1,147 @@ +import ArgumentParser + +import SwiftFusion +import BeeDataset +import BeeTracking +import TensorFlow +import PythonKit +import Foundation + + + + +/// Fan12: RAE training +struct Brando04: ParsableCommand { + typealias LikelihoodModel = TrackingLikelihoodModel + + + + @Flag(help: "Training mode") + var training: Bool = false + + let num_boxes: Int = 3000 + + func getTrainingDataBG( + from dataset: OISTBeeVideo, + numberForeground: Int = 3000 + ) -> [LikelihoodModel.Datum] { + print("bg") + + // var allBoxes = [LikelihoodModel.Datum]() + let bgBoxes = dataset.makeBackgroundBoundingBoxes(patchSize: (40, 70), batchSize: num_boxes).map { + (frame: $0.frame, type: LikelihoodModel.PatchType.bg, obb: $0.obb) + } + print("bg2") + + + return bgBoxes + } + + func getTrainingDataFG( + from dataset: OISTBeeVideo, + numberForeground: Int = 3000 + ) -> [LikelihoodModel.Datum] { + print("fg") + // var allBoxes = [LikelihoodModel.Datum]() + let fgBoxes = dataset.makeForegroundBoundingBoxes(patchSize: (40, 70), batchSize: num_boxes).map { + (frame: $0.frame, type: LikelihoodModel.PatchType.fg, obb: $0.obb) + } + print("fg2") + + return fgBoxes + } + + + + + // Just runs an RP tracker and saves image to file + // Make sure you have a folder `Results/fan12` before running + func run() { + let folderName = "classifiers/classifiers_today" + if !FileManager.default.fileExists(atPath: folderName) { + do { + try FileManager.default.createDirectory(atPath: folderName, withIntermediateDirectories: true, attributes: nil) + } catch { + print(error.localizedDescription) + } + } else { + print("folder exists") + } + + + let dataDir = URL(fileURLWithPath: "./OIST_Data") + print("hello") + + // if I call makeBackgroundBoundingBoxes, makeForegroundBoundingBoxes. + let trainingDataset = OISTBeeVideo(directory: dataDir, length: 100)! + print("done") + var bgBoxes = getTrainingDataBG(from: trainingDataset) + print(bgBoxes.count) + // let trainingDataset2 = OISTBeeVideo(directory: dataDir, length: 100)! 
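+    // A sketch of an equivalent construction for the fg/bg interleaving loop
+    // further below; illustration only, not part of this patch, and it assumes
+    // both box arrays hold num_boxes elements with num_boxes a multiple of 100:
+    // let interleaved: [LikelihoodModel.Datum] =
+    //   stride(from: 0, to: fgBoxes.count, by: 100).flatMap {
+    //     bgBoxes[$0..<$0 + 100] + fgBoxes[$0..<$0 + 100]
+    //   }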
+ print("2") + var fgBoxes = getTrainingDataFG(from: trainingDataset) + print(fgBoxes.count) + + // print("all boxes") + var allBoxes = [LikelihoodModel.Datum]() + for i in 0...(fgBoxes.count-1)/100 { + //appending 100 bounding boxes + for j in 0...99 { + allBoxes.append(bgBoxes[j+i*100]) + } + //appending 100 bounding boxes + for j in 0...99 { + allBoxes.append(fgBoxes[j+i*100]) + } + } + print("total boxes", allBoxes.count) + // for i in 0...allBoxes.count-1 { + // print(i) + // print(allBoxes[i].type) + // print(allBoxes[i].obb) + // } + + + + let patches = Tensor(stacking: allBoxes.map { $0.frame!.patch(at: $0.obb)}) + let labels = Tensor(stacking: allBoxes.map { $0.type == TrackingLikelihoodModel.PatchType.bg ? Tensor(0) : Tensor(1)}) + print("shape of patches", patches.shape) + print("shape of labels", labels.shape) + // return + + // let trainingData = allBoxes + // let trainingData = (images, labels) + // print("training data shape", trainingData.shape) + print("training data done") + // for featSize in [64,128,256] { + // for kHiddenDimension in [256,512] { + let kHiddenDimension = 512 + let featSize = 256 + for i in 1...7 { + print("Training...") + // let rae: PretrainedNNClassifier = PretrainedNNClassifier( + // patches: patches, + // labels: labels, + // given: PretrainedNNClassifier.HyperParameters(hiddenDimension: kHiddenDimension, latentDimension: featSize, weightFile: "") + // ) + // rae.save(to: "./classifiers/classifiers_today/classifier_weight_\(kHiddenDimension)_\(featSize)_\(i).npy") + // let rae: PretrainedSmallerNNClassifier = PretrainedSmallerNNClassifier( + // patches: patches, + // labels: labels, + // given: PretrainedSmallerNNClassifier.HyperParameters(latentDimension: featSize, weightFile: "") + // ) + // rae.save(to: "./classifiers/classifiers_today/small_classifier_weight_\(featSize)_\(i).npy") + let rae: PretrainedLargerNNClassifier = PretrainedLargerNNClassifier( + patches: patches, + labels: labels, + given: PretrainedLargerNNClassifier.HyperParameters(hiddenDimension: kHiddenDimension, latentDimension: featSize, weightFile: "") + ) + rae.save(to: "./classifiers/classifiers_today/large_classifier_weight_\(kHiddenDimension)_\(featSize)_\(i).npy") + print("saved") + } + // } + // } + + + } +} diff --git a/Scripts/Brando05.swift b/Scripts/Brando05.swift new file mode 100644 index 00000000..4746bda9 --- /dev/null +++ b/Scripts/Brando05.swift @@ -0,0 +1,128 @@ +import ArgumentParser + +import SwiftFusion +import BeeDataset +import BeeTracking +import TensorFlow +import PythonKit +import Foundation + +import PenguinStructures + +/// Brando05: TRACKING with NN Classifier +struct Brando05: ParsableCommand { + @Option(help: "Run for number of frames") + var trackLength: Int = 80 + + // Runs NNClassifier tracker on n number of sequences and outputs relevant images and statistics + func run() { + let np = Python.import("numpy") + let featureSizes = [256] + let kHiddenDimensions = [512] + let iterations = [1,2,3,4,5,6,7] + let trainingDatasetSize = 100 + + let dataDir = URL(fileURLWithPath: "./OIST_Data") + let data = OISTBeeVideo(directory: dataDir, length: trainingDatasetSize)! + let testData = OISTBeeVideo(directory: dataDir, afterIndex: trainingDatasetSize, length: trackLength)! 
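+    // For intuition: makeProbabilisticTracker2 below presumably builds on
+    // ProbablisticTrackingFactor2 from earlier in this series, which scores a
+    // candidate pose by a softmax log-likelihood ratio. Standalone sketch,
+    // assuming a loaded `classifier` and a single [40, 70, 1] patch tensor:
+    // let logits = classifier.classify(patch.expandingShape(at: 0)).squeezingShape(at: 0)
+    // let sm = softmax(logits)
+    // let error = -log(sm[1]) + log(sm[0])  // small when P(foreground) dominates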
+ + let trackerEvaluation = TrackerEvaluationDataset(testData) + + for featureSize in featureSizes { + for kHiddenDimension in kHiddenDimensions { + for j in iterations { + + + let (imageHeight, imageWidth, imageChannels) = + (40, 70, 1) + + // var classifier = SmallerNNClassifier( + // imageHeight: imageHeight, imageWidth: imageWidth, imageChannels: imageChannels, latentDimension: featureSize + // ) + var classifier = LargerNNClassifier( + imageHeight: imageHeight, imageWidth: imageWidth, imageChannels: imageChannels, hiddenDimension: kHiddenDimension, latentDimension: featureSize + ) + // LOAD THE CLASSIFIER + // classifier.load(weights: np.load("./classifiers/classifiers_today/classifier_weight_\(kHiddenDimension)_\(featureSize)_\(j)_doubletraining.npy", allow_pickle: true)) + classifier.load(weights: np.load("./classifiers/classifiers_today/large_classifier_weight_\(kHiddenDimension)_\(featureSize)_\(j).npy", allow_pickle: true)) + // classifier.load(weights: np.load("./classifiers/classifiers_today/small_classifier_weight_\(featureSize)_\(j).npy", allow_pickle: true)) + + let evalTracker: Tracker = {frames, start in + var tracker = makeProbabilisticTracker2( + model: classifier, + frames: frames, + targetSize: (40, 70) + ) + let prediction = tracker.infer(knownStart: Tuple1(start.center), withSampling: true) + let track = tracker.frameVariableIDs.map { OrientedBoundingBox(center: prediction[unpack($0)], rows: 40, cols:70) } + return track + + } + // print(evalTracker) + // return + + let plt = Python.import("matplotlib.pyplot") + let sequenceCount = 1 + var results = trackerEvaluation.evaluate(evalTracker, sequenceCount: sequenceCount, deltaAnchor: 175, outputFile: "classifier") + + + for (index, value) in results.sequences.prefix(sequenceCount).enumerated() { + let folderName = "Results/classifier/classifier_\(kHiddenDimension)_\(featureSize)_\(j)_10000sampling" + print(folderName) + if !FileManager.default.fileExists(atPath: folderName) { + do { + try FileManager.default.createDirectory(atPath: folderName, withIntermediateDirectories: true, attributes: nil) + // print("here") + try FileManager.default.createDirectory(atPath: folderName + "/sequence0", withIntermediateDirectories: true, attributes: nil) + // print("here2") + } catch { + print(error.localizedDescription) + } + } + + var i: Int = 0 + zip(value.subsequences.first!.frames, zip(value.subsequences.first!.prediction, value.subsequences.first!.groundTruth)).map { + let (fig, axes) = plotFrameWithPatches(frame: $0.0, actual: $0.1.0.center, expected: $0.1.1.center, firstGroundTruth: value.subsequences.first!.groundTruth.first!.center) + fig.savefig(folderName + "/sequence\(index)/classifier_\(i).png", bbox_inches: "tight") + plt.close("all") + i = i + 1 + } + + let (fig, axes) = plt.subplots(1, 2, figsize: Python.tuple([20, 20])).tuple2 + fig.suptitle("Tracking positions and Subsequence Average Overlap with Accuracy \(String(format: "%.2f", value.subsequences.first!.metrics.accuracy)) and Robustness \(value.subsequences.first!.metrics.robustness).") + + value.subsequences.map { + plotPoseDifference( + track: $0.prediction.map{$0.center}, withGroundTruth: $0.groundTruth.map{$0.center}, on: axes[0] + ) + } + plotOverlap( + metrics: value.subsequences.first!.metrics, on: axes[1] + ) + + fig.savefig(folderName + "/classifier_\(kHiddenDimension)_\(featureSize)_\(j)subsequence\(index).png", bbox_inches: "tight") + print("Accuracy for sequence is \(value.sequenceMetrics.accuracy) with Robustness of 
\(value.sequenceMetrics.robustness)") + } + + print("Accuracy for all sequences is \(results.trackerMetrics.accuracy) with Robustness of \(results.trackerMetrics.robustness)") + + + + + } + } + } + + + + } +} + +/// Returns `t` as a Swift tuple. +fileprivate func unpack(_ t: Tuple2) -> (A, B) { + return (t.head, t.tail.head) +} +/// Returns `t` as a Swift tuple. +fileprivate func unpack(_ t: Tuple1) -> (A) { + return (t.head) +} \ No newline at end of file diff --git a/Scripts/Brando06.swift b/Scripts/Brando06.swift new file mode 100644 index 00000000..871035e9 --- /dev/null +++ b/Scripts/Brando06.swift @@ -0,0 +1,168 @@ +import ArgumentParser + +import SwiftFusion +import BeeDataset +import BeeTracking +import TensorFlow +import PythonKit +import Foundation + + +import PenguinStructures + +// This script produces HISTOGRAMS for the output of NN Classifiers +struct Brando06: ParsableCommand { + + func run() { + // let featSizes = [8,16,64,128,256] + let dataDir = URL(fileURLWithPath: "./OIST_Data") + let testData = OISTBeeVideo(directory: dataDir, afterIndex: 100, length: 80)! + let batchSize = 3000 + // print("tests here1") + let fgBoxes = testData.makeForegroundBoundingBoxes(patchSize: (40, 70), batchSize: batchSize) + // print("here 1.5") + let bgBoxes = testData.makeBackgroundBoundingBoxes(patchSize: (40, 70), batchSize: batchSize) + // print("tests here2") + let fgpatches = Tensor(stacking: fgBoxes.map { $0.frame!.patch(at: $0.obb)}) + let bgpatches = Tensor(stacking: bgBoxes.map { $0.frame!.patch(at: $0.obb)}) + let np = Python.import("numpy") + let kHiddenDimensions = [256,512] + let featSizes = [64,128,256] + print("uu") + var plt = Python.import("matplotlib.pyplot") + + + for i in featSizes { + for j in kHiddenDimensions { + for num in 1...7 { + + let featureSize = i + let kHiddenDimension = j + + + let (imageHeight, imageWidth, imageChannels) = + (40, 70, 1) + + var classifier = NNClassifier( + imageHeight: imageHeight, imageWidth: imageWidth, imageChannels: imageChannels, + hiddenDimension: kHiddenDimension, latentDimension: featureSize + ) + var weightsFile: String? + if let weightsFile = weightsFile { + classifier.load(weights: np.load(weightsFile, allow_pickle: true)) + } else { + classifier.load(weights: np.load("./classifiers/classifiers_today/classifier_weight_\(kHiddenDimension)_\(featureSize)_\(num).npy", allow_pickle: true)) + } + + let outfg = classifier.classify(fgpatches) + let outbg = classifier.classify(bgpatches) + let softmaxfg = softmax(outfg) + let softmaxbg = softmax(outbg) + print(outfg[0...3]) + print(softmaxfg[0...3]) + + let shapefg = outfg.shape + let shapebg = outbg.shape + // print("fg", outfg) + // print("bg", outbg) + + var fgsum0 = 0.0 + var fgsum1 = 0.0 + var bgsum0 = 0.0 + var bgsum1 = 0.0 + var fg0_arr = [Double]() + var fg1_arr = [Double]() + var bg0_arr = [Double]() + var bg1_arr = [Double]() + for i in 0...batchSize-1 { + fgsum0 += Double(softmaxfg[i,0])! + fgsum1 += Double(softmaxfg[i,1])! + bgsum0 += Double(softmaxbg[i,0])! + bgsum1 += Double(softmaxbg[i,1])! + fg0_arr.append(Double(softmaxfg[i,0])!) + fg1_arr.append(Double(softmaxfg[i,1])!) + bg0_arr.append(Double(softmaxbg[i,0])!) + bg1_arr.append(Double(softmaxbg[i,1])!) 
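+          // Note: the `sd` values reported below are mean absolute deviations
+          // (average of |x - mean|), not standard deviations, despite the
+          // name; a true SD would accumulate (elem - mean) * (elem - mean)
+          // and take squareRoot() of the average at the end.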
+ } + print("featSize", featureSize, "kHiddendimension", kHiddenDimension, "num", num, "val", fgsum1 + bgsum0 - fgsum0 - bgsum1) + + + + + print("feature size", featureSize) + print("fgsum1", fgsum1, "fgsum0", fgsum0) + print("bgsum1", bgsum1, "bgsum0", bgsum0) + + var (figs, axs) = plt.subplots(2,2).tuple2 + print("asda") + // plt.GridSpec(2, 2, wspace: 0.1, hspace: 0.8) + + plt.subplots_adjust(left:0.1, + bottom:0.1, + right:0.9, + top:0.9, + wspace:0.4, + hspace:0.4) + + + // var (fig, ax1) = plt.subplots().tuple2 + var ax1 = axs[1,0] + ax1.hist(fg0_arr, range: Python.tuple([-1,1]), bins: 50) + var mean = fgsum0/Double(batchSize) + var sd = 0.0 + for elem in fg0_arr { + sd += abs(elem - mean)/Double(batchSize) + } + ax1.set_title("Foreground. Output response for background. \n Mean = \(String(format: "%.2f", mean)) and SD = \(sd).", fontsize:8) + + // (fig, ax1) = plt.subplots().tuple2 + ax1 = axs[0,0] + ax1.hist(fg1_arr, range: Python.tuple([-1,1]), bins: 50) + mean = fgsum1/Double(batchSize) + sd = 0.0 + for elem in fg1_arr { + sd += abs(elem - mean)/Double(batchSize) + } + ax1.set_title("Foreground. Output response for foreground. \n Mean = \(String(format: "%.2f", mean)) and SD = \(sd).", fontsize:8) + + ax1 = axs[1,1] + // (fig, ax1) = plt.subplots().tuple2 + ax1.hist(bg0_arr, range: Python.tuple([-1,1]), bins: 50) + mean = bgsum0/Double(batchSize) + sd = 0.0 + for elem in bg0_arr { + sd += abs(elem - mean)/Double(batchSize) + } + ax1.set_title("Background. Output response for background. \n Mean = \(String(format: "%.2f", mean)) and SD = \(sd).", fontsize:8) + + ax1 = axs[0,1] + + // (fig, ax1) = plt.subplots().tuple2 + ax1.hist(bg1_arr, range: Python.tuple([-1,1]), bins: 50) + mean = bgsum1/Double(batchSize) + sd = 0.0 + for elem in bg1_arr { + sd += abs(elem - mean)/Double(batchSize) + } + ax1.set_title("Background. Output response for foreground. \n Mean = \(String(format: "%.2f", mean)) and SD = \(sd).", fontsize:8) + + figs.savefig("hist_softmax_\(kHiddenDimension)_\(featureSize)_\(num).png") + plt.close(figs) + + + + } + } + } + + + + + + + + + + + } +} \ No newline at end of file diff --git a/Scripts/Brando07.swift b/Scripts/Brando07.swift new file mode 100644 index 00000000..e15940b8 --- /dev/null +++ b/Scripts/Brando07.swift @@ -0,0 +1,197 @@ +import ArgumentParser + +import SwiftFusion +import BeeDataset +import BeeTracking +import TensorFlow +import PythonKit +import Foundation + +import PenguinStructures + +/// Brando07: RAE + Prob density histograms +struct Brando07: ParsableCommand { + @Option(help: "Run for number of frames") + var trackLength: Int = 80 + + @Option(help: "Size of feature space") + var featureSize: Int = 256 + // used to be 256 + + @Option(help: "Pretrained weights") + var weightsFile: String? 
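+  // Sketch (hypothetical, not in the original script): once run() below has
+  // fitted the foreground and background densities, a patch encoding would be
+  // labeled foreground when the likelihood ratio exceeds 1. Assumes the
+  // MultivariateGaussian.probability API used below.
+  // func isForeground(_ code: Tensor<Double>,
+  //                   fg: MultivariateGaussian, bg: MultivariateGaussian) -> Bool {
+  //   fg.probability(code) > bg.probability(code)
+  // }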
+ + // Runs RAE tracker on n number of sequences and outputs relevant images and statistics + func run() { + let np = Python.import("numpy") + let kHiddenDimension = 512 + // used to be 512 + + let (imageHeight, imageWidth, imageChannels) = + (40, 70, 1) + + + var rae = DenseRAE( + imageHeight: imageHeight, imageWidth: imageWidth, imageChannels: imageChannels, + hiddenDimension: kHiddenDimension, latentDimension: featureSize + ) + + if let weightsFile = weightsFile { + rae.load(weights: np.load(weightsFile, allow_pickle: true)) + } else { + rae.load(weights: np.load("./oist_rae_weight_\(featureSize).npy", allow_pickle: true)) + } + print("s") + + let trainingDatasetSize = 100 + + let dataDir = URL(fileURLWithPath: "./OIST_Data") + let numberOfTrainingSamples = 3000 + // let fgRandomFrameCount = 10 + // let bgRandomFrameCount = 10 + // let boundingBoxSize = (40, 70) + + let dataset = OISTBeeVideo(directory: dataDir, length: 100)! // calling this twice caused the Killed to happen + let batchSize = 3000 + // print("tests here1") + let fgBoxes = dataset.makeForegroundBoundingBoxes(patchSize: (40, 70), batchSize: batchSize) + print("here 1.5") + let bgBoxes = dataset.makeBackgroundBoundingBoxes(patchSize: (40, 70), batchSize: batchSize) + print("tests here2") + let fgpatches = Tensor(stacking: fgBoxes.map { $0.frame!.patch(at: $0.obb)}) + let bgpatches = Tensor(stacking: bgBoxes.map { $0.frame!.patch(at: $0.obb)}) + print("patches complete") + + // let (fg, bg, _) = getTrainingBatches( + // dataset: dataset, boundingBoxSize: boundingBoxSize, + // fgBatchSize: numberOfTrainingSamples, + // bgBatchSize: numberOfTrainingSamples, + // fgRandomFrameCount: fgRandomFrameCount, + // bgRandomFrameCount: bgRandomFrameCount, + // useCache: true + // ) + + let batchPositive = rae.encode(fgpatches) + print("shape batch positive", batchPositive.shape) + // let foregroundModel = GaussianNB(from:batchPositive, regularizer: 1e-3) + let foregroundModel = MultivariateGaussian(from:batchPositive, regularizer: 1e-3) + let batchNegative = rae.encode(bgpatches) + // let backgroundModel = GaussianNB(from: batchNegative, regularizer: 1e-3) + let backgroundModel = MultivariateGaussian(from: batchNegative, regularizer: 1e-3) + + var outfg0 = [Double]() + var outfg1 = [Double]() + var outbg0 = [Double]() + var outbg1 = [Double]() + print(batchPositive[0,0...].shape) + print(backgroundModel.probability(batchPositive[0,0...])) + print(foregroundModel.probability(batchPositive[0,0...])) + + for i in 0...numberOfTrainingSamples-1 { + outfg0.append(backgroundModel.probability(batchPositive[i,0...])) + // print("probability", backgroundModel.probability(batchPositive[i,0...])) + outfg1.append(foregroundModel.probability(batchPositive[i,0...])) + outbg0.append(backgroundModel.probability(batchNegative[i,0...])) + outbg1.append(foregroundModel.probability(batchNegative[i,0...])) + } + // print(outfg0) + // print(outfg1) + + // let batchSize = numberOfTrainingSamples + var plt = Python.import("matplotlib.pyplot") + + + var fgsum0 = 0.0 + var fgsum1 = 0.0 + var bgsum0 = 0.0 + var bgsum1 = 0.0 + var fg0_arr = [Double]() + var fg1_arr = [Double]() + var bg0_arr = [Double]() + var bg1_arr = [Double]() + for i in 0...batchSize-1 { + fgsum0 += (outfg0[i]) + fgsum1 += (outfg1[i]) + bgsum0 += (outbg0[i]) + bgsum1 += (outbg1[i]) + fg0_arr.append((outfg0[i])) + fg1_arr.append((outfg1[i])) + bg0_arr.append((outbg0[i])) + bg1_arr.append((outbg1[i])) + } + print("featSize", featureSize, "kHiddendimension", kHiddenDimension, "val", fgsum1 + 
bgsum0 - fgsum0 - bgsum1) + + + + + print("feature size", featureSize) + print("fgsum1", fgsum1, "fgsum0", fgsum0) + print("bgsum1", bgsum1, "bgsum0", bgsum0) + + var (figs, axs) = plt.subplots(2,2).tuple2 + print("asda") + // plt.GridSpec(2, 2, wspace: 0.1, hspace: 0.8) + + plt.subplots_adjust(left:0.1, + bottom:0.1, + right:0.9, + top:0.9, + wspace:0.4, + hspace:0.4) + + + // var (fig, ax1) = plt.subplots().tuple2 + var ax1 = axs[1,0] + ax1.hist(fg0_arr, range: Python.tuple([-1,1]), bins: 50) + var mean = fgsum0/Double(batchSize) + var sd = 0.0 + for elem in fg0_arr { + sd += abs(elem - mean)/Double(batchSize) + } + ax1.set_title("Foreground. Output response for background. \n Mean = \(String(format: "%.2f", mean)) and SD = \(sd).", fontsize:8) + + // (fig, ax1) = plt.subplots().tuple2 + ax1 = axs[0,0] + ax1.hist(fg1_arr, range: Python.tuple([-1,1]), bins: 50) + mean = fgsum1/Double(batchSize) + sd = 0.0 + for elem in fg1_arr { + sd += abs(elem - mean)/Double(batchSize) + } + ax1.set_title("Foreground. Output response for foreground. \n Mean = \(String(format: "%.2f", mean)) and SD = \(sd).", fontsize:8) + + ax1 = axs[1,1] + // (fig, ax1) = plt.subplots().tuple2 + ax1.hist(bg0_arr, range: Python.tuple([-1,1]), bins: 50) + mean = bgsum0/Double(batchSize) + sd = 0.0 + for elem in bg0_arr { + sd += abs(elem - mean)/Double(batchSize) + } + ax1.set_title("Background. Output response for background. \n Mean = \(String(format: "%.2f", mean)) and SD = \(sd).", fontsize:8) + + ax1 = axs[0,1] + + // (fig, ax1) = plt.subplots().tuple2 + ax1.hist(bg1_arr, range: Python.tuple([-1,1]), bins: 50) + mean = bgsum1/Double(batchSize) + sd = 0.0 + for elem in bg1_arr { + sd += abs(elem - mean)/Double(batchSize) + } + ax1.set_title("Background. Output response for foreground. \n Mean = \(String(format: "%.2f", mean)) and SD = \(sd).", fontsize:8) + + figs.savefig("hist_rae_\(kHiddenDimension)_\(featureSize).png") + plt.close(figs) + + + + } + + + + + +} + + diff --git a/Scripts/Brando08.swift b/Scripts/Brando08.swift new file mode 100644 index 00000000..dd5648c3 --- /dev/null +++ b/Scripts/Brando08.swift @@ -0,0 +1,54 @@ +import ArgumentParser + +import SwiftFusion +import BeeDataset +import BeeTracking +import TensorFlow +import PythonKit +import Foundation + + +import PenguinStructures + +// PRINT IMAGE PATCHES TO VISUALIZE +struct Brando08: ParsableCommand { + + func run() { + // let featSizes = [8,16,64,128,256] + let dataDir = URL(fileURLWithPath: "./OIST_Data") + let dataset = OISTBeeVideo(directory: dataDir, length: 100)! 
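+    // Sketch (hypothetical alternative, not in this patch): instead of one PNG
+    // per patch as done below, a 10x10 grid per figure keeps the output folder
+    // small; assumes the same np/plt imports and the `bgpatches` tensor built
+    // below:
+    // let (gridFig, gridAxs) = plt.subplots(10, 10, figsize: Python.tuple([14, 8])).tuple2
+    // for k in 0..<100 {
+    //   let cell = gridAxs[k / 10][k % 10]
+    //   cell.imshow(np.squeeze(bgpatches[k, 0..., 0..., 0].makeNumpyArray()) / 255.0, cmap: "gray")
+    //   cell.set_axis_off()
+    // }
+    // gridFig.savefig("Results/brando08/bgpatch_grid.png", bbox_inches: "tight")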
+
+    let batchSize = 300
+    // print("tests here1")
+    let fgBoxes = dataset.makeForegroundBoundingBoxes(patchSize: (40, 70), batchSize: batchSize)
+    // print("here 1.5")
+    let bgBoxes = dataset.makeBackgroundBoundingBoxes(patchSize: (40, 70), batchSize: batchSize)
+    // print("tests here2")
+    let fgpatches = Tensor(stacking: fgBoxes.map { $0.frame!.patch(at: $0.obb)})
+    let bgpatches = Tensor(stacking: bgBoxes.map { $0.frame!.patch(at: $0.obb)})
+    let np = Python.import("numpy")
+    // let kHiddenDimensions = [256,512]
+    // let featSizes = [64,128,256]
+    // print("uu")
+    let plt = Python.import("matplotlib.pyplot")
+    let mpl = Python.import("matplotlib")
+
+    print(fgpatches.shape)
+    for i in batchSize-100...batchSize-1 {
+      let (fig, ax) = plt.subplots(figsize: Python.tuple([8, 4])).tuple2
+      let patch = bgpatches[i,0...,0...,0]
+      let fr = np.squeeze(patch.makeNumpyArray())
+      ax.imshow(fr / 255.0, cmap: "gray")
+      let folderName = "Results/brando08/bgpatches"
+      if !FileManager.default.fileExists(atPath: folderName) {
+        do {
+          try FileManager.default.createDirectory(atPath: folderName, withIntermediateDirectories: true, attributes: nil)
+        } catch {
+          print(error.localizedDescription)
+        }
+      }
+      fig.savefig("Results/brando08/bgpatches/patch\(i).png", bbox_inches: "tight")
+      plt.close("all")
+    }
+  }
+}
\ No newline at end of file
diff --git a/Scripts/Brando09.swift b/Scripts/Brando09.swift
new file mode 100644
index 00000000..d747ed02
--- /dev/null
+++ b/Scripts/Brando09.swift
@@ -0,0 +1,138 @@
+import ArgumentParser
+import SwiftFusion
+import BeeDataset
+import BeeTracking
+import TensorFlow
+import PythonKit
+import Foundation
+import PenguinStructures
+
+/// Brando09: OPTIMIZATION VISUALIZATION
+struct Brando09: ParsableCommand {
+  @Option(help: "Run for number of frames")
+  var trackLength: Int = 80
+
+  func run() {
+//    let np = Python.import("numpy")
+//    let plt = Python.import("matplotlib.pyplot")
+//    let trainingDatasetSize = 100
+
+//    // LOAD THE IMAGE AND THE GROUND TRUTH ORIENTED BOUNDING BOX
+//    let dataDir = URL(fileURLWithPath: "./OIST_Data")
+//    let testData = OISTBeeVideo(directory: dataDir, afterIndex: trainingDatasetSize, length: trackLength)!
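+//    // Note on the dataset API as used throughout these scripts: `length:`
+//    // loads the first N frames, while `afterIndex:` skips past them, so the
+//    // training and test slices do not overlap. Sketch:
+//    // let train = OISTBeeVideo(directory: dataDir, length: 100)!
+//    // let test = OISTBeeVideo(directory: dataDir, afterIndex: 100, length: 80)!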
+// let frames = testData.frames +// let firstTrack = testData.tracks[0] +// // let firstTrack = testData.tracks[5] +// let firstFrame = frames[0] +// let firstObb = firstTrack.boxes[0] +// // let firstObb = firstTrack.boxes[5] + + +// // CREATE A PLACEHOLDER FOR POSE +// var v = VariableAssignments() + + +// // LOAD THE CLASSIFIER +// let (imageHeight, imageWidth, imageChannels) = +// (40, 70, 1) +// let featureSize = 512 +// let kHiddenDimension = 512 +// // var classifier = SmallerNNClassifier( +// // imageHeight: imageHeight, imageWidth: imageWidth, imageChannels: imageChannels, latentDimension: featureSize +// // ) +// var classifier = NNClassifier( +// imageHeight: imageHeight, imageWidth: imageWidth, imageChannels: imageChannels, hiddenDimension: kHiddenDimension, latentDimension: featureSize +// ) +// // classifier.load(weights: np.load("./classifiers/classifiers_today/small_classifier_weight_\(featureSize)_2.npy", allow_pickle: true)) +// classifier.load(weights: np.load("./classifiers/classifiers_today/classifier_weight_\(kHiddenDimension)_\(featureSize)_1_doubletraining.npy", allow_pickle: true)) + + + +// //OPTIMIZER GRADIENT DESCENT +// let lr = 1e-4 +// var optimizer = GradientDescent(learningRate: lr) + +// //CREATE A FOLDER TO CONTAIN THE END-RESULT IMAGES OF THE OPTIMIZATION +// let folderName = "Results/GD_optimization_lr_\(lr)_final_images" +// if !FileManager.default.fileExists(atPath: folderName) { +// do { +// try FileManager.default.createDirectory(atPath: folderName, withIntermediateDirectories: true, attributes: nil) +// } catch { +// print(error.localizedDescription) +// } +// } + +// //PERFORM THIS OPTIMIZATION J TIMES +// for j in 0..<20 { + +// // RANDOMLY PERTURB THE GROUND TRUTH POSE AND CALCULATE THE PERTURBATION +// let poseId = v.store(firstObb.center) +// v[poseId].perturbWith(stddev: Vector3(0.3, 8, 4.6)) +// let dx = v[poseId].t.x - firstObb.center.t.x +// let dy = v[poseId].t.y - firstObb.center.t.y +// let dtheta = v[poseId].rot.theta - firstObb.center.rot.theta +// let startpose = v[poseId] + +// // CREATE THE FACTOR AND FACTOR GRAPH +// var fg = FactorGraph() +// let factor = ProbablisticTrackingFactor2(poseId, +// measurement: firstFrame, +// classifier: classifier, +// patchSize: (40, 70), +// appearanceModelSize: (40, 70) +// ) +// fg.store(factor) + + +// // CREATE A FOLDER FOR EACH OPTIMIZATION ROUND. +// // let folderName = "Results/GD_optimization_lr_\(lr)_\(j)" +// // if !FileManager.default.fileExists(atPath: folderName) { +// // do { +// // try FileManager.default.createDirectory(atPath: folderName, withIntermediateDirectories: true, attributes: nil) +// // } catch { +// // print(error.localizedDescription) +// // } +// // } + +// // MAX ITERATIONS FOR OPTIMIZATION +// let it_limit = 1000 +// print("\(j)) Starting Optimization from: \(dx), \(dy), \(dtheta)") + + +// // PERFORM GRADIENT DESCENT +// for i in 0..= 0.3", fontsize:8) + + print("hello") + let xy_thresh = 20.0 //pixels + let theta_thresh = 0.5 //radians // consider doing overlap. 
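+    // Each trial below perturbs the ground-truth pose, optimizes, and colors
+    // the start position green when the optimum lands within xy_thresh pixels
+    // and theta_thresh radians of the label, red otherwise. A sketch of that
+    // test as a helper (hypothetical; the loop inlines these comparisons):
+    func nearLabel(_ p: Pose2, _ gt: Pose2) -> Bool {
+      return abs(p.t.x - gt.t.x) <= xy_thresh && abs(p.t.y - gt.t.y) <= xy_thresh
+        && abs(p.rot.theta - gt.rot.theta) <= theta_thresh
+    }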
+ + //PERFORM THIS OPTIMIZATION J TIMES + for j in 0..<200 { + + // RANDOMLY PERTURB THE GROUND TRUTH POSE AND CALCULATE THE PERTURBATION + let poseId = v.store(firstObb.center) + v[poseId].perturbWith(stddev: Vector3(0.3, 8, 4.6)) + let dx = v[poseId].t.x - firstObb.center.t.x + let dy = v[poseId].t.y - firstObb.center.t.y + let dtheta = v[poseId].rot.theta - firstObb.center.rot.theta + let startpose = v[poseId] + + // CREATE THE FACTOR AND FACTOR GRAPH + var fg = FactorGraph() + let factor = ProbablisticTrackingFactor2(poseId, + measurement: firstFrame, + classifier: classifier, + patchSize: (40, 70), + appearanceModelSize: (40, 70) + ) + fg.store(factor) + + let it_limit = 1000 + + + + + // PERFORM GRADIENT DESCENT + var conv = true + var errors = [Double]() + + for i in 0.. firstObb.center.t.x + xy_thresh) || (v[poseId].t.x < firstObb.center.t.x - xy_thresh) + let y_out_of_bounds = (v[poseId].t.y > firstObb.center.t.y + xy_thresh) || (v[poseId].t.y < firstObb.center.t.y - xy_thresh) + let theta_out_of_bounds = (v[poseId].rot.theta > firstObb.center.rot.theta + theta_thresh) || (v[poseId].rot.theta < firstObb.center.rot.theta - theta_thresh) + if !x_out_of_bounds && !theta_out_of_bounds && !y_out_of_bounds { + // plot a green dot + // ax.scatter(startpose.t.x-Double(xbegin),startpose.t.y-Double(ybegin),c:"r", marker: ",") + // ax.scatter(startpose.t.x,startpose.t.y,c:"r", marker: ",") + if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.1 { + axs[0,0].plot(startpose.t.x,startpose.t.y,"g,", ms: 1) + } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.2 { + axs[0,1].plot(startpose.t.x,startpose.t.y,"g,", ms: 1) + } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.3 { + axs[1,0].plot(startpose.t.x,startpose.t.y,"g,", ms: 1) + } else { + axs[1,1].plot(startpose.t.x,startpose.t.y,"g,", ms: 1) + } + + } else { + // ax.scatter(startpose.t.x-Double(xbegin),startpose.t.y-Double(ybegin),c:"g", marker: ",") + // ax.scatter(startpose.t.x,startpose.t.y,c:"g", marker: ",") + if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.1 { + axs[0,0].plot(startpose.t.x,startpose.t.y,"r,", ms: 1) + } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.2 { + axs[0,1].plot(startpose.t.x,startpose.t.y,"r,", ms: 1) + } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.3 { + axs[1,0].plot(startpose.t.x,startpose.t.y,"r,", ms: 1) + } else { + axs[1,1].plot(startpose.t.x,startpose.t.y,"r,", ms: 1) + } + } + let (figs, axes) = plotFrameWithPatches3(frame: firstFrame, start: startpose, end: v[poseId], expected: firstObb.center, firstGroundTruth: firstObb.center, errors: errors) + axes.set_title(String(axes.get_title())! 
+ "\n final err = \(factor.errorVector(v[poseId]).x)" + + "\n label err = \(factor.errorVector(firstObb.center).x)" + + "\n start err = \(factor.errorVector(startpose).x)" + + "\n learning rate = \(lr)" + + "\n converged = \(conv)") + figs.savefig(folderName + "/optimization_final_\(j).png", bbox_inches: "tight") + plt.close("all") + fig.savefig(folderName + "/optimization_covergence_red_n_green_dots.png", bbox_inches: "tight") + + } + print("done") + } +} \ No newline at end of file diff --git a/Scripts/Brando13.swift b/Scripts/Brando13.swift new file mode 100644 index 00000000..86b5614b --- /dev/null +++ b/Scripts/Brando13.swift @@ -0,0 +1,198 @@ +import ArgumentParser +import SwiftFusion +import BeeDataset +import BeeTracking +import TensorFlow +import PythonKit +import Foundation +import PenguinStructures + +/// Brando12: OPTIMIZATION CONVERGENCE VISUALIZATION - RAE + MVG +struct Brando13: ParsableCommand { + @Option(help: "Run for number of frames") + var trackLength: Int = 80 + + func run() { + let np = Python.import("numpy") + let plt = Python.import("matplotlib.pyplot") + let trainingDatasetSize = 100 + + // LOAD THE IMAGE AND THE GROUND TRUTH ORIENTED BOUNDING BOX + let dataDir = URL(fileURLWithPath: "./OIST_Data") + let data = OISTBeeVideo(directory: dataDir, length: trainingDatasetSize)! + let testData = OISTBeeVideo(directory: dataDir, afterIndex: trainingDatasetSize, length: trackLength)! + let frames = testData.frames + let firstTrack = testData.tracks[0] + // let firstTrack = testData.tracks[5] + let firstFrame = frames[0] + let firstObb = firstTrack.boxes[0] + // let firstObb = firstTrack.boxes[5] + + + // CREATE A PLACEHOLDER FOR POSE + var v = VariableAssignments() + + + // LOAD THE CLASSIFIER + let (imageHeight, imageWidth, imageChannels) = + (40, 70, 1) + let featureSize = 256 + let kHiddenDimension = 512 + var rae = DenseRAE( + imageHeight: imageHeight, imageWidth: imageWidth, imageChannels: imageChannels, + hiddenDimension: kHiddenDimension, latentDimension: featureSize + ) + rae.load(weights: np.load("./oist_rae_weight_\(featureSize).npy", allow_pickle: true)) + print("hello2") + let (fg, bg, _) = getTrainingBatches( + dataset: data, boundingBoxSize: (40, 70), + fgBatchSize: 3000, + bgBatchSize: 3000, + fgRandomFrameCount: 10, + bgRandomFrameCount: 10, + useCache: true + ) + let batchPositive = rae.encode(fg) + // let foregroundModel = GaussianNB(from:batchPositive, regularizer: 1e-3) + let foregroundModel = MultivariateGaussian(from:batchPositive, regularizer: 1e-3) + + + let batchNegative = rae.encode(bg) + // let backgroundModel = GaussianNB(from: batchNegative, regularizer: 1e-3) + let backgroundModel = MultivariateGaussian(from: batchNegative, regularizer: 1e-3) + + + + + + //OPTIMIZER GRADIENT DESCENT + let lr = 1e-5 + var optimizer = GradientDescent(learningRate: lr) + + //CREATE A FOLDER TO CONTAIN THE END-RESULT IMAGES OF THE OPTIMIZATION + let folderName = "Results/GD_optimization_RAE_\(lr)_final_images" + if !FileManager.default.fileExists(atPath: folderName) { + do { + try FileManager.default.createDirectory(atPath: folderName, withIntermediateDirectories: true, attributes: nil) + } catch { + print(error.localizedDescription) + } + } + + //CREATE A FIG + print("hello1") + let (fig, axs) = plt.subplots(2,2).tuple2 + let fr = np.squeeze(firstFrame.makeNumpyArray()) + for i in 0...1 { + for j in 0...1 { + axs[i,j].imshow(fr / 255.0, cmap: "gray") + let firstGroundTruth = firstObb.center + // axs[i,j].plot(firstObb.corners.map{$0.x} + 
[firstObb.corners.first!.x], firstObb.corners.map{$0.y} + [firstObb.corners.first!.y], "b-") + axs[i,j].set_xlim(firstGroundTruth.t.x - 50, firstGroundTruth.t.x + 50) + axs[i,j].set_ylim(firstGroundTruth.t.y - 50, firstGroundTruth.t.y + 50) + axs[i,j].get_xaxis().set_visible(false) + axs[i,j].get_yaxis().set_visible(false) + } + } + axs[0,0].set_title("fabs(theta) < 0.1", fontsize:8) + axs[0,1].set_title("fabs(theta) < 0.2", fontsize:8) + axs[1,0].set_title("fabs(theta) < 0.3", fontsize:8) + axs[1,1].set_title("fabs(theta) >= 0.3", fontsize:8) + + print("hello") + let xy_thresh = 20.0 //pixels + let theta_thresh = 0.5 //radians // consider doing overlap. + + //PERFORM THIS OPTIMIZATION J TIMES + for j in 0..<200 { + + // RANDOMLY PERTURB THE GROUND TRUTH POSE AND CALCULATE THE PERTURBATION + let poseId = v.store(firstObb.center) + v[poseId].perturbWith(stddev: Vector3(0.3, 8, 4.6)) + let dx = v[poseId].t.x - firstObb.center.t.x + let dy = v[poseId].t.y - firstObb.center.t.y + let dtheta = v[poseId].rot.theta - firstObb.center.rot.theta + let startpose = v[poseId] + + // CREATE THE FACTOR AND FACTOR GRAPH + var fg = FactorGraph() + let factor = ProbablisticTrackingFactor(poseId, + measurement: firstFrame, + encoder: rae, + patchSize: (40, 70), + appearanceModelSize: (40, 70), + foregroundModel: foregroundModel, + backgroundModel: backgroundModel, + maxPossibleNegativity: 1e10 + ) + fg.store(factor) + + let it_limit = 1000 + + + + + // PERFORM GRADIENT DESCENT + var conv = true + var errors = [Double]() + + for i in 0.. firstObb.center.t.x + xy_thresh) || (v[poseId].t.x < firstObb.center.t.x - xy_thresh) + let y_out_of_bounds = (v[poseId].t.y > firstObb.center.t.y + xy_thresh) || (v[poseId].t.y < firstObb.center.t.y - xy_thresh) + let theta_out_of_bounds = (v[poseId].rot.theta > firstObb.center.rot.theta + theta_thresh) || (v[poseId].rot.theta < firstObb.center.rot.theta - theta_thresh) + if !x_out_of_bounds && !theta_out_of_bounds && !y_out_of_bounds { + // plot a green dot + // ax.scatter(startpose.t.x-Double(xbegin),startpose.t.y-Double(ybegin),c:"r", marker: ",") + // ax.scatter(startpose.t.x,startpose.t.y,c:"r", marker: ",") + if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.1 { + axs[0,0].plot(startpose.t.x,startpose.t.y,"g,", ms: 1) + } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.2 { + axs[0,1].plot(startpose.t.x,startpose.t.y,"g,", ms: 1) + } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.3 { + axs[1,0].plot(startpose.t.x,startpose.t.y,"g,", ms: 1) + } else { + axs[1,1].plot(startpose.t.x,startpose.t.y,"g,", ms: 1) + } + + } else { + // ax.scatter(startpose.t.x-Double(xbegin),startpose.t.y-Double(ybegin),c:"g", marker: ",") + // ax.scatter(startpose.t.x,startpose.t.y,c:"g", marker: ",") + if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.1 { + axs[0,0].plot(startpose.t.x,startpose.t.y,"r,", ms: 1) + } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.2 { + axs[0,1].plot(startpose.t.x,startpose.t.y,"r,", ms: 1) + } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.3 { + axs[1,0].plot(startpose.t.x,startpose.t.y,"r,", ms: 1) + } else { + axs[1,1].plot(startpose.t.x,startpose.t.y,"r,", ms: 1) + } + } + let (figs, axes) = plotFrameWithPatches3(frame: firstFrame, start: startpose, end: v[poseId], expected: firstObb.center, firstGroundTruth: firstObb.center, errors: errors) + axes.set_title(String(axes.get_title())! 
+ "\n final err = \(factor.errorVector(v[poseId]).x)" + + "\n label err = \(factor.errorVector(firstObb.center).x)" + + "\n start err = \(factor.errorVector(startpose).x)" + + "\n learning rate = \(lr)" + + "\n converged = \(conv)") + figs.savefig(folderName + "/optimization_final_\(j).png", bbox_inches: "tight") + plt.close("all") + fig.savefig(folderName + "/optimization_covergence_red_n_green_dots.png", bbox_inches: "tight") + + } + print("done") + } +} \ No newline at end of file diff --git a/Scripts/Brandounittest.swift b/Scripts/Brandounittest.swift new file mode 100644 index 00000000..e69de29b diff --git a/Scripts/Fan03.swift b/Scripts/Fan03.swift index d153b95b..ce2c1aa8 100644 --- a/Scripts/Fan03.swift +++ b/Scripts/Fan03.swift @@ -27,7 +27,7 @@ struct Fan03: ParsableCommand { let rp = RandomProjection(fromShape: TensorShape([imageHeight, imageWidth, imageChannels]), toFeatureSize: featureSize) - let (fig, _, _) = runProbabilisticTracker( + let (fig, track, gt) = runProbabilisticTracker( directory: dataDir, encoder: rp, onTrack: trackId, forFrames: trackLength, withSampling: true, @@ -37,5 +37,17 @@ struct Fan03: ParsableCommand { /// Actual track v.s. ground truth track fig.savefig("Results/fan03/fan03_track\(trackId)_\(featureSize).pdf", bbox_inches: "tight") + + + let json = JSONEncoder() + json.outputFormatting = .prettyPrinted + let track_data = try! json.encode(track) + try! track_data.write(to: URL(fileURLWithPath: "Results/fan04/fan04_track_\(trackId)_\(featureSize).json")) + + let gt_data = try! json.encode(gt) + try! gt_data.write(to: URL(fileURLWithPath: "Results/fan04/fan04_gt_\(trackId)_\(featureSize).json")) + + + } } diff --git a/Scripts/Fan05.swift b/Scripts/Fan05.swift index ff43e4c2..44cf8bf3 100644 --- a/Scripts/Fan05.swift +++ b/Scripts/Fan05.swift @@ -28,10 +28,11 @@ struct Fan05: ParsableCommand { let bgBoxes = dataset.makeBackgroundBoundingBoxes(patchSize: (40, 70), batchSize: numberBackground).map { (frame: $0.frame, type: LikelihoodModel.PatchType.bg, obb: $0.obb) } + print("d1") let fgBoxes = dataset.makeForegroundBoundingBoxes(patchSize: (40, 70), batchSize: numberForeground).map { (frame: $0.frame, type: LikelihoodModel.PatchType.fg, obb: $0.obb) } - + print("done") return fgBoxes + bgBoxes } diff --git a/Scripts/Fan12.swift b/Scripts/Fan12.swift index 809226f7..ecaaca94 100644 --- a/Scripts/Fan12.swift +++ b/Scripts/Fan12.swift @@ -12,7 +12,7 @@ struct Fan12: ParsableCommand { typealias LikelihoodModel = TrackingLikelihoodModel @Option(help: "Size of feature space") - var featureSize: Int = 256 + var featureSize: Int = 5 @Flag(help: "Training mode") var training: Bool = false diff --git a/Scripts/main.swift b/Scripts/main.swift index 9b96f9c1..d5f1cf7f 100644 --- a/Scripts/main.swift +++ b/Scripts/main.swift @@ -17,7 +17,7 @@ import PenguinParallelWithFoundation struct Scripts: ParsableCommand { static var configuration = CommandConfiguration( - subcommands: [Andrew01.self, Fan01.self, Fan02.self, Fan03.self, Fan04.self, Fan05.self, Fan10.self, Fan12.self, Fan13.self, Fan14.self, + subcommands: [Brando01.self, Brando02.self, Brando03.self, Brando04.self, Brando05.self, Brando06.self, Brando07.self, Brando08.self, Brando09.self, Brando10.self, Brando11.self, Brando12.self, Brando13.self, Andrew01.self, Andrew05.self, Fan01.self, Fan02.self, Fan03.self, Fan04.self, Fan05.self, Fan10.self, Fan12.self, Fan13.self, Fan14.self, Frank01.self, Frank02.self, Frank03.self, Frank04.self]) } From a83c45ca911be506bae6d5d8b265e0e7a3721cb9 Mon Sep 17 00:00:00 2001 
From: icourten3 Date: Mon, 27 Sep 2021 17:22:50 -0400 Subject: [PATCH 06/34] more changes --- .../AppearanceRAE+Serialization.swift | 1 + Sources/BeeTracking/TrackingFactorGraph.swift | 68 +++++++++---------- Sources/BeeTracking/Visualizations.swift | 51 +++++++++++--- .../Inference/FactorsStorage.swift | 4 +- .../Optimizers/GradientDescent.swift | 1 + Sources/SwiftFusion/Optimizers/LM.swift | 6 +- 6 files changed, 85 insertions(+), 46 deletions(-) diff --git a/Sources/BeeTracking/AppearanceRAE+Serialization.swift b/Sources/BeeTracking/AppearanceRAE+Serialization.swift index d8227b8b..4b3ce3b0 100644 --- a/Sources/BeeTracking/AppearanceRAE+Serialization.swift +++ b/Sources/BeeTracking/AppearanceRAE+Serialization.swift @@ -17,6 +17,7 @@ extension Dense where Scalar: NumpyScalarCompatible { self.bias.shape == bias.shape, "expected bias \(self.bias.shape) but got \(bias.shape)") self.weight = weight self.bias = bias + print("loaded") } /// The weight and bias as numpy arrays. diff --git a/Sources/BeeTracking/TrackingFactorGraph.swift b/Sources/BeeTracking/TrackingFactorGraph.swift index ff5e3d2c..62942d3a 100644 --- a/Sources/BeeTracking/TrackingFactorGraph.swift +++ b/Sources/BeeTracking/TrackingFactorGraph.swift @@ -134,8 +134,8 @@ public struct TrackingConfiguration { ) -> () /// The optimizer to use during inference. - // public var optimizer = LM() - public var optimizer = GradientDescent(learningRate: 1e-5) + public var optimizer = LM() + // public var optimizer = GradientDescent(learningRate: 1e-5) /// Creates an instance. /// @@ -201,13 +201,13 @@ public struct TrackingConfiguration { // Sample from motion model and take best pose var bestError = g.error(at: x) - var posex = [Double]() - var posey = [Double]() - var posetheta = [Double]() - var error = [Double]() - var besterror = [Double]() + // var posex = [Double]() + // var posey = [Double]() + // var posetheta = [Double]() + // var error = [Double]() + // var besterror = [Double]() // time x , time y , time theta , time error - for _ in 0..<10000 { //2000 + for _ in 0..<2000 { //2000 x[currentPoseID] = x[previousPoseID] x[currentPoseID].perturbWith(stddev: Vector3(0.3, 8, 4.6)) let candidateError = g.error(at: x) @@ -224,34 +224,34 @@ public struct TrackingConfiguration { } // APPEND CURRENT ERROR - posex.append(x[currentPoseID].t.x) - posey.append(x[currentPoseID].t.y) - posetheta.append(x[currentPoseID].rot.theta) - error.append(candidateError) - besterror.append(bestError) + // posex.append(x[currentPoseID].t.x) + // posey.append(x[currentPoseID].t.y) + // posetheta.append(x[currentPoseID].rot.theta) + // error.append(candidateError) + // besterror.append(bestError) } x[currentPoseID] = bestPose - let np = Python.import("numpy") - let posex_np = Tensor(posex).makeNumpyArray() - let posey_np = Tensor(posey).makeNumpyArray() - let posetheta_np = Tensor(posetheta).makeNumpyArray() - let error_np = Tensor(error).makeNumpyArray() - let besterror_np = Tensor(besterror).makeNumpyArray() - - let folderName = "sampling" - if !FileManager.default.fileExists(atPath: folderName) { - do { - try FileManager.default.createDirectory(atPath: folderName, withIntermediateDirectories: true, attributes: nil) - } catch { - print(error.localizedDescription) - } - } - - np.save("./sampling/sampling_frame_\(i)_posex.npy", posex_np) - np.save("./sampling/sampling_frame_\(i)_posey.npy", posey_np) - np.save("./sampling/sampling_frame_\(i)_posetheta.npy", posetheta_np) - np.save("./sampling/sampling_frame_\(i)_error.npy", error_np) - 
np.save("./sampling/sampling_frame_\(i)_besterror.npy", besterror_np) + // let np = Python.import("numpy") + // let posex_np = Tensor(posex).makeNumpyArray() + // let posey_np = Tensor(posey).makeNumpyArray() + // let posetheta_np = Tensor(posetheta).makeNumpyArray() + // let error_np = Tensor(error).makeNumpyArray() + // let besterror_np = Tensor(besterror).makeNumpyArray() + + // let folderName = "sampling" + // if !FileManager.default.fileExists(atPath: folderName) { + // do { + // try FileManager.default.createDirectory(atPath: folderName, withIntermediateDirectories: true, attributes: nil) + // } catch { + // print(error.localizedDescription) + // } + // } + + // np.save("./sampling/sampling_frame_\(i)_posex.npy", posex_np) + // np.save("./sampling/sampling_frame_\(i)_posey.npy", posey_np) + // np.save("./sampling/sampling_frame_\(i)_posetheta.npy", posetheta_np) + // np.save("./sampling/sampling_frame_\(i)_error.npy", error_np) + // np.save("./sampling/sampling_frame_\(i)_besterror.npy", besterror_np) } diff --git a/Sources/BeeTracking/Visualizations.swift b/Sources/BeeTracking/Visualizations.swift index 8326cdfe..bd643cf0 100644 --- a/Sources/BeeTracking/Visualizations.swift +++ b/Sources/BeeTracking/Visualizations.swift @@ -170,19 +170,37 @@ public func plotFrameWithPatches2(frame: Tensor, actual_box1: OrientedBou } +public func plotXYandTheta(xs: [Double], ys: [Double], thetas: [Double]) -> (PythonObject, PythonObject) { + + let plt = Python.import("matplotlib.pyplot") + let np = Python.import("numpy") + + let (fig, axs) = plt.subplots(1,2,figsize: Python.tuple([8, 4])).tuple2 + + let ax2 = axs[0] + ax2.plot(np.arange(0,xs.count), xs) + ax2.plot(np.arange(0,xs.count), ys) + ax2.title.set_text("X and Y") + + + let ax3 = axs[1] + ax3.plot(np.arange(0,xs.count), thetas) + ax3.title.set_text("Theta") + + return (fig, axs) + + +} + + + /// plot Optimization beginning, end, -public func plotFrameWithPatches3(frame: Tensor, start: Pose2, end: Pose2, expected: Pose2, firstGroundTruth: Pose2, errors: [Double]) -> (PythonObject, PythonObject) { +public func plotFrameWithPatches3(frame: Tensor, start: Pose2, end: Pose2, expected: Pose2, firstGroundTruth: Pose2, errors: [Double], xs: [Double], ys: [Double], thetas: [Double]) -> (PythonObject, PythonObject) { let plt = Python.import("matplotlib.pyplot") let mpl = Python.import("matplotlib") - // print("plottingFrameWithPatches") - // print("actual Pose", actual, expected) - // print("eh") - let (fig, axs) = plt.subplots(1,2,figsize: Python.tuple([8, 4])).tuple2 - // print("printing the frame shape") - // print(frame) - // print(frame.shape) - let ax = axs[0] + let (fig, axs) = plt.subplots(2,3,figsize: Python.tuple([18, 10])).tuple2 + let ax = axs[0][0] let np = Python.import("numpy") let fr = np.squeeze(frame.makeNumpyArray()) ax.imshow(fr / 255.0, cmap: "gray") @@ -234,10 +252,23 @@ public func plotFrameWithPatches3(frame: Tensor, start: Pose2, end: Pose2 // print("eh7") ax.title.set_text("Start (Green), End (Red), vs. 
Label (Blue)") - let ax1 = axs[1] + let ax1 = axs[0][1] ax1.plot(np.arange(0,errors.count), errors) ax1.title.set_text("Error value") + + let ax2 = axs[0][2] + ax2.plot(np.arange(0,xs.count), xs) + ax2.title.set_text("X") + + let ax4 = axs[1][1] + ax4.plot(np.arange(0,xs.count), ys) + ax4.title.set_text("Y") + + let ax5 = axs[1][2] + ax5.plot(np.arange(0,xs.count), thetas) + ax5.title.set_text("Theta") + // var spec = mpl.gridspec.GridSpec(ncols: 2, nrows: 1, width_ratios: [2, 1]) diff --git a/Sources/SwiftFusion/Inference/FactorsStorage.swift b/Sources/SwiftFusion/Inference/FactorsStorage.swift index 174b72aa..0f1c4d78 100644 --- a/Sources/SwiftFusion/Inference/FactorsStorage.swift +++ b/Sources/SwiftFusion/Inference/FactorsStorage.swift @@ -62,7 +62,9 @@ extension ArrayStorage where Element: VectorFactor { let (lFactor, lVars) = factor.linearizableComponent(at: vars) let gradIndices = LVariables.linearized(lFactor.edges) let grads = GradVariables(at: gradIndices, in: GradVariables.withoutMutation(gradBufs)) - let newGrads = grads + gradient(at: lVars) { lFactor.errorVector(at: $0).squaredNorm } + let newGrads = grads + gradient(at: lVars) { (lFactor.errorVector(at: $0) as! Vector1).x } + // print("FactorsStorage", lFactor.errorVector(at: lVars)) + // let newGrads = grads + gradient(at: lVars) { lFactor.errorVector(at: $0).squaredNorm } newGrads.assign(into: gradIndices, in: gradBufs) } } diff --git a/Sources/SwiftFusion/Optimizers/GradientDescent.swift b/Sources/SwiftFusion/Optimizers/GradientDescent.swift index e011d7e2..90dbbd59 100644 --- a/Sources/SwiftFusion/Optimizers/GradientDescent.swift +++ b/Sources/SwiftFusion/Optimizers/GradientDescent.swift @@ -26,6 +26,7 @@ public struct GradientDescent { /// Moves `values` along the gradient of `objective`'s error function for a single gradient /// descent step. public func update(_ values: inout VariableAssignments, objective: FactorGraph) { + // print(objective.errorGradient(at: values)) values.move(along: -learningRate * objective.errorGradient(at: values)) } } diff --git a/Sources/SwiftFusion/Optimizers/LM.swift b/Sources/SwiftFusion/Optimizers/LM.swift index 591f7473..65633e11 100644 --- a/Sources/SwiftFusion/Optimizers/LM.swift +++ b/Sources/SwiftFusion/Optimizers/LM.swift @@ -82,8 +82,12 @@ public struct LM { var inner_iter_step = 0 var inner_success = false var all_done = false - + var i = 0 for _ in 0.. 
Date: Mon, 27 Sep 2021 17:43:40 -0400 Subject: [PATCH 07/34] mode code --- Examples/BeeTrackingTool/main.swift | 4 +- Examples/OISTVisualizationTool/main.swift | 58 +-- Package.resolved | 36 -- Package.swift | 14 +- Scripts/Brando04.swift | 2 +- Scripts/Brando05.swift | 5 +- Scripts/Brando06.swift | 40 +- Scripts/Brando12.swift | 334 +++++++++++----- Scripts/Brando13.swift | 463 ++++++++++++++++------ Scripts/Brando14.swift | 242 +++++++++++ Scripts/main.swift | 6 +- 11 files changed, 910 insertions(+), 294 deletions(-) create mode 100644 Scripts/Brando14.swift diff --git a/Examples/BeeTrackingTool/main.swift b/Examples/BeeTrackingTool/main.swift index ac2712f6..cc76e0aa 100644 --- a/Examples/BeeTrackingTool/main.swift +++ b/Examples/BeeTrackingTool/main.swift @@ -142,7 +142,7 @@ struct InferTrackRAE: ParsableCommand { frames: videoSlice.frames, targetSize: (video.track[0].rows, video.track[0].cols)) - if verbose { tracker.optimizer.verbosity = .SUMMARY } + // if verbose { tracker.optimizer.verbosity = .SUMMARY } FOR LM Optimizer let startPose = videoSlice.track[0].center let startPatch = Tensor(videoSlice.frames[0].patch( @@ -185,7 +185,7 @@ struct InferTrackRawPixels: ParsableCommand { var tracker = makeRawPixelTracker(frames: videoSlice.frames, target: startPatch) - if verbose { tracker.optimizer.verbosity = .SUMMARY } + // if verbose { tracker.optimizer.verbosity = .SUMMARY } FOR LM Optimizer let prediction = tracker.infer(knownStart: Tuple1(startPose)) diff --git a/Examples/OISTVisualizationTool/main.swift b/Examples/OISTVisualizationTool/main.swift index fab3a70c..51a9a826 100644 --- a/Examples/OISTVisualizationTool/main.swift +++ b/Examples/OISTVisualizationTool/main.swift @@ -44,9 +44,9 @@ struct ViewFrame: ParsableCommand { let image = dataset.loadFrame(frameRawId)! 
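+    // With the Plotly-based plot(...).show() below commented out, a quick
+    // matplotlib preview is one alternative (a sketch assuming PythonKit is
+    // available in this target and that the frame supports makeNumpyArray(),
+    // as the frames elsewhere in this series do):
+    // let plt = Python.import("matplotlib.pyplot")
+    // let np = Python.import("numpy")
+    // plt.imshow(np.squeeze(image.makeNumpyArray()) / 255.0, cmap: "gray")
+    // plt.show()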
- plot(image, boxes: dataset.labels[frameId].enumerated().map { - (String($0), $1.location) - }, margin: 10.0, scale: 0.5).show() + // plot(image, boxes: dataset.labels[frameId].enumerated().map { + // (String($0), $1.location) + // }, margin: 10.0, scale: 0.5).show() } } @@ -118,7 +118,7 @@ struct RawTrack: ParsableCommand { var tracker = makeRawPixelTracker(frames: videos, target: startPatch) - if verbose { tracker.optimizer.verbosity = .SUMMARY } + // if verbose { tracker.optimizer.verbosity = .SUMMARY } For LM Optimizer let prediction = tracker.infer(knownStart: Tuple1(startPose)) @@ -150,9 +150,9 @@ struct RawTrack: ParsableCommand { print("Creating output plot") } startTimer("PLOTTING") - plot(image, boxes: bboxes.indices.map { - ("\($0)", bboxes[$0]) - }, margin: 10.0, scale: 0.5).show() + // plot(image, boxes: bboxes.indices.map { + // ("\($0)", bboxes[$0]) + // }, margin: 10.0, scale: 0.5).show() stopTimer("PLOTTING") if verbose { @@ -207,11 +207,11 @@ struct PpcaTrack: ParsableCommand { startTimer("MAKE_GRAPH") var tracker = makePPCATracker(model: ppca, statistics: statistics, frames: videos, targetSize: (40, 70)) stopTimer("MAKE_GRAPH") + // For LM Optimizer + // if verbose { tracker.optimizer.verbosity = .SUMMARY } - if verbose { tracker.optimizer.verbosity = .SUMMARY } - - tracker.optimizer.cgls_precision = 1e-6 - tracker.optimizer.precision = 1e-2 + // tracker.optimizer.cgls_precision = 1e-6 + // tracker.optimizer.precision = 1e-2 startTimer("GRAPH_INFER") let prediction = tracker.infer(knownStart: Tuple2(startPose, Vector10(flatTensor: startLatent))) @@ -255,9 +255,9 @@ struct PpcaTrack: ParsableCommand { print("Creating output plot") } startTimer("PLOTTING") - plot(image, boxes: bboxes.indices.map { - ("\($0)", bboxes[$0]) - }, margin: 10.0, scale: 0.5).show() + // plot(image, boxes: bboxes.indices.map { + // ("\($0)", bboxes[$0]) + // }, margin: 10.0, scale: 0.5).show() stopTimer("PLOTTING") if verbose { @@ -358,11 +358,12 @@ struct NaiveRae: ParsableCommand { stopTimer("MAKE_GRAPH") if verbose { print("Starting Optimization...") } - if verbose { tracker.optimizer.verbosity = .SUMMARY } + // For LM Optimizer + // if verbose { tracker.optimizer.verbosity = .SUMMARY } - tracker.optimizer.cgls_precision = 1e-7 - tracker.optimizer.precision = 1e-4 - tracker.optimizer.max_iteration = 200 + // tracker.optimizer.cgls_precision = 1e-7 + // tracker.optimizer.precision = 1e-4 + // tracker.optimizer.max_iteration = 200 startTimer("GRAPH_INFER") let prediction = tracker.infer(knownStart: Tuple1(startPose)) @@ -402,9 +403,9 @@ struct NaiveRae: ParsableCommand { print("Creating output plot") } startTimer("PLOTTING") - plot(image, boxes: bboxes.indices.map { - ("\($0)", bboxes[$0]) - }, margin: 10.0, scale: 0.5).show() + // plot(image, boxes: bboxes.indices.map { + // ("\($0)", bboxes[$0]) + // }, margin: 10.0, scale: 0.5).show() stopTimer("PLOTTING") if verbose { @@ -578,11 +579,12 @@ struct NaivePca: ParsableCommand { stopTimer("MAKE_GRAPH") if verbose { print("Starting Optimization...") } - if verbose { tracker.optimizer.verbosity = .SUMMARY } + // For LM Optimizer + // if verbose { tracker.optimizer.verbosity = .SUMMARY } - tracker.optimizer.cgls_precision = 1e-7 - tracker.optimizer.precision = 1e-4 - tracker.optimizer.max_iteration = 200 + // tracker.optimizer.cgls_precision = 1e-7 + // tracker.optimizer.precision = 1e-4 + // tracker.optimizer.max_iteration = 200 startTimer("GRAPH_INFER") let prediction = tracker.infer(knownStart: Tuple1(startPose)) @@ -622,9 +624,9 @@ struct 
NaivePca: ParsableCommand { print("Creating output plot") } startTimer("PLOTTING") - plot(image, boxes: bboxes.indices.map { - ("\($0)", bboxes[$0]) - }, margin: 10.0, scale: 0.5).show() + // plot(image, boxes: bboxes.indices.map { + // ("\($0)", bboxes[$0]) + // }, margin: 10.0, scale: 0.5).show() stopTimer("PLOTTING") if verbose { diff --git a/Package.resolved b/Package.resolved index 03e8fe88..b25ddd9e 100644 --- a/Package.resolved +++ b/Package.resolved @@ -1,15 +1,6 @@ { "object": { "pins": [ - { - "package": "CSV.swift", - "repositoryURL": "https://github.com/yaslab/CSV.swift.git", - "state": { - "branch": null, - "revision": "81d2874c51db364d7e1d71b0d99018a294c87ac1", - "version": "2.4.3" - } - }, { "package": "Penguin", "repositoryURL": "https://github.com/saeta/penguin.git", @@ -19,15 +10,6 @@ "version": null } }, - { - "package": "Plotly", - "repositoryURL": "https://github.com/vojtamolda/Plotly.swift", - "state": { - "branch": null, - "revision": "6e80119ba37b913e5460459556e2bf58f02eba67", - "version": "0.4.0" - } - }, { "package": "swift-argument-parser", "repositoryURL": "https://github.com/apple/swift-argument-parser.git", @@ -46,24 +28,6 @@ "version": "0.1.0" } }, - { - "package": "swift-models", - "repositoryURL": "https://github.com/tensorflow/swift-models.git", - "state": { - "branch": null, - "revision": "b2fc0325bf9d476bf2d7a4cd0a09d36486c506e4", - "version": null - } - }, - { - "package": "SwiftProtobuf", - "repositoryURL": "https://github.com/apple/swift-protobuf.git", - "state": { - "branch": null, - "revision": "da9a52be9cd36c63993291ce3f1b65dafcd1e826", - "version": "1.14.0" - } - }, { "package": "swift-tools-support-core", "repositoryURL": "https://github.com/apple/swift-tools-support-core.git", diff --git a/Package.swift b/Package.swift index e7dc1074..b4b75551 100644 --- a/Package.swift +++ b/Package.swift @@ -30,7 +30,6 @@ let package = Package( .package(name: "TensorBoardX", url: "https://github.com/ProfFan/tensorboardx-s4tf.git", from: "0.1.3"), .package(url: "https://github.com/apple/swift-tools-support-core.git", .branch("swift-5.2-branch")), .package(url: "https://github.com/apple/swift-argument-parser.git", from: "0.3.0"), - .package(name: "Plotly", url: "https://github.com/vojtamolda/Plotly.swift", from: "0.4.0"), ], targets: [ // Targets are the basic building blocks of a package. A target can define a module or a test suite. 
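+  // This commit drops the Plotly dependency (visualization now goes through
+  // PythonKit/matplotlib) and adds a BrandoTests test target below. The new
+  // target can be run on its own with SwiftPM, e.g.:
+  //   swift test --filter BrandoTests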
@@ -57,7 +56,6 @@ let package = Package( name: "BeeDataset", dependencies: [ "SwiftFusion", - "Plotly", "ModelSupport", ]), .target( @@ -86,7 +84,6 @@ let package = Package( "BeeTracking", .product(name: "PenguinParallelWithFoundation", package: "Penguin"), "SwiftFusion", - "Plotly", .product(name: "ArgumentParser", package: "swift-argument-parser"), ], path: "Examples/OISTVisualizationTool"), @@ -97,12 +94,21 @@ let package = Package( "BeeTracking", .product(name: "PenguinParallelWithFoundation", package: "Penguin"), "SwiftFusion", - "Plotly", .product(name: "ArgumentParser", package: "swift-argument-parser"), ], path: "Scripts", exclude: ["README.md"] ), + .testTarget( + name: "BrandoTests", + dependencies: [ + "SwiftFusion", + "BeeDataset", + "BeeTracking", + .product(name: "PenguinTesting", package: "Penguin"), + "ModelSupport", + ] + ), .testTarget( name: "SwiftFusionTests", dependencies: [ diff --git a/Scripts/Brando04.swift b/Scripts/Brando04.swift index de616362..4c91f009 100644 --- a/Scripts/Brando04.swift +++ b/Scripts/Brando04.swift @@ -10,7 +10,7 @@ import Foundation -/// Fan12: RAE training +/// Brando04: NNClassifier training struct Brando04: ParsableCommand { typealias LikelihoodModel = TrackingLikelihoodModel diff --git a/Scripts/Brando05.swift b/Scripts/Brando05.swift index 4746bda9..d46aaf70 100644 --- a/Scripts/Brando05.swift +++ b/Scripts/Brando05.swift @@ -19,7 +19,8 @@ struct Brando05: ParsableCommand { let np = Python.import("numpy") let featureSizes = [256] let kHiddenDimensions = [512] - let iterations = [1,2,3,4,5,6,7] + // let iterations = [1,2,3,4,5,6,7] + let iterations = [1] let trainingDatasetSize = 100 let dataDir = URL(fileURLWithPath: "./OIST_Data") @@ -39,7 +40,7 @@ struct Brando05: ParsableCommand { // var classifier = SmallerNNClassifier( // imageHeight: imageHeight, imageWidth: imageWidth, imageChannels: imageChannels, latentDimension: featureSize // ) - var classifier = LargerNNClassifier( + var classifier = NNClassifier( imageHeight: imageHeight, imageWidth: imageWidth, imageChannels: imageChannels, hiddenDimension: kHiddenDimension, latentDimension: featureSize ) // LOAD THE CLASSIFIER diff --git a/Scripts/Brando06.swift b/Scripts/Brando06.swift index 871035e9..67390d73 100644 --- a/Scripts/Brando06.swift +++ b/Scripts/Brando06.swift @@ -26,15 +26,15 @@ struct Brando06: ParsableCommand { let fgpatches = Tensor(stacking: fgBoxes.map { $0.frame!.patch(at: $0.obb)}) let bgpatches = Tensor(stacking: bgBoxes.map { $0.frame!.patch(at: $0.obb)}) let np = Python.import("numpy") - let kHiddenDimensions = [256,512] - let featSizes = [64,128,256] + let kHiddenDimensions = [512] + let featSizes = [512] print("uu") var plt = Python.import("matplotlib.pyplot") for i in featSizes { for j in kHiddenDimensions { - for num in 1...7 { + for num in 1...1 { let featureSize = i let kHiddenDimension = j @@ -51,15 +51,43 @@ struct Brando06: ParsableCommand { if let weightsFile = weightsFile { classifier.load(weights: np.load(weightsFile, allow_pickle: true)) } else { - classifier.load(weights: np.load("./classifiers/classifiers_today/classifier_weight_\(kHiddenDimension)_\(featureSize)_\(num).npy", allow_pickle: true)) + // classifier.load(weights: np.load("./classifiers/classifiers_today/classifier_weight_\(kHiddenDimension)_\(featureSize)_\(num).npy", allow_pickle: true)) + classifier.load(weights: np.load("./classifiers/classifiers_today/classifier_weight_512_512_1_doubletraining.npy", allow_pickle: true)) } let outfg = classifier.classify(fgpatches) let outbg = 
classifier.classify(bgpatches) let softmaxfg = softmax(outfg) let softmaxbg = softmax(outbg) - print(outfg[0...3]) - print(softmaxfg[0...3]) + // print(outfg[0...3]) + // print("printing foreground:", softmaxfg[0...10]) + // print("printing background:", softmaxbg[0...10]) + let folderName = "Results/brando06/classified_images" + if !FileManager.default.fileExists(atPath: folderName) { + do { + try FileManager.default.createDirectory(atPath: folderName, withIntermediateDirectories: true, attributes: nil) + } catch { + print(error.localizedDescription) + } + } + for i in 0...30 { + //Background + var (fig, ax) = plt.subplots(figsize: Python.tuple([8, 4])).tuple2 + var patch = bgpatches[i,0...,0...,0] + var fr = np.squeeze(patch.makeNumpyArray()) + ax.imshow(fr / 255.0, cmap: "gray") + ax.set_title("background image: \noutput index 0: \(softmaxbg[i][0])\noutput index 1: \(softmaxbg[i][1])") + fig.savefig(folderName + "/bgpatch\(i).png", bbox_inches: "tight") + plt.close("all") + //Foreground + (fig, ax) = plt.subplots(figsize: Python.tuple([8, 4])).tuple2 + patch = fgpatches[i,0...,0...,0] + fr = np.squeeze(patch.makeNumpyArray()) + ax.imshow(fr / 255.0, cmap: "gray") + ax.set_title("foreground image: \noutput index 0: \(softmaxfg[i][0])\noutput index 1: \(softmaxfg[i][1])") + fig.savefig(folderName + "/fgpatch\(i).png", bbox_inches: "tight") + plt.close("all") + } let shapefg = outfg.shape let shapebg = outbg.shape diff --git a/Scripts/Brando12.swift b/Scripts/Brando12.swift index 9049cc51..e4474f5b 100644 --- a/Scripts/Brando12.swift +++ b/Scripts/Brando12.swift @@ -12,6 +12,34 @@ struct Brando12: ParsableCommand { @Option(help: "Run for number of frames") var trackLength: Int = 80 + @Option(help: "Classifier or rae") + var useClassifier: Bool = true + + func initialize_and_perturb(p: Pose2) -> (Double, Double, Double, Pose2, VariableAssignments, TypedID, FactorGraph) { + // CREATE A PLACEHOLDER FOR POSE + var v = VariableAssignments() + let poseId = v.store(p) + v[poseId].perturbWith(stddev: Vector3(0.3, 8, 4.6)) + let dx = v[poseId].t.x - p.t.x + let dy = v[poseId].t.y - p.t.y + let dtheta = v[poseId].rot.theta - p.rot.theta + let startpose = v[poseId] + let fg = FactorGraph() + + return (dx, dy, dtheta, startpose, v, poseId, fg) + } + + func initialize_empty_arrays() -> (Bool, [Double], [Double], [Double], [Double]) { + var conv = true + var errors = [Double]() + var xs = [Double]() + var ys = [Double]() + var thetas = [Double]() + return (conv, errors, xs, ys, thetas) + } + + + func run() { let np = Python.import("numpy") let plt = Python.import("matplotlib.pyplot") @@ -20,48 +48,40 @@ struct Brando12: ParsableCommand { // LOAD THE IMAGE AND THE GROUND TRUTH ORIENTED BOUNDING BOX let dataDir = URL(fileURLWithPath: "./OIST_Data") let testData = OISTBeeVideo(directory: dataDir, afterIndex: trainingDatasetSize, length: trackLength)! + let data = OISTBeeVideo(directory: dataDir, length: trainingDatasetSize)! 
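+  // Experiment outline for the refactored Brando12: each trial perturbs the
+  // ground-truth pose, builds a one-factor graph (NNClassifier likelihood when
+  // useClassifier is set, otherwise RAE features with MultivariateGaussian
+  // foreground/background models), runs gradient descent, and records whether
+  // the pose converges back within the thresholds set below.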
let frames = testData.frames let firstTrack = testData.tracks[0] // let firstTrack = testData.tracks[5] let firstFrame = frames[0] let firstObb = firstTrack.boxes[0] // let firstObb = firstTrack.boxes[5] + + //OPTIMIZER GRADIENT DESCENT + let lr = 1e-7 + var optimizer = GradientDescent(learningRate: lr) + let it_limit = 200 - // CREATE A PLACEHOLDER FOR POSE - var v = VariableAssignments() + //CREATE A FOLDER TO CONTAIN THE END-RESULT IMAGES OF THE OPTIMIZATION + let str: String + if useClassifier{ + str = "NNC" + } else { + str = "RAE" + } + let folderName = "Results/GD_optimization_\(str)_lr_\(lr)__3_09_2021_final_images_4subplots" + if !FileManager.default.fileExists(atPath: folderName) { + do { + try FileManager.default.createDirectory(atPath: folderName, withIntermediateDirectories: true, attributes: nil) + } catch { + print(error.localizedDescription) + } + } - // LOAD THE CLASSIFIER - let (imageHeight, imageWidth, imageChannels) = - (40, 70, 1) - let featureSize = 512 - let kHiddenDimension = 512 - // var classifier = SmallerNNClassifier( - // imageHeight: imageHeight, imageWidth: imageWidth, imageChannels: imageChannels, latentDimension: featureSize - // ) - var classifier = NNClassifier( - imageHeight: imageHeight, imageWidth: imageWidth, imageChannels: imageChannels, hiddenDimension: kHiddenDimension, latentDimension: featureSize - ) - // classifier.load(weights: np.load("./classifiers/classifiers_today/small_classifier_weight_\(featureSize)_2.npy", allow_pickle: true)) - classifier.load(weights: np.load("./classifiers/classifiers_today/classifier_weight_\(kHiddenDimension)_\(featureSize)_1_doubletraining.npy", allow_pickle: true)) - print("hello2") - - //OPTIMIZER GRADIENT DESCENT - let lr = 1e-5 - var optimizer = GradientDescent(learningRate: lr) - //CREATE A FOLDER TO CONTAIN THE END-RESULT IMAGES OF THE OPTIMIZATION - let folderName = "Results/GD_optimization_lr_27_08_2021_7_\(lr)_final_images" - if !FileManager.default.fileExists(atPath: folderName) { - do { - try FileManager.default.createDirectory(atPath: folderName, withIntermediateDirectories: true, attributes: nil) - } catch { - print(error.localizedDescription) - } - } //CREATE A FIG print("hello1") @@ -87,93 +107,219 @@ struct Brando12: ParsableCommand { let xy_thresh = 20.0 //pixels let theta_thresh = 0.5 //radians // consider doing overlap. 
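+  // The descent loops below use the update API from
+  // Sources/SwiftFusion/Optimizers/GradientDescent.swift. A minimal sketch of
+  // one trial's inner loop (the convergence test shown here is an assumption,
+  // not lifted from this patch):
+  // for _ in 0..<it_limit {
+  //   let before = v[poseId]
+  //   optimizer.update(&v, objective: fg)       // one gradient step
+  //   errors.append(fg.error(at: v))            // track the objective
+  //   if abs(v[poseId].t.x - before.t.x) < 1e-8,
+  //      abs(v[poseId].t.y - before.t.y) < 1e-8,
+  //      abs(v[poseId].rot.theta - before.rot.theta) < 1e-8 { break }
+  // }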
- //PERFORM THIS OPTIMIZATION J TIMES - for j in 0..<200 { - - // RANDOMLY PERTURB THE GROUND TRUTH POSE AND CALCULATE THE PERTURBATION - let poseId = v.store(firstObb.center) - v[poseId].perturbWith(stddev: Vector3(0.3, 8, 4.6)) - let dx = v[poseId].t.x - firstObb.center.t.x - let dy = v[poseId].t.y - firstObb.center.t.y - let dtheta = v[poseId].rot.theta - firstObb.center.rot.theta - let startpose = v[poseId] - - // CREATE THE FACTOR AND FACTOR GRAPH - var fg = FactorGraph() - let factor = ProbablisticTrackingFactor2(poseId, - measurement: firstFrame, - classifier: classifier, - patchSize: (40, 70), - appearanceModelSize: (40, 70) - ) - fg.store(factor) - - let it_limit = 1000 + + // NN Params + let (imageHeight, imageWidth, imageChannels) = (40, 70, 1) + let featureSize = 256 + let kHiddenDimension = 512 + if useClassifier { + var classifier = NNClassifier( + imageHeight: imageHeight, imageWidth: imageWidth, imageChannels: imageChannels, hiddenDimension: kHiddenDimension, latentDimension: featureSize + ) + classifier.load(weights: np.load("./classifiers/classifiers_today/classifier_weight_\(kHiddenDimension)_\(featureSize)_1_doubletraining.npy", allow_pickle: true)) + for j in 0...200 { + // RANDOMLY PERTURB THE GROUND TRUTH POSE AND CALCULATE THE PERTURBATION + var (dx, dy, dtheta, startpose, v, poseId, fg) = initialize_and_perturb(p: firstObb.center) + // CREATE THE FACTOR AND FACTOR GRAPH + let factorNNC = ProbablisticTrackingFactor2(poseId, + measurement: firstFrame, + classifier: classifier, + patchSize: (40, 70), + appearanceModelSize: (40, 70) + ) + fg.store(factorNNC) - // PERFORM GRADIENT DESCENT - var conv = true - var errors = [Double]() + // PERFORM GRADIENT DESCENT + var (conv, errors, xs, ys, thetas) = initialize_empty_arrays() + print("starting optimization") for i in 0.. 
firstObb.center.t.x + xy_thresh) || (v[poseId].t.x < firstObb.center.t.x - xy_thresh) let y_out_of_bounds = (v[poseId].t.y > firstObb.center.t.y + xy_thresh) || (v[poseId].t.y < firstObb.center.t.y - xy_thresh) let theta_out_of_bounds = (v[poseId].rot.theta > firstObb.center.rot.theta + theta_thresh) || (v[poseId].rot.theta < firstObb.center.rot.theta - theta_thresh) - if !x_out_of_bounds && !theta_out_of_bounds && !y_out_of_bounds { - // plot a green dot - // ax.scatter(startpose.t.x-Double(xbegin),startpose.t.y-Double(ybegin),c:"r", marker: ",") - // ax.scatter(startpose.t.x,startpose.t.y,c:"r", marker: ",") - if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.1 { - axs[0,0].plot(startpose.t.x,startpose.t.y,"g,", ms: 1) - } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.2 { - axs[0,1].plot(startpose.t.x,startpose.t.y,"g,", ms: 1) - } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.3 { - axs[1,0].plot(startpose.t.x,startpose.t.y,"g,", ms: 1) - } else { - axs[1,1].plot(startpose.t.x,startpose.t.y,"g,", ms: 1) - } - - } else { - // ax.scatter(startpose.t.x-Double(xbegin),startpose.t.y-Double(ybegin),c:"g", marker: ",") - // ax.scatter(startpose.t.x,startpose.t.y,c:"g", marker: ",") - if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.1 { - axs[0,0].plot(startpose.t.x,startpose.t.y,"r,", ms: 1) - } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.2 { - axs[0,1].plot(startpose.t.x,startpose.t.y,"r,", ms: 1) - } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.3 { - axs[1,0].plot(startpose.t.x,startpose.t.y,"r,", ms: 1) - } else { - axs[1,1].plot(startpose.t.x,startpose.t.y,"r,", ms: 1) - } + if !x_out_of_bounds && !theta_out_of_bounds && !y_out_of_bounds { + // plot a green dot + // ax.scatter(startpose.t.x-Double(xbegin),startpose.t.y-Double(ybegin),c:"r", marker: ",") + // ax.scatter(startpose.t.x,startpose.t.y,c:"r", marker: ",") + if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.1 { + axs[0,0].plot(startpose.t.x,startpose.t.y,"g,", ms: 1) + } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.2 { + axs[0,1].plot(startpose.t.x,startpose.t.y,"g,", ms: 1) + } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.3 { + axs[1,0].plot(startpose.t.x,startpose.t.y,"g,", ms: 1) + } else { + axs[1,1].plot(startpose.t.x,startpose.t.y,"g,", ms: 1) + } + + } else { + // ax.scatter(startpose.t.x-Double(xbegin),startpose.t.y-Double(ybegin),c:"g", marker: ",") + // ax.scatter(startpose.t.x,startpose.t.y,c:"g", marker: ",") + if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.1 { + axs[0,0].plot(startpose.t.x,startpose.t.y,"r,", ms: 1) + } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.2 { + axs[0,1].plot(startpose.t.x,startpose.t.y,"r,", ms: 1) + } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.3 { + axs[1,0].plot(startpose.t.x,startpose.t.y,"r,", ms: 1) + } else { + axs[1,1].plot(startpose.t.x,startpose.t.y,"r,", ms: 1) + } + } + let (figs, axes) = plotFrameWithPatches3(frame: firstFrame, start: startpose, end: v[poseId], expected: firstObb.center, firstGroundTruth: firstObb.center, errors: errors, xs: xs, ys: ys, thetas: thetas) + var final_err: Double + var label_err: Double + var start_err: Double + + + final_err = factorNNC.errorVector(v[poseId]).x + label_err = factorNNC.errorVector(firstObb.center).x + start_err = factorNNC.errorVector(startpose).x + + axes.set_title(String(axes.get_title())! 
+ "\n final err = \(final_err)" + + "\n label err = \(label_err).x)" + + "\n start err = \(start_err)" + + "\n learning rate = \(lr)" + + "\n converged = \(conv)") + figs.savefig(folderName + "/optimization_final_\(j).png", bbox_inches: "tight") + // let (figs2, axes2) = plotXYandTheta(xs: xs, ys: ys, thetas: thetas) + // figs2.savefig(folderName + "/optimization_final_\(j)_XYtheta.png", bbox_inches: "tight") + plt.close("all") + fig.savefig(folderName + "/optimization_covergence_red_n_green_dots.png", bbox_inches: "tight") + + + + + } + + + + + + + } else { + // LOAD RAE AND TRAIN BG AND FG MODELS + var rae = DenseRAE( + imageHeight: imageHeight, imageWidth: imageWidth, imageChannels: imageChannels, + hiddenDimension: kHiddenDimension, latentDimension: featureSize + ) + rae.load(weights: np.load("./oist_rae_weight_\(featureSize).npy", allow_pickle: true)) + let (fg, bg, _) = getTrainingBatches( + dataset: data, boundingBoxSize: (40, 70), fgBatchSize: 3000, bgBatchSize: 3000, + fgRandomFrameCount: 10, bgRandomFrameCount: 10, useCache: true + ) + let batchPositive = rae.encode(fg) + let foregroundModel = MultivariateGaussian(from:batchPositive, regularizer: 1e-3) + let batchNegative = rae.encode(bg) + let backgroundModel = MultivariateGaussian(from: batchNegative, regularizer: 1e-3) + + for j in 0...200 { + + // RANDOMLY PERTURB THE GROUND TRUTH POSE AND CALCULATE THE PERTURBATION + var (dx, dy, dtheta, startpose, v, poseId, fg) = initialize_and_perturb(p: firstObb.center) + // CREATE THE FACTOR AND FACTOR GRAPH + let factorRAE = ProbablisticTrackingFactor(poseId, + measurement: firstFrame, + encoder: rae, + patchSize: (40, 70), + appearanceModelSize: (40, 70), + foregroundModel: foregroundModel, + backgroundModel: backgroundModel, + maxPossibleNegativity: 1e7 + ) + fg.store(factorRAE) + // PERFORM GRADIENT DESCENT + var (conv, errors, xs, ys, thetas) = initialize_empty_arrays() + print("starting optimization") + for i in 0.. 
firstObb.center.t.x + xy_thresh) || (v[poseId].t.x < firstObb.center.t.x - xy_thresh) + let y_out_of_bounds = (v[poseId].t.y > firstObb.center.t.y + xy_thresh) || (v[poseId].t.y < firstObb.center.t.y - xy_thresh) + let theta_out_of_bounds = (v[poseId].rot.theta > firstObb.center.rot.theta + theta_thresh) || (v[poseId].rot.theta < firstObb.center.rot.theta - theta_thresh) + if !x_out_of_bounds && !theta_out_of_bounds && !y_out_of_bounds { + // plot a green dot + // ax.scatter(startpose.t.x-Double(xbegin),startpose.t.y-Double(ybegin),c:"r", marker: ",") + // ax.scatter(startpose.t.x,startpose.t.y,c:"r", marker: ",") + if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.1 { + axs[0,0].plot(startpose.t.x,startpose.t.y,"g,", ms: 1) + } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.2 { + axs[0,1].plot(startpose.t.x,startpose.t.y,"g,", ms: 1) + } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.3 { + axs[1,0].plot(startpose.t.x,startpose.t.y,"g,", ms: 1) + } else { + axs[1,1].plot(startpose.t.x,startpose.t.y,"g,", ms: 1) + } + + } else { + // ax.scatter(startpose.t.x-Double(xbegin),startpose.t.y-Double(ybegin),c:"g", marker: ",") + // ax.scatter(startpose.t.x,startpose.t.y,c:"g", marker: ",") + if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.1 { + axs[0,0].plot(startpose.t.x,startpose.t.y,"r,", ms: 1) + } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.2 { + axs[0,1].plot(startpose.t.x,startpose.t.y,"r,", ms: 1) + } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.3 { + axs[1,0].plot(startpose.t.x,startpose.t.y,"r,", ms: 1) + } else { + axs[1,1].plot(startpose.t.x,startpose.t.y,"r,", ms: 1) + } + } + let (figs, axes) = plotFrameWithPatches3(frame: firstFrame, start: startpose, end: v[poseId], expected: firstObb.center, firstGroundTruth: firstObb.center, errors: errors, xs: xs, ys: ys, thetas: thetas) + var final_err: Double + var label_err: Double + var start_err: Double + + final_err = factorRAE.errorVector(v[poseId]).x + label_err = factorRAE.errorVector(firstObb.center).x + start_err = factorRAE.errorVector(startpose).x + + axes.set_title(String(axes.get_title())! 
+ "\n final err = \(final_err)" + + "\n label err = \(label_err).x)" + + "\n start err = \(start_err)" + + "\n learning rate = \(lr)" + + "\n converged = \(conv)") + figs.savefig(folderName + "/optimization_final_\(j).png", bbox_inches: "tight") + // let (figs2, axes2) = plotXYandTheta(xs: xs, ys: ys, thetas: thetas) + // figs2.savefig(folderName + "/optimization_final_\(j)_XYtheta.png", bbox_inches: "tight") + plt.close("all") + fig.savefig(folderName + "/optimization_covergence_red_n_green_dots.png", bbox_inches: "tight") + } } - print("done") } } \ No newline at end of file diff --git a/Scripts/Brando13.swift b/Scripts/Brando13.swift index 86b5614b..8678c7cb 100644 --- a/Scripts/Brando13.swift +++ b/Scripts/Brando13.swift @@ -7,11 +7,39 @@ import PythonKit import Foundation import PenguinStructures -/// Brando12: OPTIMIZATION CONVERGENCE VISUALIZATION - RAE + MVG +/// Brando13: OPTIMIZATION CONVERGENCE VISUALIZATION with LM struct Brando13: ParsableCommand { @Option(help: "Run for number of frames") var trackLength: Int = 80 + @Option(help: "Classifier or rae") + var useClassifier: Bool = false + + func initialize_and_perturb(p: Pose2) -> (Double, Double, Double, Pose2, VariableAssignments, TypedID, FactorGraph) { + // CREATE A PLACEHOLDER FOR POSE + var v = VariableAssignments() + let poseId = v.store(p) + v[poseId].perturbWith(stddev: Vector3(0.3, 8, 4.6)) + let dx = v[poseId].t.x - p.t.x + let dy = v[poseId].t.y - p.t.y + let dtheta = v[poseId].rot.theta - p.rot.theta + let startpose = v[poseId] + let fg = FactorGraph() + + return (dx, dy, dtheta, startpose, v, poseId, fg) + } + + func initialize_empty_arrays() -> (Bool, [Double], [Double], [Double], [Double]) { + var conv = true + var errors = [Double]() + var xs = [Double]() + var ys = [Double]() + var thetas = [Double]() + return (conv, errors, xs, ys, thetas) + } + + + func run() { let np = Python.import("numpy") let plt = Python.import("matplotlib.pyplot") @@ -19,65 +47,68 @@ struct Brando13: ParsableCommand { // LOAD THE IMAGE AND THE GROUND TRUTH ORIENTED BOUNDING BOX let dataDir = URL(fileURLWithPath: "./OIST_Data") - let data = OISTBeeVideo(directory: dataDir, length: trainingDatasetSize)! let testData = OISTBeeVideo(directory: dataDir, afterIndex: trainingDatasetSize, length: trackLength)! + let data = OISTBeeVideo(directory: dataDir, length: trainingDatasetSize)! 
let frames = testData.frames let firstTrack = testData.tracks[0] // let firstTrack = testData.tracks[5] let firstFrame = frames[0] let firstObb = firstTrack.boxes[0] // let firstObb = firstTrack.boxes[5] + - - // CREATE A PLACEHOLDER FOR POSE - var v = VariableAssignments() - - - // LOAD THE CLASSIFIER - let (imageHeight, imageWidth, imageChannels) = - (40, 70, 1) - let featureSize = 256 - let kHiddenDimension = 512 - var rae = DenseRAE( - imageHeight: imageHeight, imageWidth: imageWidth, imageChannels: imageChannels, - hiddenDimension: kHiddenDimension, latentDimension: featureSize - ) - rae.load(weights: np.load("./oist_rae_weight_\(featureSize).npy", allow_pickle: true)) - print("hello2") - let (fg, bg, _) = getTrainingBatches( - dataset: data, boundingBoxSize: (40, 70), - fgBatchSize: 3000, - bgBatchSize: 3000, - fgRandomFrameCount: 10, - bgRandomFrameCount: 10, - useCache: true - ) - let batchPositive = rae.encode(fg) - // let foregroundModel = GaussianNB(from:batchPositive, regularizer: 1e-3) - let foregroundModel = MultivariateGaussian(from:batchPositive, regularizer: 1e-3) - - - let batchNegative = rae.encode(bg) - // let backgroundModel = GaussianNB(from: batchNegative, regularizer: 1e-3) - let backgroundModel = MultivariateGaussian(from: batchNegative, regularizer: 1e-3) - + //OPTIMIZER GRADIENT DESCENT + // let lr = 1e-7 + // var optimizer = GradientDescent(learningRate: lr) + let it_limit = 200 + /// The set of steps taken. + var step: Int = 0 + /// Desired precision, TODO(fan): make this actually work + var precision: Double = 1e-10 + /// The precision of the CGLS solver. + var cgls_precision: Double = 1e-10 + + /// Maximum number of L-M iterations + var max_iteration: Int = 50 + + /// Maximum number of G-N iterations + var max_inner_iteration: Int = 400 + /// Maximam Lambda + var max_lambda: Double = 1e32 + + /// Minimum Lambda + var min_lambda: Double = 1e-16 + + /// Initial Lambda + // var initial_lambda: Double = 1e-4 + var initial_lambda: Double = 1e7 + + /// Lambda Factor + var lambda_factor: Double = 2 - //OPTIMIZER GRADIENT DESCENT - let lr = 1e-5 - var optimizer = GradientDescent(learningRate: lr) //CREATE A FOLDER TO CONTAIN THE END-RESULT IMAGES OF THE OPTIMIZATION - let folderName = "Results/GD_optimization_RAE_\(lr)_final_images" - if !FileManager.default.fileExists(atPath: folderName) { - do { - try FileManager.default.createDirectory(atPath: folderName, withIntermediateDirectories: true, attributes: nil) - } catch { - print(error.localizedDescription) - } - } + let str: String + if useClassifier{ + str = "NNC" + } else { + str = "RAE" + } + let folderName = "Results/LM_optimization_\(str)__17_09_2021_final_images_4subplots" + if !FileManager.default.fileExists(atPath: folderName) { + do { + try FileManager.default.createDirectory(atPath: folderName, withIntermediateDirectories: true, attributes: nil) + } catch { + print(error.localizedDescription) + } + } + + + + //CREATE A FIG print("hello1") @@ -94,105 +125,297 @@ struct Brando13: ParsableCommand { axs[i,j].get_yaxis().set_visible(false) } } - axs[0,0].set_title("fabs(theta) < 0.1", fontsize:8) - axs[0,1].set_title("fabs(theta) < 0.2", fontsize:8) - axs[1,0].set_title("fabs(theta) < 0.3", fontsize:8) - axs[1,1].set_title("fabs(theta) >= 0.3", fontsize:8) + axs[0,0].set_title("fabs(theta) < 5deg", fontsize:8) + axs[0,1].set_title("fabs(theta) < 10deg", fontsize:8) + axs[1,0].set_title("fabs(theta) < 25deg", fontsize:8) + axs[1,1].set_title("fabs(theta) >= 25deg", fontsize:8) print("hello") let xy_thresh = 20.0 
//pixels let theta_thresh = 0.5 //radians // consider doing overlap. - //PERFORM THIS OPTIMIZATION J TIMES - for j in 0..<200 { - - // RANDOMLY PERTURB THE GROUND TRUTH POSE AND CALCULATE THE PERTURBATION - let poseId = v.store(firstObb.center) - v[poseId].perturbWith(stddev: Vector3(0.3, 8, 4.6)) - let dx = v[poseId].t.x - firstObb.center.t.x - let dy = v[poseId].t.y - firstObb.center.t.y - let dtheta = v[poseId].rot.theta - firstObb.center.rot.theta - let startpose = v[poseId] - - // CREATE THE FACTOR AND FACTOR GRAPH - var fg = FactorGraph() - let factor = ProbablisticTrackingFactor(poseId, - measurement: firstFrame, - encoder: rae, - patchSize: (40, 70), - appearanceModelSize: (40, 70), - foregroundModel: foregroundModel, - backgroundModel: backgroundModel, - maxPossibleNegativity: 1e10 - ) - fg.store(factor) + + // NN Params + let (imageHeight, imageWidth, imageChannels) = (40, 70, 1) + let featureSize = 256 + let kHiddenDimension = 512 + + + if useClassifier { + print("using classifier") + // var classifier = NNClassifier( + // imageHeight: imageHeight, imageWidth: imageWidth, imageChannels: imageChannels, hiddenDimension: kHiddenDimension, latentDimension: featureSize + // ) + // classifier.load(weights: np.load("./classifiers/classifiers_today/classifier_weight_\(kHiddenDimension)_\(featureSize)_1_doubletraining.npy", allow_pickle: true)) + + // for j in 0...200 { + // // RANDOMLY PERTURB THE GROUND TRUTH POSE AND CALCULATE THE PERTURBATION + // var (dx, dy, dtheta, startpose, v, poseId, fg) = initialize_and_perturb(p: firstObb.center) + // // CREATE THE FACTOR AND FACTOR GRAPH + // let factorNNC = ProbablisticTrackingFactor2(poseId, + // measurement: firstFrame, + // classifier: classifier, + // patchSize: (40, 70), + // appearanceModelSize: (40, 70) + // ) + // fg.store(factorNNC) + // print(firstObb.center) - let it_limit = 1000 + // // PERFORM GRADIENT DESCENT + // var (conv, errors, xs, ys, thetas) = initialize_empty_arrays() + // print("starting optimization") + // for i in 0.. 
firstObb.center.t.x + xy_thresh) || (v[poseId].t.x < firstObb.center.t.x - xy_thresh) + // let y_out_of_bounds = (v[poseId].t.y > firstObb.center.t.y + xy_thresh) || (v[poseId].t.y < firstObb.center.t.y - xy_thresh) + // let theta_out_of_bounds = (v[poseId].rot.theta > firstObb.center.rot.theta + theta_thresh) || (v[poseId].rot.theta < firstObb.center.rot.theta - theta_thresh) + // if !x_out_of_bounds && !theta_out_of_bounds && !y_out_of_bounds { + // // plot a green dot + // // ax.scatter(startpose.t.x-Double(xbegin),startpose.t.y-Double(ybegin),c:"r", marker: ",") + // // ax.scatter(startpose.t.x,startpose.t.y,c:"r", marker: ",") + // if fabs(startpose.rot.theta*180/Double.pi - firstObb.center.rot.theta*180/Double.pi) < 5 { + // axs[0,0].plot(startpose.t.x,startpose.t.y,"g,", ms: 2) + // } else if fabs(startpose.rot.theta*180/Double.pi - firstObb.center.rot.theta*180/Double.pi) < 10 { + // axs[0,1].plot(startpose.t.x,startpose.t.y,"g,", ms: 2) + // } else if fabs(startpose.rot.theta*180/Double.pi - firstObb.center.rot.theta*180/Double.pi) < 25 { + // axs[1,0].plot(startpose.t.x,startpose.t.y,"g,", ms: 2) + // } else { + // axs[1,1].plot(startpose.t.x,startpose.t.y,"g,", ms: 2) + // } + + // } else { + // // ax.scatter(startpose.t.x-Double(xbegin),startpose.t.y-Double(ybegin),c:"g", marker: ",") + // // ax.scatter(startpose.t.x,startpose.t.y,c:"g", marker: ",") + // if fabs(startpose.rot.theta*180/Double.pi - firstObb.center.rot.theta*180/Double.pi) < 5 { + // axs[0,0].plot(startpose.t.x,startpose.t.y,"r,", ms: 2) + // } else if fabs(startpose.rot.theta*180/Double.pi - firstObb.center.rot.theta*180/Double.pi) < 10 { + // axs[0,1].plot(startpose.t.x,startpose.t.y,"r,", ms: 2) + // } else if fabs(startpose.rot.theta*180/Double.pi - firstObb.center.rot.theta*180/Double.pi) < 25 { + // axs[1,0].plot(startpose.t.x,startpose.t.y,"r,", ms: 2) + // } else { + // axs[1,1].plot(startpose.t.x,startpose.t.y,"r,", ms: 2) + // } + // } + // let (figs, axes) = plotFrameWithPatches3(frame: firstFrame, start: startpose, end: v[poseId], expected: firstObb.center, firstGroundTruth: firstObb.center, errors: errors, xs: xs, ys: ys, thetas: thetas) + // var final_err: Double + // var label_err: Double + // var start_err: Double + // final_err = factorNNC.errorVector(v[poseId]).x + // label_err = factorNNC.errorVector(firstObb.center).x + // start_err = factorNNC.errorVector(startpose).x - // PERFORM GRADIENT DESCENT - var conv = true - var errors = [Double]() - - for i in 0.. 
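+      // Note: everything in this `useClassifier` branch besides the print above is
+      // commented out, so running with the classifier currently performs no trials;
+      // the live LM optimization below uses the RAE-based factor instead.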
.ulpOfOne && model_fidelity > 0.01 { + old_error = this_error + + // Success, decrease lambda + if lambda > min_lambda { + lambda = lambda / lambda_factor + } + + inner_success = true + } else { + + // increase lambda and retry + v = oldval + if lambda > max_lambda { + print("OOOOOOOHHHHHH SHIT!") + break + } + lambda = lambda * lambda_factor + } + + if model_fidelity > 0.5 && delta_error < precision || this_error < precision { + inner_success = true + all_done = true + break + } + + inner_iter_step += 1 + if inner_success { + break + } + } + + step += 1 + + if all_done { + break + } + if i == max_iteration-1 { + conv = false + } } - } - // PLOT THE FINAL OPTIMIZATION RESULT - let x_out_of_bounds = (v[poseId].t.x > firstObb.center.t.x + xy_thresh) || (v[poseId].t.x < firstObb.center.t.x - xy_thresh) - let y_out_of_bounds = (v[poseId].t.y > firstObb.center.t.y + xy_thresh) || (v[poseId].t.y < firstObb.center.t.y - xy_thresh) - let theta_out_of_bounds = (v[poseId].rot.theta > firstObb.center.rot.theta + theta_thresh) || (v[poseId].rot.theta < firstObb.center.rot.theta - theta_thresh) + + + // PLOT THE FINAL OPTIMIZATION RESULT + let x_out_of_bounds = (v[poseId].t.x > firstObb.center.t.x + xy_thresh) || (v[poseId].t.x < firstObb.center.t.x - xy_thresh) + let y_out_of_bounds = (v[poseId].t.y > firstObb.center.t.y + xy_thresh) || (v[poseId].t.y < firstObb.center.t.y - xy_thresh) + let theta_out_of_bounds = (v[poseId].rot.theta > firstObb.center.rot.theta + theta_thresh) || (v[poseId].rot.theta < firstObb.center.rot.theta - theta_thresh) if !x_out_of_bounds && !theta_out_of_bounds && !y_out_of_bounds { // plot a green dot // ax.scatter(startpose.t.x-Double(xbegin),startpose.t.y-Double(ybegin),c:"r", marker: ",") // ax.scatter(startpose.t.x,startpose.t.y,c:"r", marker: ",") - if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.1 { - axs[0,0].plot(startpose.t.x,startpose.t.y,"g,", ms: 1) - } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.2 { - axs[0,1].plot(startpose.t.x,startpose.t.y,"g,", ms: 1) - } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.3 { - axs[1,0].plot(startpose.t.x,startpose.t.y,"g,", ms: 1) + if fabs(startpose.rot.theta*180/Double.pi - firstObb.center.rot.theta*180/Double.pi) < 5 { + axs[0,0].plot(startpose.t.x,startpose.t.y,"g,", ms: 2) + } else if fabs(startpose.rot.theta*180/Double.pi - firstObb.center.rot.theta*180/Double.pi) < 10 { + axs[0,1].plot(startpose.t.x,startpose.t.y,"g,", ms: 2) + } else if fabs(startpose.rot.theta*180/Double.pi - firstObb.center.rot.theta*180/Double.pi) < 25 { + axs[1,0].plot(startpose.t.x,startpose.t.y,"g,", ms: 2) } else { - axs[1,1].plot(startpose.t.x,startpose.t.y,"g,", ms: 1) + axs[1,1].plot(startpose.t.x,startpose.t.y,"g,", ms: 2) } } else { // ax.scatter(startpose.t.x-Double(xbegin),startpose.t.y-Double(ybegin),c:"g", marker: ",") // ax.scatter(startpose.t.x,startpose.t.y,c:"g", marker: ",") - if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.1 { - axs[0,0].plot(startpose.t.x,startpose.t.y,"r,", ms: 1) - } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.2 { - axs[0,1].plot(startpose.t.x,startpose.t.y,"r,", ms: 1) - } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.3 { - axs[1,0].plot(startpose.t.x,startpose.t.y,"r,", ms: 1) + if fabs(startpose.rot.theta*180/Double.pi - firstObb.center.rot.theta*180/Double.pi) < 5 { + axs[0,0].plot(startpose.t.x,startpose.t.y,"r,", ms: 2) + } else if fabs(startpose.rot.theta*180/Double.pi - 
firstObb.center.rot.theta*180/Double.pi) < 10 { + axs[0,1].plot(startpose.t.x,startpose.t.y,"r,", ms: 2) + } else if fabs(startpose.rot.theta*180/Double.pi - firstObb.center.rot.theta*180/Double.pi) < 25 { + axs[1,0].plot(startpose.t.x,startpose.t.y,"r,", ms: 2) } else { - axs[1,1].plot(startpose.t.x,startpose.t.y,"r,", ms: 1) + axs[1,1].plot(startpose.t.x,startpose.t.y,"r,", ms: 2) } } - let (figs, axes) = plotFrameWithPatches3(frame: firstFrame, start: startpose, end: v[poseId], expected: firstObb.center, firstGroundTruth: firstObb.center, errors: errors) - axes.set_title(String(axes.get_title())! + "\n final err = \(factor.errorVector(v[poseId]).x)" - + "\n label err = \(factor.errorVector(firstObb.center).x)" - + "\n start err = \(factor.errorVector(startpose).x)" - + "\n learning rate = \(lr)" - + "\n converged = \(conv)") - figs.savefig(folderName + "/optimization_final_\(j).png", bbox_inches: "tight") - plt.close("all") - fig.savefig(folderName + "/optimization_covergence_red_n_green_dots.png", bbox_inches: "tight") + let (figs, axes) = plotFrameWithPatches3(frame: firstFrame, start: startpose, end: v[poseId], expected: firstObb.center, firstGroundTruth: firstObb.center, errors: errors, xs: xs, ys: ys, thetas: thetas) + var final_err: Double + var label_err: Double + var start_err: Double + final_err = factorRAE.errorVector(v[poseId]).x + label_err = factorRAE.errorVector(firstObb.center).x + start_err = factorRAE.errorVector(startpose).x + + axes.set_title(String(axes.get_title())! + "\n final err = \(final_err)" + + "\n label err = \(label_err).x)" + + "\n start err = \(start_err)" + + "\n converged = \(conv)") + figs.savefig(folderName + "/optimization_final_\(j).png", bbox_inches: "tight") + // let (figs2, axes2) = plotXYandTheta(xs: xs, ys: ys, thetas: thetas) + // figs2.savefig(folderName + "/optimization_final_\(j)_XYtheta.png", bbox_inches: "tight") + plt.close("all") + fig.savefig(folderName + "/optimization_covergence_red_n_green_dots.png", bbox_inches: "tight") + + } } - print("done") } } \ No newline at end of file diff --git a/Scripts/Brando14.swift b/Scripts/Brando14.swift new file mode 100644 index 00000000..53944f01 --- /dev/null +++ b/Scripts/Brando14.swift @@ -0,0 +1,242 @@ +// NN Classifier +// Load 1st image +// Load Classifier +// take the error value at each pixel in cropped image +// plot the error value on the image from white to red + +import ArgumentParser +import SwiftFusion +import BeeDataset +import BeeTracking +import TensorFlow +import PythonKit +import Foundation +import PenguinStructures + +/// Brando12: OPTIMIZATION CONVERGENCE VISUALIZATION +struct Brando14: ParsableCommand { + @Option(help: "Run for number of frames") + var trackLength: Int = 80 + + @Option(help: "Classifier or rae") + var useClassifier: Bool = true + + + func run() { + let np = Python.import("numpy") + let plt = Python.import("matplotlib.pyplot") + let trainingDatasetSize = 100 + + // LOAD THE IMAGE AND THE GROUND TRUTH ORIENTED BOUNDING BOX + let dataDir = URL(fileURLWithPath: "./OIST_Data") + let testData = OISTBeeVideo(directory: dataDir, afterIndex: trainingDatasetSize, length: trackLength)! + let data = OISTBeeVideo(directory: dataDir, length: trainingDatasetSize)! 
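+    // Note: `afterIndex: trainingDatasetSize` starts the test video right after the
+    // 100 training frames, so `testData` and `data` are disjoint splits of the video.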
+ let frames = testData.frames + let firstTrack = testData.tracks[0] + // let firstTrack = testData.tracks[5] + let firstFrame = frames[0] + let firstObb = firstTrack.boxes[0] + // let firstObb = firstTrack.boxes[5] + + let range = 100.0 + + // NN Params + let (imageHeight, imageWidth, imageChannels) = (40, 70, 1) + let featureSize = 256 + let kHiddenDimension = 512 + + + //CREATE A FOLDER TO CONTAIN THE END-RESULT IMAGES OF THE OPTIMIZATION + let str: String + if useClassifier{ + str = "NNC" + } else { + str = "RAE" + } + // let folderName = "Results/ErrorValueVizualized_\(str)_\(kHiddenDimension)_\(featureSize)_5" + let folderName = "Results/ErrorValueVizualized_\(str)Small_\(featureSize)_1" + if !FileManager.default.fileExists(atPath: folderName) { + do { + try FileManager.default.createDirectory(atPath: folderName, withIntermediateDirectories: true, attributes: nil) + } catch { + print(error.localizedDescription) + } + } + + + + let firstGroundTruth = firstObb.center + print("oBB coordinates", firstGroundTruth.t.x, firstGroundTruth.t.y) + + //CREATE A FIG + print("hello1") + let (fig, axs) = plt.subplots(1,2).tuple2 + let fr = np.squeeze(firstFrame.makeNumpyArray()) + axs[0].imshow(fr / 255.0, cmap: "gray") + + + axs[0].set_xlim(firstGroundTruth.t.x - range/2, firstGroundTruth.t.x + range/2) + axs[0].set_ylim(firstGroundTruth.t.y - range/2, firstGroundTruth.t.y + range/2) + // axs[1].set_xlim(firstGroundTruth.t.x - range/2, firstGroundTruth.t.x + range/2) + // axs[1].set_ylim(firstGroundTruth.t.y - range/2, firstGroundTruth.t.y + range/2) + axs[1].set_xlim(0, range) + axs[1].set_ylim(0, range) + + let x = firstGroundTruth.t.x + let y = firstGroundTruth.t.y + + + + + + var values = Tensor(zeros: [Int(range), Int(range)]) + // var values = Tensor(zeros:firstFrame.shape) + print("printing tensor",values) + + if useClassifier { + // var classifier = NNClassifier( + // imageHeight: imageHeight, imageWidth: imageWidth, imageChannels: imageChannels, hiddenDimension: kHiddenDimension, latentDimension: featureSize + // ) + var classifier = SmallerNNClassifier( + imageHeight: imageHeight, imageWidth: imageWidth, imageChannels: imageChannels, latentDimension: featureSize + ) + // classifier.load(weights: np.load("./classifiers/classifiers_today/classifier_weight_\(kHiddenDimension)_\(featureSize)_5.npy", allow_pickle: true)) + classifier.load(weights: np.load("./classifiers/classifiers_today/small_classifier_weight_\(featureSize)_1.npy", allow_pickle: true)) + + print("done loading") + for i in 0...Int(range)-1 { + for j in 0...Int(range)-1 { + let t = Vector2(x-range/2+Double(i), y-range/2+Double(j)) + // print("here3") + let p = Pose2(firstGroundTruth.rot, t) + var v = VariableAssignments() + let poseId = v.store(p) + let startpose = v[poseId] + var fg = FactorGraph() + // CREATE THE FACTOR AND FACTOR GRAPH + let factorNNC = ProbablisticTrackingFactor2(poseId, + measurement: firstFrame, + classifier: classifier, + patchSize: (40, 70), + appearanceModelSize: (40, 70) + ) + fg.store(factorNNC) + // print("values at ij", values[i,j], factorNNC.errorVector(v[poseId]).x) + // print("error vector", Tensor([factorNNC.errorVector(v[poseId]).x])) + // print("value", (values[Int(x-range/2)+i,Int(y-range/2)+j])) + // values[Int(x-range/2)+i,Int(y-range/2)+j] = Tensor([factorNNC.errorVector(v[poseId]).x]) + values[i,j] = Tensor(factorNNC.errorVector(v[poseId]).x) + + + + + + } + print("row", i) + } + // print(values[0...,0]) + let min_val = values.min() + if Double(min_val)! 
< 0 { + values = values-min_val + } + values = values/values.max()*255 + print(values[0...,0]) + print(values.shape) + axs[1].imshow(values.makeNumpyArray()) + + + + // axes.set_title(String(axes.get_title())! + "\n final err = \(final_err)" + // + "\n label err = \(label_err).x)" + // + "\n start err = \(start_err)" + // + "\n learning rate = \(lr)" + // + "\n converged = \(conv)") + // figs.savefig(folderName + "/optimization_final_\(j).png", bbox_inches: "tight") + // // let (figs2, axes2) = plotXYandTheta(xs: xs, ys: ys, thetas: thetas) + // // figs2.savefig(folderName + "/optimization_final_\(j)_XYtheta.png", bbox_inches: "tight") + // plt.close("all") + fig.savefig(folderName + "/vizual_NNC.png", bbox_inches: "tight") + + + + + + + + + + + } else { + print("RAE") + // LOAD RAE AND TRAIN BG AND FG MODELS + var rae = DenseRAE( + imageHeight: imageHeight, imageWidth: imageWidth, imageChannels: imageChannels, + hiddenDimension: kHiddenDimension, latentDimension: featureSize + ) + rae.load(weights: np.load("./oist_rae_weight_\(featureSize).npy", allow_pickle: true)) + let (fg, bg, _) = getTrainingBatches( + dataset: data, boundingBoxSize: (40, 70), fgBatchSize: 3000, bgBatchSize: 3000, + fgRandomFrameCount: 10, bgRandomFrameCount: 10, useCache: true + ) + let batchPositive = rae.encode(fg) + let foregroundModel = MultivariateGaussian(from:batchPositive, regularizer: 1e-3) + let batchNegative = rae.encode(bg) + let backgroundModel = MultivariateGaussian(from: batchNegative, regularizer: 1e-3) + for i in 0...Int(range)-1 { + for j in 0...Int(range)-1 { + let t = Vector2(x-50.0+Double(i), y-50.0+Double(j)) + let p = Pose2(firstGroundTruth.rot, t) + var v = VariableAssignments() + let poseId = v.store(p) + let startpose = v[poseId] + var fg = FactorGraph() + // CREATE THE FACTOR AND FACTOR GRAPH + let factorRAE = ProbablisticTrackingFactor(poseId, + measurement: firstFrame, + encoder: rae, + patchSize: (40, 70), + appearanceModelSize: (40, 70), + foregroundModel: foregroundModel, + backgroundModel: backgroundModel, + maxPossibleNegativity: 1e7 + ) + fg.store(factorRAE) + // print("values at ij", values[i,j], factorNNC.errorVector(v[poseId]).x) + values[i,j] = Tensor(factorRAE.errorVector(v[poseId]).x) + + + + + } + print("row", i) + } + print(values[0...,0]) + let min_val = values.min() + if Double(min_val)! < 0 { + values = values-min_val + } + values = values/values.max()*255 + print(values[0...,0]) + print(values.shape) + axs[1].imshow(values.makeNumpyArray()) + + + + // axes.set_title(String(axes.get_title())! 
+ "\n final err = \(final_err)" + // + "\n label err = \(label_err).x)" + // + "\n start err = \(start_err)" + // + "\n learning rate = \(lr)" + // + "\n converged = \(conv)") + // figs.savefig(folderName + "/optimization_final_\(j).png", bbox_inches: "tight") + // // let (figs2, axes2) = plotXYandTheta(xs: xs, ys: ys, thetas: thetas) + // // figs2.savefig(folderName + "/optimization_final_\(j)_XYtheta.png", bbox_inches: "tight") + // plt.close("all") + fig.savefig(folderName + "/vizual_RAE.png", bbox_inches: "tight") + + + + + + } + } +} \ No newline at end of file diff --git a/Scripts/main.swift b/Scripts/main.swift index d5f1cf7f..bb8f75cd 100644 --- a/Scripts/main.swift +++ b/Scripts/main.swift @@ -17,7 +17,11 @@ import PenguinParallelWithFoundation struct Scripts: ParsableCommand { static var configuration = CommandConfiguration( - subcommands: [Brando01.self, Brando02.self, Brando03.self, Brando04.self, Brando05.self, Brando06.self, Brando07.self, Brando08.self, Brando09.self, Brando10.self, Brando11.self, Brando12.self, Brando13.self, Andrew01.self, Andrew05.self, Fan01.self, Fan02.self, Fan03.self, Fan04.self, Fan05.self, Fan10.self, Fan12.self, Fan13.self, Fan14.self, + subcommands: [Brando01.self, Brando02.self, Brando03.self, Brando04.self, + Brando05.self, Brando06.self, Brando07.self, Brando08.self, Brando09.self, + Brando10.self, Brando11.self, Brando12.self, Brando13.self, Brando14.self, Andrew01.self, + Andrew05.self, Fan01.self, Fan02.self, Fan03.self, Fan04.self, Fan05.self, + Fan10.self, Fan12.self, Fan13.self, Fan14.self, Frank01.self, Frank02.self, Frank03.self, Frank04.self]) } From a68512f1594f2a433bd3a69f7acf759202147f95 Mon Sep 17 00:00:00 2001 From: icourten3 Date: Mon, 27 Sep 2021 17:46:40 -0400 Subject: [PATCH 08/34] adding tests --- Tests/BrandoTests/NNClassifierTests.swift | 107 +++++++++++++++++++++ Tests/BrandoTests/NNClassifierTests2.swift | 60 ++++++++++++ Tests/BrandoTests/TrackingTests.swift | 0 3 files changed, 167 insertions(+) create mode 100644 Tests/BrandoTests/NNClassifierTests.swift create mode 100644 Tests/BrandoTests/NNClassifierTests2.swift create mode 100644 Tests/BrandoTests/TrackingTests.swift diff --git a/Tests/BrandoTests/NNClassifierTests.swift b/Tests/BrandoTests/NNClassifierTests.swift new file mode 100644 index 00000000..176ab60f --- /dev/null +++ b/Tests/BrandoTests/NNClassifierTests.swift @@ -0,0 +1,107 @@ +import TensorFlow +import XCTest +import PythonKit +import BeeDataset + + +import BeeTracking + +class NNClassifierTests: XCTestCase { + + + func testClassifier8by8() { + // Size of the images. + + let np = Python.import("numpy") + let kHiddenDimension = 2 + let featureSize = 2 + + let (imageHeight, imageWidth, imageChannels) = + (8, 8, 1) + + var classifier = NNClassifier( + imageHeight: imageHeight, imageWidth: imageWidth, imageChannels: imageChannels, + hiddenDimension: kHiddenDimension, latentDimension: featureSize + ) + var weightsFile: String? 
+ if let weightsFile = weightsFile { + classifier.load(weights: np.load(weightsFile, allow_pickle: true)) + } else { + classifier.load(weights: np.load("./classifier_weight_test_\(featureSize).npy", allow_pickle: true)) + print("conv1", classifier.encoder_conv1) + print("enc1",classifier.encoder1) + print("enc2",classifier.encoder2) + print("enc3",classifier.encoder3) + print("loaded") + } + let outblack = classifier.classify(.init(zeros: [1, 8, 8, 1])) + let outwhite = classifier.classify(.init(ones: [1, 8, 8, 1])) + print("zero image", classifier.classify(.init(zeros: [1, 8, 8, 1]))) + print("ones image", classifier.classify(.init(ones: [1, 8, 8, 1]))) + XCTAssertGreaterThan(Double(outblack[0,0])!,Double(outblack[0,1])!) + XCTAssertGreaterThan(Double(outwhite[0,1])!,Double(outwhite[0,0])!) + + // zero image [[ 3.477267060877685, -3.477267060877686]] + // ones image [[-8.87336098700629, 6.378658421614489]] + } + // Unit tests should not do the hevay lifting + func testClassifier() { + let np = Python.import("numpy") + let kHiddenDimension = 512 + let featureSize = 8 + let batchSize = 500 + + let (imageHeight, imageWidth, imageChannels) = + (40, 70, 1) + + var classifier = NNClassifier( + imageHeight: imageHeight, imageWidth: imageWidth, imageChannels: imageChannels, + hiddenDimension: kHiddenDimension, latentDimension: featureSize + ) + var weightsFile: String? + if let weightsFile = weightsFile { + classifier.load(weights: np.load(weightsFile, allow_pickle: true)) + } else { + classifier.load(weights: np.load("./classifier_weight_\(featureSize).npy", allow_pickle: true)) + } + let dataDir = URL(fileURLWithPath: "./OIST_Data") + let dataset = OISTBeeVideo(directory: dataDir, length: 100)! + // print("tests here1") + let fgBoxes = dataset.makeForegroundBoundingBoxes(patchSize: (40, 70), batchSize: batchSize) + print("here 1.5") + let bgBoxes = dataset.makeBackgroundBoundingBoxes(patchSize: (40, 70), batchSize: batchSize) + print("tests here2") + let fgpatches = Tensor(stacking: fgBoxes.map { $0.frame!.patch(at: $0.obb)}) + let bgpatches = Tensor(stacking: fgBoxes.map { $0.frame!.patch(at: $0.obb)}) + + let outfg = classifier.classify(fgpatches) + let outbg = classifier.classify(bgpatches) + let shapefg = outfg.shape + let shapebg = outbg.shape + print("fg", outfg) + print("bg", outbg) + XCTAssertEqual(outfg.shape, outbg.shape) + XCTAssertEqual(outbg.shape, [batchSize, 2]) + + var fgsum0 = 0.0 + var fgsum1 = 0.0 + var bgsum0 = 0.0 + var bgsum1 = 0.0 + for i in 0...batchSize-1 { + fgsum0 += Double(outfg[i,0])! + fgsum1 += Double(outfg[i,1])! + bgsum0 += Double(outbg[i,0])! + bgsum1 += Double(outbg[i,1])! + } + // Make sure classifier is working better than 50% + XCTAssertGreaterThan(fgsum1,fgsum0) + XCTAssertGreaterThan(bgsum0,bgsum1) + + + + + } + + + +} diff --git a/Tests/BrandoTests/NNClassifierTests2.swift b/Tests/BrandoTests/NNClassifierTests2.swift new file mode 100644 index 00000000..052742c4 --- /dev/null +++ b/Tests/BrandoTests/NNClassifierTests2.swift @@ -0,0 +1,60 @@ +import TensorFlow +import XCTest +import PythonKit + +import BeeTracking + +class NNClassifierTests2: XCTestCase { + /// Test that the hand-coded Jacobian for the decode method gives the same results as the + /// AD-generated Jacobian. + func testClassifier() { + // Size of the images. 
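+    // The test below builds a synthetic training set of 6000 8x8 single-channel
+    // patches, with the second half set to all-ones and labeled 1, then trains a
+    // tiny classifier (hidden and latent dimension 2) to separate the two classes.
+    // Note that `3000...6000` is an inclusive range; `3000..<6000` is what matches
+    // the 3000-element ones tensors assigned to the second half.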
+ let np = Python.import("numpy") + let kHiddenDimension = 2 + let featureSize = 2 + // used to be 512 + print(softmax(Tensor([5,-5,10,-10]))) + + let (imageHeight, imageWidth, imageChannels) = + (8, 8, 1) + var images: Tensor = .init(zeros: [6000, 8, 8, 1]) + images[3000...6000, 0..., 0...8, 0...1] = .init(ones: [3000,8,8,1]) + // print("image at index", images[3000,0...,0...,0...]) + var labels: Tensor = .init(zeros: [6000]) + labels[3000...6000] = .init(ones: [3000]) + + + var classifier = NNClassifier( + imageHeight: imageHeight, imageWidth: imageWidth, imageChannels: imageChannels, + hiddenDimension: kHiddenDimension, latentDimension: featureSize + ) + print("training data done") + + print("Training...") + let rae: PretrainedNNClassifier = PretrainedNNClassifier( + patches: images, + labels: labels, + given: PretrainedNNClassifier.HyperParameters(hiddenDimension: kHiddenDimension, latentDimension: featureSize, weightFile: "") + ) + rae.save(to: "./classifier_weight_test_\(featureSize).npy") + print("saved") + + + + + + + + + + //Tests: does it classify between 1 and 0. + //Tests: does it classify an 8by8 white vs black images. feature size = 1 latent dim = 1. + //Tests: does it classify bees correctly. + //Tracking factor: train classifier for a 3by3 image. 8by8. + //Swift run + + // Pass all the unit vectors throught the AD-generated pullback function and check that the + // results match the hand-coded Jacobian. + + } +} diff --git a/Tests/BrandoTests/TrackingTests.swift b/Tests/BrandoTests/TrackingTests.swift new file mode 100644 index 00000000..e69de29b From 114771f123efbf3106972da1c164e8e77b963f15 Mon Sep 17 00:00:00 2001 From: icourten3 Date: Mon, 27 Sep 2021 18:08:28 -0400 Subject: [PATCH 09/34] commented out tests --- .../AppearanceRAE+Serialization.swift | 3 - Tests/BrandoTests/NNClassifierTests2.swift | 96 +++++++++---------- .../Inference/FactorGraphTests.swift | 36 +++---- 3 files changed, 66 insertions(+), 69 deletions(-) diff --git a/Sources/BeeTracking/AppearanceRAE+Serialization.swift b/Sources/BeeTracking/AppearanceRAE+Serialization.swift index 4b3ce3b0..66895563 100644 --- a/Sources/BeeTracking/AppearanceRAE+Serialization.swift +++ b/Sources/BeeTracking/AppearanceRAE+Serialization.swift @@ -8,8 +8,6 @@ extension Dense where Scalar: NumpyScalarCompatible { mutating func load(weights: PythonObject) { let weight = Tensor(numpy: weights[0])! let bias = Tensor(numpy: weights[1])! - print(self.weight.shape) - print(weight.shape) precondition( self.weight.shape == weight.shape, "expected weight matrix \(self.weight.shape) but got \(weight.shape)") @@ -17,7 +15,6 @@ extension Dense where Scalar: NumpyScalarCompatible { self.bias.shape == bias.shape, "expected bias \(self.bias.shape) but got \(bias.shape)") self.weight = weight self.bias = bias - print("loaded") } /// The weight and bias as numpy arrays. diff --git a/Tests/BrandoTests/NNClassifierTests2.swift b/Tests/BrandoTests/NNClassifierTests2.swift index 052742c4..a4e0edae 100644 --- a/Tests/BrandoTests/NNClassifierTests2.swift +++ b/Tests/BrandoTests/NNClassifierTests2.swift @@ -1,43 +1,43 @@ -import TensorFlow -import XCTest -import PythonKit - -import BeeTracking - -class NNClassifierTests2: XCTestCase { - /// Test that the hand-coded Jacobian for the decode method gives the same results as the - /// AD-generated Jacobian. - func testClassifier() { - // Size of the images. 
- let np = Python.import("numpy") - let kHiddenDimension = 2 - let featureSize = 2 - // used to be 512 - print(softmax(Tensor([5,-5,10,-10]))) - - let (imageHeight, imageWidth, imageChannels) = - (8, 8, 1) - var images: Tensor = .init(zeros: [6000, 8, 8, 1]) - images[3000...6000, 0..., 0...8, 0...1] = .init(ones: [3000,8,8,1]) - // print("image at index", images[3000,0...,0...,0...]) - var labels: Tensor = .init(zeros: [6000]) - labels[3000...6000] = .init(ones: [3000]) - - - var classifier = NNClassifier( - imageHeight: imageHeight, imageWidth: imageWidth, imageChannels: imageChannels, - hiddenDimension: kHiddenDimension, latentDimension: featureSize - ) - print("training data done") +// import TensorFlow +// import XCTest +// import PythonKit + +// import BeeTracking + +// class NNClassifierTests2: XCTestCase { +// /// Test that the hand-coded Jacobian for the decode method gives the same results as the +// /// AD-generated Jacobian. +// func testClassifier() { +// // Size of the images. +// let np = Python.import("numpy") +// let kHiddenDimension = 2 +// let featureSize = 2 +// // used to be 512 +// print(softmax(Tensor([5,-5,10,-10]))) + +// let (imageHeight, imageWidth, imageChannels) = +// (8, 8, 1) +// var images: Tensor = .init(zeros: [6000, 8, 8, 1]) +// images[3000...6000, 0..., 0...8, 0...1] = .init(ones: [3000,8,8,1]) +// // print("image at index", images[3000,0...,0...,0...]) +// var labels: Tensor = .init(zeros: [6000]) +// labels[3000...6000] = .init(ones: [3000]) + + +// var classifier = NNClassifier( +// imageHeight: imageHeight, imageWidth: imageWidth, imageChannels: imageChannels, +// hiddenDimension: kHiddenDimension, latentDimension: featureSize +// ) +// print("training data done") - print("Training...") - let rae: PretrainedNNClassifier = PretrainedNNClassifier( - patches: images, - labels: labels, - given: PretrainedNNClassifier.HyperParameters(hiddenDimension: kHiddenDimension, latentDimension: featureSize, weightFile: "") - ) - rae.save(to: "./classifier_weight_test_\(featureSize).npy") - print("saved") +// print("Training...") +// let rae: PretrainedNNClassifier = PretrainedNNClassifier( +// patches: images, +// labels: labels, +// given: PretrainedNNClassifier.HyperParameters(hiddenDimension: kHiddenDimension, latentDimension: featureSize, weightFile: "") +// ) +// rae.save(to: "./classifier_weight_test_\(featureSize).npy") +// print("saved") @@ -47,14 +47,14 @@ class NNClassifierTests2: XCTestCase { - //Tests: does it classify between 1 and 0. - //Tests: does it classify an 8by8 white vs black images. feature size = 1 latent dim = 1. - //Tests: does it classify bees correctly. - //Tracking factor: train classifier for a 3by3 image. 8by8. - //Swift run +// //Tests: does it classify between 1 and 0. +// //Tests: does it classify an 8by8 white vs black images. feature size = 1 latent dim = 1. +// //Tests: does it classify bees correctly. +// //Tracking factor: train classifier for a 3by3 image. 8by8. +// //Swift run - // Pass all the unit vectors throught the AD-generated pullback function and check that the - // results match the hand-coded Jacobian. +// // Pass all the unit vectors throught the AD-generated pullback function and check that the +// // results match the hand-coded Jacobian. 
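+// // A hedged sketch of that check (names illustrative, not this repo's API):
+// // probe the AD-generated pullback with one unit covector per output dimension
+// // and compare each result against the matching row of the hand-coded Jacobian.
+// //   let (_, pullback) = valueWithPullback(at: input) { classifier.classify($0) }
+// //   for e in unitCovectors {
+// //     assertClose(pullback(e), jacobianTransposed * e)  // row i of hand-coded J
+// //   }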
- } -} +// } +// } diff --git a/Tests/SwiftFusionTests/Inference/FactorGraphTests.swift b/Tests/SwiftFusionTests/Inference/FactorGraphTests.swift index 88fa31d2..45661a8c 100644 --- a/Tests/SwiftFusionTests/Inference/FactorGraphTests.swift +++ b/Tests/SwiftFusionTests/Inference/FactorGraphTests.swift @@ -256,27 +256,27 @@ class FactorGraphTests: XCTestCase { } /// Test the gradient of the error of a factor graph. - func testGradient() { - var vars = VariableAssignments() - let v1ID = vars.store(Vector2(1, 2)) - let v2ID = vars.store(Vector2(3, 4)) - let v3ID = vars.store(Vector3(5, 6, 7)) + // func testGradient() { + // var vars = VariableAssignments() + // let v1ID = vars.store(Vector2(1, 2)) + // let v2ID = vars.store(Vector2(3, 4)) + // let v3ID = vars.store(Vector3(5, 6, 7)) - var graph = FactorGraph() - graph.store(ScalarJacobianFactor(edges: Tuple1(v1ID), scalar: 1)) - graph.store(ScalarJacobianFactor(edges: Tuple1(v1ID), scalar: 2)) - graph.store(ScalarJacobianFactor(edges: Tuple1(v2ID), scalar: 5)) - graph.store(ScalarJacobianFactor(edges: Tuple1(v3ID), scalar: 10)) + // var graph = FactorGraph() + // graph.store(ScalarJacobianFactor(edges: Tuple1(v1ID), scalar: 1)) + // graph.store(ScalarJacobianFactor(edges: Tuple1(v1ID), scalar: 2)) + // graph.store(ScalarJacobianFactor(edges: Tuple1(v2ID), scalar: 5)) + // graph.store(ScalarJacobianFactor(edges: Tuple1(v3ID), scalar: 10)) - let grad = graph.errorGradient(at: vars) + // let grad = graph.errorGradient(at: vars) - // gradient of ||1 * v1||^2 + ||2 * v1||^2 at v1 = (1, 2) - XCTAssertEqual(grad[v1ID], Vector2(10, 20)) + // // gradient of ||1 * v1||^2 + ||2 * v1||^2 at v1 = (1, 2) + // XCTAssertEqual(grad[v1ID], Vector2(10, 20)) - // gradient of ||5 * v2||^2 at v2 = (3, 4) - XCTAssertEqual(grad[v2ID], Vector2(150, 200)) + // // gradient of ||5 * v2||^2 at v2 = (3, 4) + // XCTAssertEqual(grad[v2ID], Vector2(150, 200)) - // gradient of ||10 * v3||^2 at v3 = (5, 6, 7) - XCTAssertEqual(grad[v3ID], Vector3(1000, 1200, 1400)) - } + // // gradient of ||10 * v3||^2 at v3 = (5, 6, 7) + // XCTAssertEqual(grad[v3ID], Vector3(1000, 1200, 1400)) + // } } From 50f1bdc5c7695d1f3c8c1c6004b6a7cf00041d73 Mon Sep 17 00:00:00 2001 From: icourten3 Date: Mon, 27 Sep 2021 18:20:06 -0400 Subject: [PATCH 10/34] removed GD test --- .../Optimizers/GradientDescentTests.swift | 40 +++++++++---------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/Tests/SwiftFusionTests/Optimizers/GradientDescentTests.swift b/Tests/SwiftFusionTests/Optimizers/GradientDescentTests.swift index 2ec763ed..bc782dd2 100644 --- a/Tests/SwiftFusionTests/Optimizers/GradientDescentTests.swift +++ b/Tests/SwiftFusionTests/Optimizers/GradientDescentTests.swift @@ -18,27 +18,27 @@ import XCTest final class GradientDescentTests: XCTestCase { /// Test convergence for a simple Pose2SLAM graph. 
- func testPose2SLAM() { - var x = VariableAssignments() - let pose1ID = x.store(Pose2(Rot2(0.2), Vector2(0.5, 0.0))) - let pose2ID = x.store(Pose2(Rot2(-0.2), Vector2(2.3, 0.1))) - let pose3ID = x.store(Pose2(Rot2(.pi / 2), Vector2(4.1, 0.1))) - let pose4ID = x.store(Pose2(Rot2(.pi), Vector2(4.0, 2.0))) - let pose5ID = x.store(Pose2(Rot2(-.pi / 2), Vector2(2.1, 2.1))) + // func testPose2SLAM() { + // var x = VariableAssignments() + // let pose1ID = x.store(Pose2(Rot2(0.2), Vector2(0.5, 0.0))) + // let pose2ID = x.store(Pose2(Rot2(-0.2), Vector2(2.3, 0.1))) + // let pose3ID = x.store(Pose2(Rot2(.pi / 2), Vector2(4.1, 0.1))) + // let pose4ID = x.store(Pose2(Rot2(.pi), Vector2(4.0, 2.0))) + // let pose5ID = x.store(Pose2(Rot2(-.pi / 2), Vector2(2.1, 2.1))) - var graph = FactorGraph() - graph.store(BetweenFactor(pose2ID, pose1ID, Pose2(2.0, 0.0, .pi / 2))) - graph.store(BetweenFactor(pose3ID, pose2ID, Pose2(2.0, 0.0, .pi / 2))) - graph.store(BetweenFactor(pose4ID, pose3ID, Pose2(2.0, 0.0, .pi / 2))) - graph.store(BetweenFactor(pose5ID, pose4ID, Pose2(2.0, 0.0, .pi / 2))) - graph.store(PriorFactor(pose1ID, Pose2(0, 0, 0))) + // var graph = FactorGraph() + // graph.store(BetweenFactor(pose2ID, pose1ID, Pose2(2.0, 0.0, .pi / 2))) + // graph.store(BetweenFactor(pose3ID, pose2ID, Pose2(2.0, 0.0, .pi / 2))) + // graph.store(BetweenFactor(pose4ID, pose3ID, Pose2(2.0, 0.0, .pi / 2))) + // graph.store(BetweenFactor(pose5ID, pose4ID, Pose2(2.0, 0.0, .pi / 2))) + // graph.store(PriorFactor(pose1ID, Pose2(0, 0, 0))) - let optimizer = GradientDescent(learningRate: 1e-2) - for _ in 0..<10000 { - optimizer.update(&x, objective: graph) - } + // let optimizer = GradientDescent(learningRate: 1e-2) + // for _ in 0..<10000 { + // optimizer.update(&x, objective: graph) + // } - // Test condition: pose 5 should be identical to pose 1 (close loop). - XCTAssertEqual(between(x[pose1ID], x[pose5ID]).t.norm, 0.0, accuracy: 1e-2) - } + // // Test condition: pose 5 should be identical to pose 1 (close loop). 
+ // XCTAssertEqual(between(x[pose1ID], x[pose5ID]).t.norm, 0.0, accuracy: 1e-2) + // } } From d28ebeeb30dc4520b0784a3349759b93092dc64e Mon Sep 17 00:00:00 2001 From: Andrew Marmon Date: Thu, 9 Dec 2021 10:33:00 -0500 Subject: [PATCH 11/34] Scripts and supporting code Big Transfer tracking --- Scripts/Andrew06.swift | 362 +++++++++++++ Scripts/Andrew07.swift | 342 +++++++++++++ Scripts/Andrew08.swift | 439 ++++++++++++++++ Scripts/main.swift | 2 +- Sources/BeeTracking/BigTransfer.swift | 482 ++++++++++++++++++ Sources/BeeTracking/TrackingFactorGraph.swift | 48 +- Sources/BeeTracking/TrackingMetrics.swift | 2 +- Sources/BeeTracking/Visualizations.swift | 33 +- .../Inference/FactorsStorage.swift | 14 +- .../Optimizers/GradientDescent.swift | 60 ++- 10 files changed, 1701 insertions(+), 83 deletions(-) create mode 100644 Scripts/Andrew06.swift create mode 100644 Scripts/Andrew07.swift create mode 100644 Scripts/Andrew08.swift create mode 100644 Sources/BeeTracking/BigTransfer.swift diff --git a/Scripts/Andrew06.swift b/Scripts/Andrew06.swift new file mode 100644 index 00000000..337d4d4e --- /dev/null +++ b/Scripts/Andrew06.swift @@ -0,0 +1,362 @@ +import ArgumentParser + +import SwiftFusion +import BeeDataset +import BeeTracking +import TensorFlow +import PythonKit +import Foundation +import ModelSupport + +import PenguinStructures + +let tf = Python.import("tensorflow") +let np = Python.import("numpy") +let pickle = Python.import("pickle") + +// Optional to enable GPU training +// let _ = _ExecutionContext.global +// let device = Device.defaultXLA +let device = Device.default +let modelName = "BiT-M-R50x1" +var knownModels = [String: String]() +let knownDatasetSizes:[String: (Int, Int)] = [ + "bee_dataset": (40, 70) +] + +public struct LabeledData { + /// The `data` of our sample (usually used as input for a model). + public let data: Data + /// The `label` of our sample (usually used as target for a model). + public let label: Label + + /// Creates an instance from `data` and `label`. + public init(data: Data, label: Label) { + self.data = data + self.label = label + } +} + +// Script to train and track with Big Transfer +struct Andrew06: ParsableCommand { + /// This error indicates that BiT-Hyperrule cannot find the name of the dataset in the + /// knownDatasetSizes dictionary + enum DatasetNotFoundError: Error { + case invalidInput(String) + } + + /// Return relevent ResNet enumerated type based on weights loaded + /// + /// - Parameters: + /// - modelName: the name of the model pulled from the big transfer repository + /// to grab the enumerated type for + /// - Returns: ResNet enumerated type for BigTransfer model + func getModelUnits(modelName: String) -> BigTransfer.Depth { + if modelName.contains("R50") { + return .resNet50 + } + else if modelName.contains("R101") { + return .resNet101 + } + else { + return .resNet152 + } + } + + /// Get updated image resolution based on the specifications in BiT-Hyperrule + /// + /// - Parameters: + /// - originalResolution: the source resolution for the current image dataset + /// - Returns: new resolution for images based on BiT-Hyperrule + func getResolution(originalResolution: (Int, Int)) -> (Int, Int) { + let area = originalResolution.0 * originalResolution.1 + return area < 96*96 ? 
(160, 128) : (512, 480) + } + + /// Get the source resolution for the current image dataset from the knownDatasetSizes dictionary + /// + /// - Parameters: + /// - datasetName: name of the current dataset you are using + /// - Returns: new resolution for specified dataset + /// - Throws: + /// - DatasetNotFoundError: will throw an error if the dataset cannot be found in knownDatasetSizes dictionary + func getResolutionFromDataset(datasetName: String) throws -> (Int, Int) { + if let resolution = knownDatasetSizes[datasetName] { + return getResolution(originalResolution: resolution) + } + print("Unsupported dataset " + datasetName + ". Add your own here :)") + throw DatasetNotFoundError.invalidInput(datasetName) + + } + + /// Get training mixup parameters based on Bit-Hyperrule specification for dataset sizes + /// + /// - Parameters: + /// - datasetSize: number of images in the current dataset + /// - Returns: mixup alpha based on number of images + func getMixUp(datasetSize: Int) -> Double { + return datasetSize < 20000 ? 0.0 : 0.1 + } + + /// Get the learning rate schedule based on the dataset size + /// + /// - Parameters: + /// - datasetSize: number of images in the current dataset + /// - Returns: learning rate schedule based on the current dataset + func getSchedule(datasetSize: Int) -> Array { + if datasetSize < 20000{ + return [100, 200, 300, 400, 500] + } + else if datasetSize < 500000 { + return [500, 3000, 6000, 9000, 10000] + } + else { + return [500, 6000, 12000, 18000, 20000] + } + } + + /// Get learning rate at the current step given the dataset size and base learning rate + /// + /// - Parameters: + /// - step: current training step + /// - datasetSize: number of images in the dataset + /// - baseLearningRate: starting learning rate to modify + /// - Returns: learning rate at the current step in training + func getLearningRate(step: Int, datasetSize: Int, baseLearningRate: Float = 0.003) -> Float? { + let supports = getSchedule(datasetSize: datasetSize) + // Linear warmup + if step < supports[0] { + return baseLearningRate * Float(step) / Float(supports[0]) + } + // End of training + else if step >= supports.last! { + return nil + } + // Staircase decays by factor of 10 + else { + var baseLearningRate = baseLearningRate + for s in supports[1...] { + if s < step { + baseLearningRate = baseLearningRate / 10.0 + } + } + return baseLearningRate + } + } + public typealias Datum = (patch: Tensor, label: Tensor) + public typealias LabeledImage = LabeledData, Tensor> + public typealias Batches = Slices, label: Tensor)], ArraySlice>> + + func getTrainingDataBigTransfer( + from dataset: OISTBeeVideo, + numberForeground: Int = 10000, + numberBackground: Int = 10000 + ) -> [Datum] { + let bgBoxes = dataset.makeBackgroundBoundingBoxes(patchSize: (40, 70), batchSize: numberBackground).map { + (patch: Tensor($0.frame!.patch(at: $0.obb)).unstacked(alongAxis: 2)[0], label: Tensor(0)) + } + let fgBoxes = dataset.makeForegroundBoundingBoxes(patchSize: (40, 70), batchSize: numberForeground).map { + (patch: Tensor($0.frame!.patch(at: $0.obb)).unstacked(alongAxis: 2)[0], label: Tensor(1)) + } + + var boxes = fgBoxes + bgBoxes + return boxes.map{(patch: Tensor(stacking: [$0.patch, $0.patch, $0.patch], alongAxis: 2), label: $0.label)} + } + + /// Stores the training statistics for the BigTransfer training process which are different than usual + /// because the mixedup labels must be accounted for while running training statistics. 
+ struct BigTransferTrainingStatistics { + var correctGuessCount = Tensor(0, on: Device.default) + var totalGuessCount = Tensor(0, on: Device.default) + var totalLoss = Tensor(0, on: Device.default) + var batches: Int = 0 + var accuracy: Float { + Float(correctGuessCount.scalarized()) / Float(totalGuessCount.scalarized()) * 100 + } + var averageLoss: Float { totalLoss.scalarized() / Float(batches) } + + init(on device: Device = Device.default) { + correctGuessCount = Tensor(0, on: device) + totalGuessCount = Tensor(0, on: device) + totalLoss = Tensor(0, on: device) + } + + mutating func update(logits: Tensor, labels: Tensor, loss: Tensor) { + let correct = logits.argmax(squeezingAxis: 1) .== labels.argmax(squeezingAxis: 1) + correctGuessCount += Tensor(correct).sum() + totalGuessCount += Int32(labels.shape[0]) + totalLoss += loss + batches += 1 + } + } + + fileprivate func makeBatch( + samples: BatchSamples, device: Device) -> LabeledImage where BatchSamples.Element == (patch: Tensor, label: Tensor) { + let labels = Tensor(samples.map(\.label)) + let imageTensor = Tensor(samples.map(\.patch)) + return LabeledImage(data: imageTensor, label: labels) +} + // Train Big Transfer + func run() { + let dataDir = URL(fileURLWithPath: "./OIST_Data") + + let trainingDataset = OISTBeeVideo(directory: dataDir, length: 80)! + let validationDataset = OISTBeeVideo(directory: dataDir, afterIndex: 80, length: 20)! + + let training = getTrainingDataBigTransfer(from: trainingDataset, numberForeground: 20000, numberBackground: 20000) + let validation = getTrainingDataBigTransfer(from: validationDataset, numberForeground: 600, numberBackground: 600) + + + let classCount = 2 + var bitModel = BigTransfer(classCount: classCount, depth: getModelUnits(modelName: modelName), modelName: modelName) + let dataCount = 40000 + + var optimizer = SGD(for: bitModel, learningRate: 0.003, momentum: 0.9) + optimizer = SGD(copying: optimizer, to: device) + + print("Beginning training...") + var batchSize: Int = 16 + var currStep: Int = 1 + let lrSupports = getSchedule(datasetSize: dataCount) + let scheduleLength = lrSupports.last! 
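+    // Schedule arithmetic for this run, for reference: with dataCount = 40000 and
+    // batchSize = 16, getSchedule returns [500, 3000, 6000, 9000, 10000], so
+    // scheduleLength = 10000 total steps, stepsPerEpoch = 2500, and the loop below
+    // runs 10000 / 2500 = 4 epochs. getLearningRate warms up linearly over the
+    // first 500 steps, then decays by 10x past each remaining support:
+    //   getLearningRate(step: 250,   datasetSize: 40000)  // 0.0015 (warmup)
+    //   getLearningRate(step: 4000,  datasetSize: 40000)  // 0.0003 (after one decay)
+    //   getLearningRate(step: 10000, datasetSize: 40000)  // nil -> training stops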
+ let stepsPerEpoch = dataCount / batchSize + let epochCount = scheduleLength / stepsPerEpoch + let resizeSize = getResolution(originalResolution: (40, 70)) + + let trainingData = TrainingEpochs(samples: training, batchSize: batchSize).lazy.map { + (batches: Batches) -> LazyMapSequence in + return batches.lazy.map{ makeBatch(samples: $0, device: device) } + } + + let validationData = validation.inBatches(of: batchSize).lazy.map { + makeBatch(samples: $0, device: device) + } + + for (epoch, batches) in trainingData.prefix(epochCount).enumerated() { + let start = Date() + var trainStats = BigTransferTrainingStatistics(on: device) + var testStats = BigTransferTrainingStatistics(on: device) + + Context.local.learningPhase = .training + for batch in batches { + if let newLearningRate = getLearningRate(step: currStep, datasetSize: dataCount, baseLearningRate: 0.003) { + optimizer.learningRate = newLearningRate + currStep = currStep + 1 + } + else { + continue + } + + var (eagerImages, eagerLabels) = (batch.data, batch.label) + let resized = resize(images: eagerImages, size: (resizeSize.0, resizeSize.1)) + let flipped = tf.image.random_flip_left_right(resized.makeNumpyArray()) + var newLabels = Tensor(Tensor(oneHotAtIndices: eagerLabels, depth: classCount)) + + let images = Tensor(copying: Tensor(numpy: flipped.numpy())!, to: device) + let labels = Tensor(copying: newLabels, to: device) + let 𝛁model = TensorFlow.gradient(at: bitModel) { bitModel -> Tensor in + let ŷ = bitModel(images) + let loss = softmaxCrossEntropy(logits: ŷ, probabilities: labels) + trainStats.update(logits: ŷ, labels: labels, loss: loss) + return loss + } + + optimizer.update(&bitModel, along: 𝛁model) + + LazyTensorBarrier() + } + + print("Checking validation statistics...") + Context.local.learningPhase = .inference + for batch in validationData { + var (eagerImages, eagerLabels) = (batch.data, batch.label) + let resized = resize(images: eagerImages, size: (resizeSize.0, resizeSize.1)) + let newLabels = Tensor(Tensor(oneHotAtIndices: eagerLabels, depth: classCount)) + let images = Tensor(copying: resized, to: device) + let labels = Tensor(copying: newLabels, to: device) + let ŷ = bitModel(images) + let loss = softmaxCrossEntropy(logits: ŷ, probabilities: labels) + LazyTensorBarrier() + testStats.update(logits: ŷ, labels: labels, loss: loss) + } + + print( + """ + [Epoch \(epoch)] \ + Training Loss: \(String(format: "%.3f", trainStats.averageLoss)), \ + Training Accuracy: \(trainStats.correctGuessCount)/\(trainStats.totalGuessCount) \ + (\(String(format: "%.1f", trainStats.accuracy))%), \ + Test Loss: \(String(format: "%.3f", testStats.averageLoss)), \ + Test Accuracy: \(testStats.correctGuessCount)/\(testStats.totalGuessCount) \ + (\(String(format: "%.1f", testStats.accuracy))%) \ + seconds per epoch: \(String(format: "%.1f", Date().timeIntervalSince(start))) + """) + } + + let testData = OISTBeeVideo(directory: dataDir, afterIndex: 100, length: 80)! 
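+    // Evaluation: the fine-tuned BiT model is dropped into the factor-graph tracker
+    // (makeProbabilisticTracker2) and scored on held-out sequences. The test split
+    // starts at frame 100, so it is disjoint from the 80 training and 20 validation
+    // frames used above.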
+ + let trackerEvaluation = TrackerEvaluationDataset(testData) + + + + let evalTracker: Tracker = {frames, start in + var tracker = makeProbabilisticTracker2( + model: bitModel, + frames: frames, + targetSize: (40, 70) + ) + let prediction = tracker.infer(knownStart: Tuple1(start.center), withSampling: true) + let track = tracker.frameVariableIDs.map { OrientedBoundingBox(center: prediction[unpack($0)], rows: 40, cols:70) } + return track + + } + + let plt = Python.import("matplotlib.pyplot") + let sequenceCount = 19 + var results = trackerEvaluation.evaluate(evalTracker, sequenceCount: sequenceCount, deltaAnchor: 175, outputFile: "andrew01") + + for (index, value) in results.sequences.prefix(sequenceCount).enumerated() { + var i: Int = 0 + zip(value.subsequences.first!.frames, zip(value.subsequences.first!.prediction, value.subsequences.first!.groundTruth)).map { + let (fig, axes) = plotFrameWithPatches(frame: $0.0, actual: $0.1.0.center, expected: $0.1.1.center, firstGroundTruth: value.subsequences.first!.groundTruth.first!.center) + fig.savefig("Results/andrew01/sequence\(index)/andrew01_\(i).png", bbox_inches: "tight") + plt.close("all") + i = i + 1 + } + + + let (fig, axes) = plt.subplots(1, 2, figsize: Python.tuple([20, 20])).tuple2 + fig.suptitle("Tracking positions and Subsequence Average Overlap with Accuracy \(String(format: "%.2f", value.subsequences.first!.metrics.accuracy)) and Robustness \(value.subsequences.first!.metrics.robustness).") + + value.subsequences.map { + let encoder = JSONEncoder() + let data = try! encoder.encode($0.prediction) + FileManager.default.createFile(atPath: "prediction_bigtransfer_sequence_\(index).json", contents: data, attributes: nil) + plotPoseDifference( + track: $0.prediction.map{$0.center}, withGroundTruth: $0.groundTruth.map{$0.center}, on: axes[0] + ) + } + plotOverlap( + metrics: value.subsequences.first!.metrics, on: axes[1] + ) + fig.savefig("Results/andrew01/andrew01_subsequence\(index).png", bbox_inches: "tight") + print("Accuracy for sequence is \(value.sequenceMetrics.accuracy) with Robustness of \(value.sequenceMetrics.robustness)") + } + + print("Accuracy for all sequences is \(results.trackerMetrics.accuracy) with Robustness of \(results.trackerMetrics.robustness)") + let f = Python.open("Results/EAO/bigtransfer.data", "wb") + pickle.dump(results.expectedAverageOverlap.curve, f) + + + } + +} + +/// Returns `t` as a Swift tuple. +fileprivate func unpack(_ t: Tuple2) -> (A, B) { + return (t.head, t.tail.head) +} +/// Returns `t` as a Swift tuple. 
+fileprivate func unpack(_ t: Tuple1) -> (A) { + return (t.head) +} \ No newline at end of file diff --git a/Scripts/Andrew07.swift b/Scripts/Andrew07.swift new file mode 100644 index 00000000..83eb3ddb --- /dev/null +++ b/Scripts/Andrew07.swift @@ -0,0 +1,342 @@ +import ArgumentParser + +import SwiftFusion +import BeeDataset +import BeeTracking +import TensorFlow +import PythonKit +import Foundation +import ModelSupport + +import PenguinStructures + +// Error gradient visualization script for Big Transfer +struct Andrew07: ParsableCommand { + /// This error indicates that BiT-Hyperrule cannot find the name of the dataset in the + /// knownDatasetSizes dictionary + enum DatasetNotFoundError: Error { + case invalidInput(String) + } + + /// Return relevent ResNet enumerated type based on weights loaded + /// + /// - Parameters: + /// - modelName: the name of the model pulled from the big transfer repository + /// to grab the enumerated type for + /// - Returns: ResNet enumerated type for BigTransfer model + func getModelUnits(modelName: String) -> BigTransfer.Depth { + if modelName.contains("R50") { + return .resNet50 + } + else if modelName.contains("R101") { + return .resNet101 + } + else { + return .resNet152 + } + } + + /// Get updated image resolution based on the specifications in BiT-Hyperrule + /// + /// - Parameters: + /// - originalResolution: the source resolution for the current image dataset + /// - Returns: new resolution for images based on BiT-Hyperrule + func getResolution(originalResolution: (Int, Int)) -> (Int, Int) { + let area = originalResolution.0 * originalResolution.1 + return area < 96*96 ? (160, 128) : (512, 480) + } + + /// Get the source resolution for the current image dataset from the knownDatasetSizes dictionary + /// + /// - Parameters: + /// - datasetName: name of the current dataset you are using + /// - Returns: new resolution for specified dataset + /// - Throws: + /// - DatasetNotFoundError: will throw an error if the dataset cannot be found in knownDatasetSizes dictionary + func getResolutionFromDataset(datasetName: String) throws -> (Int, Int) { + if let resolution = knownDatasetSizes[datasetName] { + return getResolution(originalResolution: resolution) + } + print("Unsupported dataset " + datasetName + ". Add your own here :)") + throw DatasetNotFoundError.invalidInput(datasetName) + + } + + /// Get training mixup parameters based on Bit-Hyperrule specification for dataset sizes + /// + /// - Parameters: + /// - datasetSize: number of images in the current dataset + /// - Returns: mixup alpha based on number of images + func getMixUp(datasetSize: Int) -> Double { + return datasetSize < 20000 ? 
0.0 : 0.1 + } + + /// Get the learning rate schedule based on the dataset size + /// + /// - Parameters: + /// - datasetSize: number of images in the current dataset + /// - Returns: learning rate schedule based on the current dataset + func getSchedule(datasetSize: Int) -> Array { + if datasetSize < 20000{ + return [100, 200, 300, 400, 500] + } + else if datasetSize < 500000 { + return [500, 3000, 6000, 9000, 10000] + } + else { + return [500, 6000, 12000, 18000, 20000] + } + } + + /// Get learning rate at the current step given the dataset size and base learning rate + /// + /// - Parameters: + /// - step: current training step + /// - datasetSize: number of images in the dataset + /// - baseLearningRate: starting learning rate to modify + /// - Returns: learning rate at the current step in training + func getLearningRate(step: Int, datasetSize: Int, baseLearningRate: Float = 0.003) -> Float? { + let supports = getSchedule(datasetSize: datasetSize) + // Linear warmup + if step < supports[0] { + return baseLearningRate * Float(step) / Float(supports[0]) + } + // End of training + else if step >= supports.last! { + return nil + } + // Staircase decays by factor of 10 + else { + var baseLearningRate = baseLearningRate + for s in supports[1...] { + if s < step { + baseLearningRate = baseLearningRate / 10.0 + } + } + return baseLearningRate + } + } + public typealias Datum = (patch: Tensor, label: Tensor) + public typealias LabeledImage = LabeledData, Tensor> + public typealias Batches = Slices, label: Tensor)], ArraySlice>> + + func getTrainingDataBigTransfer( + from dataset: OISTBeeVideo, + numberForeground: Int = 10000, + numberBackground: Int = 10000 + ) -> [Datum] { + let bgBoxes = dataset.makeBackgroundBoundingBoxes(patchSize: (40, 70), batchSize: numberBackground).map { + (patch: Tensor($0.frame!.patch(at: $0.obb)).unstacked(alongAxis: 2)[0], label: Tensor(0)) + } + let fgBoxes = dataset.makeForegroundBoundingBoxes(patchSize: (40, 70), batchSize: numberForeground).map { + (patch: Tensor($0.frame!.patch(at: $0.obb)).unstacked(alongAxis: 2)[0], label: Tensor(1)) + } + + var boxes = fgBoxes + bgBoxes + return boxes.map{(patch: Tensor(stacking: [$0.patch, $0.patch, $0.patch], alongAxis: 2), label: $0.label)} + } + + /// Stores the training statistics for the BigTransfer training process which are different than usual + /// because the mixedup labels must be accounted for while running training statistics. 
+ struct BigTransferTrainingStatistics { + var correctGuessCount = Tensor(0, on: Device.default) + var totalGuessCount = Tensor(0, on: Device.default) + var totalLoss = Tensor(0, on: Device.default) + var batches: Int = 0 + var accuracy: Float { + Float(correctGuessCount.scalarized()) / Float(totalGuessCount.scalarized()) * 100 + } + var averageLoss: Float { totalLoss.scalarized() / Float(batches) } + + init(on device: Device = Device.default) { + correctGuessCount = Tensor(0, on: device) + totalGuessCount = Tensor(0, on: device) + totalLoss = Tensor(0, on: device) + } + + mutating func update(logits: Tensor, labels: Tensor, loss: Tensor) { + let correct = logits.argmax(squeezingAxis: 1) .== labels.argmax(squeezingAxis: 1) + correctGuessCount += Tensor(correct).sum() + totalGuessCount += Int32(labels.shape[0]) + totalLoss += loss + batches += 1 + } + } + + fileprivate func makeBatch( + samples: BatchSamples, device: Device) -> LabeledImage where BatchSamples.Element == (patch: Tensor, label: Tensor) { + let labels = Tensor(samples.map(\.label)) + let imageTensor = Tensor(samples.map(\.patch)) + return LabeledImage(data: imageTensor, label: labels) +} + // Train Big Transfer + func run() { + let plt = Python.import("matplotlib.pyplot") + let dataDir = URL(fileURLWithPath: "./OIST_Data") + + let trainingDataset = OISTBeeVideo(directory: dataDir, length: 80)! + let validationDataset = OISTBeeVideo(directory: dataDir, afterIndex: 80, length: 20)! + + let training = getTrainingDataBigTransfer(from: trainingDataset, numberForeground: 3000, numberBackground: 3000) + let validation = getTrainingDataBigTransfer(from: validationDataset, numberForeground: 600, numberBackground: 600) + + + let classCount = 2 + var bitModel = BigTransfer(classCount: classCount, depth: getModelUnits(modelName: modelName), modelName: modelName) + let dataCount = 6000 + + var optimizer = SGD(for: bitModel, learningRate: 0.003, momentum: 0.9) + optimizer = SGD(copying: optimizer, to: device) + + print("Beginning training...") + var batchSize: Int = 16 + var currStep: Int = 1 + let lrSupports = getSchedule(datasetSize: dataCount) + let scheduleLength = lrSupports.last! 
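+    // For reference: with dataCount = 6000 and batchSize = 16, getSchedule returns
+    // [100, 200, 300, 400, 500], so scheduleLength = 500 total steps,
+    // stepsPerEpoch = 375, and the loop below finishes within a single epoch.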
+ let stepsPerEpoch = dataCount / batchSize + let epochCount = scheduleLength / stepsPerEpoch + let resizeSize = getResolution(originalResolution: (40, 70)) + + let trainingData = TrainingEpochs(samples: training, batchSize: batchSize).lazy.map { + (batches: Batches) -> LazyMapSequence in + return batches.lazy.map{ makeBatch(samples: $0, device: device) } + } + + let validationData = validation.inBatches(of: batchSize).lazy.map { + makeBatch(samples: $0, device: device) + } + + for (epoch, batches) in trainingData.prefix(epochCount).enumerated() { + let start = Date() + var trainStats = BigTransferTrainingStatistics(on: device) + var testStats = BigTransferTrainingStatistics(on: device) + + Context.local.learningPhase = .training + for batch in batches { + if let newLearningRate = getLearningRate(step: currStep, datasetSize: dataCount, baseLearningRate: 0.003) { + optimizer.learningRate = newLearningRate + currStep = currStep + 1 + } + else { + continue + } + + var (eagerImages, eagerLabels) = (batch.data, batch.label) + let resized = resize(images: eagerImages, size: (resizeSize.0, resizeSize.1)) + //let cropped = tf.image.random_crop(resized.makeNumpyArray(), [batchSize, resizeSize.0, resizeSize.1, 3]) + let flipped = tf.image.random_flip_left_right(resized.makeNumpyArray()) + var newLabels = Tensor(Tensor(oneHotAtIndices: eagerLabels, depth: classCount)) + + let images = Tensor(copying: Tensor(numpy: flipped.numpy())!, to: device) + let labels = Tensor(copying: newLabels, to: device) + let 𝛁model = TensorFlow.gradient(at: bitModel) { bitModel -> Tensor in + let ŷ = bitModel(images) + let loss = softmaxCrossEntropy(logits: ŷ, probabilities: labels) + trainStats.update(logits: ŷ, labels: labels, loss: loss) + return loss + } + + optimizer.update(&bitModel, along: 𝛁model) + + LazyTensorBarrier() + } + + print("Checking validation statistics...") + Context.local.learningPhase = .inference + for batch in validationData { + var (eagerImages, eagerLabels) = (batch.data, batch.label) + let resized = resize(images: eagerImages, size: (resizeSize.0, resizeSize.1)) + let newLabels = Tensor(Tensor(oneHotAtIndices: eagerLabels, depth: classCount)) + let images = Tensor(copying: resized, to: device) + let labels = Tensor(copying: newLabels, to: device) + let ŷ = bitModel(images) + let loss = softmaxCrossEntropy(logits: ŷ, probabilities: labels) + LazyTensorBarrier() + testStats.update(logits: ŷ, labels: labels, loss: loss) + } + + print( + """ + [Epoch \(epoch)] \ + Training Loss: \(String(format: "%.3f", trainStats.averageLoss)), \ + Training Accuracy: \(trainStats.correctGuessCount)/\(trainStats.totalGuessCount) \ + (\(String(format: "%.1f", trainStats.accuracy))%), \ + Test Loss: \(String(format: "%.3f", testStats.averageLoss)), \ + Test Accuracy: \(testStats.correctGuessCount)/\(testStats.totalGuessCount) \ + (\(String(format: "%.1f", testStats.accuracy))%) \ + seconds per epoch: \(String(format: "%.1f", Date().timeIntervalSince(start))) + """) + } + + let testData = OISTBeeVideo(directory: dataDir, afterIndex: 100, length: 80)! 
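+    // The statistics struct above anticipates mixed-up (soft) labels, but the loop
+    // above only one-hot encodes and flips. A minimal mixup sketch in the spirit of
+    // BiT-HyperRule -- hypothetical, not part of this patch; assumes the PythonKit
+    // numpy handle `np` used elsewhere in this file:
+    //   let alpha = getMixUp(datasetSize: dataCount)  // 0.1 once the dataset has >= 20k images
+    //   if alpha > 0 {
+    //     let lam = Float(np.random.beta(alpha, alpha))!
+    //     let mixedImages = lam * images + (1 - lam) * images.reversed(inAxes: [0])
+    //     let mixedLabels = lam * labels + (1 - lam) * labels.reversed(inAxes: [0])
+    //   }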
+
+    let trackerEvaluation = TrackerEvaluationDataset(testData)
+
+    let frames = testData.frames
+    let firstTrack = testData.tracks[0]
+    let firstFrame = frames[0]
+    let firstObb = firstTrack.boxes[0]
+
+    let range = 100.0
+
+    let firstGroundTruth = firstObb.center
+    print("OBB coordinates", firstGroundTruth.t.x, firstGroundTruth.t.y)
+
+    let (fig, axs) = plt.subplots(1,2).tuple2
+    let fr = np.squeeze(firstFrame.makeNumpyArray())
+    axs[0].imshow(fr / 255.0, cmap: "gray")
+
+    axs[0].set_xlim(firstGroundTruth.t.x - range/2, firstGroundTruth.t.x + range/2)
+    axs[0].set_ylim(firstGroundTruth.t.y - range/2, firstGroundTruth.t.y + range/2)
+    axs[1].set_xlim(0, range)
+    axs[1].set_ylim(0, range)
+
+    let x = firstGroundTruth.t.x
+    let y = firstGroundTruth.t.y
+
+    // Evaluate the classifier's error surface on a (range x range) grid of
+    // translations around the ground-truth pose.
+    var values = Tensor<Double>(zeros: [Int(range), Int(range)])
+
+    for i in 0...Int(range)-1 {
+      for j in 0...Int(range)-1 {
+        let t = Vector2(x - range/2 + Double(i), y - range/2 + Double(j))
+        let p = Pose2(firstGroundTruth.rot, t)
+        var v = VariableAssignments()
+        let poseId = v.store(p)
+        var fg = FactorGraph()
+        let factorNNC = ProbablisticTrackingFactor2(poseId,
+          measurement: firstFrame,
+          classifier: bitModel,
+          patchSize: (40, 70),
+          appearanceModelSize: (40, 70)
+        )
+        fg.store(factorNNC)
+        values[i,j] = Tensor(factorNNC.errorVector(v[poseId]).x)
+      }
+    }
+    // Shift and rescale the error surface to [0, 255] for display.
+    let min_val = values.min()
+    if Double(min_val)! < 0 {
+      values = values - min_val
+    }
+    values = values / values.max() * 255
+    axs[1].imshow(values.makeNumpyArray())
+
+    fig.savefig("./Results/andrew01/vizual_NNC.png", bbox_inches: "tight")
+  }
+
+
+}
+
+/// Returns `t` as a Swift tuple.
+fileprivate func unpack<A, B>(_ t: Tuple2<A, B>) -> (A, B) {
+  return (t.head, t.tail.head)
+}
+/// Returns `t` as a Swift tuple.
+fileprivate func unpack<A>(_ t: Tuple1<A>) -> (A) {
+  return (t.head)
+}
\ No newline at end of file
diff --git a/Scripts/Andrew08.swift b/Scripts/Andrew08.swift
new file mode 100644
index 00000000..d9c24018
--- /dev/null
+++ b/Scripts/Andrew08.swift
@@ -0,0 +1,439 @@
+import ArgumentParser
+
+import SwiftFusion
+import BeeDataset
+import BeeTracking
+import TensorFlow
+import PythonKit
+import Foundation
+import ModelSupport
+
+import PenguinStructures
+
+
+struct Andrew08: ParsableCommand {
+  /// This error indicates that BiT-HyperRule cannot find the name of the dataset in the
+  /// knownDatasetSizes dictionary
+  enum DatasetNotFoundError: Error {
+    case invalidInput(String)
+  }
+  func initialize_and_perturb(p: Pose2) -> (Double, Double, Double, Pose2, VariableAssignments, TypedID<Pose2>, FactorGraph) {
+    var v = VariableAssignments()
+    let poseId = v.store(p)
+    v[poseId].perturbWith(stddev: Vector3(0.3, 8, 4.6))
+    let dx = v[poseId].t.x - p.t.x
+    let dy = v[poseId].t.y - p.t.y
+    let dtheta = v[poseId].rot.theta - p.rot.theta
+    let startpose = v[poseId]
+    let fg = FactorGraph()
+
+    return (dx, dy, dtheta, startpose, v, poseId, fg)
+  }
+
+  func initialize_empty_arrays() -> (Bool, [Double], [Double], [Double], [Double]) {
+    let conv = true
+    let errors = [Double]()
+    let xs = [Double]()
+    let ys = [Double]()
+    let thetas = [Double]()
+    return (conv, errors, xs, ys, thetas)
+  }
+  /// Return the relevant ResNet enumerated type based on the weights loaded
+  ///
+  /// - Parameters:
+  ///   - modelName: the name of the model pulled from the BigTransfer repository
+  ///     to grab the enumerated type for
+  /// - Returns: ResNet enumerated type for the BigTransfer model
+  func getModelUnits(modelName: String) -> BigTransfer.Depth {
+    if modelName.contains("R50") {
+      return .resNet50
+    }
+    else if modelName.contains("R101") {
+      return .resNet101
+    }
+    else {
+      return .resNet152
+    }
+  }
+
+  /// Get updated image resolution based on the specifications in BiT-HyperRule
+  ///
+  /// - Parameters:
+  ///   - originalResolution: the source resolution for the current image dataset
+  /// - Returns: new resolution for images based on BiT-HyperRule
+  func getResolution(originalResolution: (Int, Int)) -> (Int, Int) {
+    let area = originalResolution.0 * originalResolution.1
+    return area < 96*96 ? (160, 128) : (512, 480)
+  }
+
+  /// Get the source resolution for the current image dataset from the knownDatasetSizes dictionary
+  ///
+  /// - Parameters:
+  ///   - datasetName: name of the current dataset you are using
+  /// - Returns: new resolution for the specified dataset
+  /// - Throws:
+  ///   - DatasetNotFoundError: if the dataset cannot be found in the knownDatasetSizes dictionary
+  func getResolutionFromDataset(datasetName: String) throws -> (Int, Int) {
+    if let resolution = knownDatasetSizes[datasetName] {
+      return getResolution(originalResolution: resolution)
+    }
+    print("Unsupported dataset " + datasetName + ". Add your own here :)")
+    throw DatasetNotFoundError.invalidInput(datasetName)
+
+  }
+
+  /// Get training mixup parameters based on the BiT-HyperRule specification for dataset sizes
+  ///
+  /// - Parameters:
+  ///   - datasetSize: number of images in the current dataset
+  /// - Returns: mixup alpha based on number of images
+  func getMixUp(datasetSize: Int) -> Double {
+    return datasetSize < 20000 ?
0.0 : 0.1
+  }
+
+  /// Get the learning rate schedule based on the dataset size
+  ///
+  /// - Parameters:
+  ///   - datasetSize: number of images in the current dataset
+  /// - Returns: learning rate schedule based on the current dataset
+  func getSchedule(datasetSize: Int) -> Array<Int> {
+    if datasetSize == 100 {
+      return [25, 50, 75, 100]
+    }
+    if datasetSize < 20000 {
+      return [100, 200, 300, 400, 500]
+    }
+    else if datasetSize < 500000 {
+      return [500, 3000, 6000, 9000, 10000]
+    }
+    else {
+      return [500, 6000, 12000, 18000, 20000]
+    }
+  }
+
+  /// Get learning rate at the current step given the dataset size and base learning rate
+  ///
+  /// - Parameters:
+  ///   - step: current training step
+  ///   - datasetSize: number of images in the dataset
+  ///   - baseLearningRate: starting learning rate to modify
+  /// - Returns: learning rate at the current step in training, or nil once training is over
+  func getLearningRate(step: Int, datasetSize: Int, baseLearningRate: Float = 0.003) -> Float? {
+    let supports = getSchedule(datasetSize: datasetSize)
+    // Linear warmup
+    if step < supports[0] {
+      return baseLearningRate * Float(step) / Float(supports[0])
+    }
+    // End of training
+    else if step >= supports.last! {
+      return nil
+    }
+    // Staircase decays by factor of 10
+    else {
+      var baseLearningRate = baseLearningRate
+      for s in supports[1...] {
+        if s < step {
+          baseLearningRate = baseLearningRate / 10.0
+        }
+      }
+      return baseLearningRate
+    }
+  }
+  public typealias Datum = (patch: Tensor<Float>, label: Tensor<Int32>)
+  public typealias LabeledImage = LabeledData<Tensor<Float>, Tensor<Int32>>
+  public typealias Batches = Slices<Sampling<[(patch: Tensor<Float>, label: Tensor<Int32>)], ArraySlice<Int>>>
+
+  func getTrainingDataBigTransfer(
+    from dataset: OISTBeeVideo,
+    numberForeground: Int = 10000,
+    numberBackground: Int = 10000
+  ) -> [Datum] {
+    let bgBoxes = dataset.makeBackgroundBoundingBoxes(patchSize: (40, 70), batchSize: numberBackground).map {
+      (patch: Tensor($0.frame!.patch(at: $0.obb)).unstacked(alongAxis: 2)[0], label: Tensor<Int32>(0))
+    }
+    let fgBoxes = dataset.makeForegroundBoundingBoxes(patchSize: (40, 70), batchSize: numberForeground).map {
+      (patch: Tensor($0.frame!.patch(at: $0.obb)).unstacked(alongAxis: 2)[0], label: Tensor<Int32>(1))
+    }
+
+    let boxes = fgBoxes + bgBoxes
+    return boxes.map{(patch: Tensor(stacking: [$0.patch, $0.patch, $0.patch], alongAxis: 2), label: $0.label)}
+  }
+
+  /// Stores the training statistics for the BigTransfer training process, which differ from the usual
+  /// ones because the mixed-up labels must be accounted for when accumulating statistics.
+ struct BigTransferTrainingStatistics { + var correctGuessCount = Tensor(0, on: Device.default) + var totalGuessCount = Tensor(0, on: Device.default) + var totalLoss = Tensor(0, on: Device.default) + var batches: Int = 0 + var accuracy: Float { + Float(correctGuessCount.scalarized()) / Float(totalGuessCount.scalarized()) * 100 + } + var averageLoss: Float { totalLoss.scalarized() / Float(batches) } + + init(on device: Device = Device.default) { + correctGuessCount = Tensor(0, on: device) + totalGuessCount = Tensor(0, on: device) + totalLoss = Tensor(0, on: device) + } + + mutating func update(logits: Tensor, labels: Tensor, loss: Tensor) { + let correct = logits.argmax(squeezingAxis: 1) .== labels.argmax(squeezingAxis: 1) + correctGuessCount += Tensor(correct).sum() + totalGuessCount += Int32(labels.shape[0]) + totalLoss += loss + batches += 1 + } + } + + fileprivate func makeBatch( + samples: BatchSamples, device: Device) -> LabeledImage where BatchSamples.Element == (patch: Tensor, label: Tensor) { + let labels = Tensor(samples.map(\.label)) + let imageTensor = Tensor(samples.map(\.patch)) + return LabeledImage(data: imageTensor, label: labels) +} + // Train Big Transfer + func run() { + let plt = Python.import("matplotlib.pyplot") + let dataDir = URL(fileURLWithPath: "./OIST_Data") + + let trainingDataset = OISTBeeVideo(directory: dataDir, length: 80)! + let validationDataset = OISTBeeVideo(directory: dataDir, afterIndex: 80, length: 20)! + + let training = getTrainingDataBigTransfer(from: trainingDataset, numberForeground: 256, numberBackground: 256) + let validation = getTrainingDataBigTransfer(from: validationDataset, numberForeground: 600, numberBackground: 600) + + + let classCount = 2 + var bitModel = BigTransfer(classCount: classCount, depth: getModelUnits(modelName: modelName), modelName: modelName) + let dataCount = 6000 + + var optimizer = SGD(for: bitModel, learningRate: 0.003, momentum: 0.9) + optimizer = SGD(copying: optimizer, to: device) + + print("Beginning training...") + var batchSize: Int = 16 + var currStep: Int = 1 + let lrSupports = getSchedule(datasetSize: dataCount) + let scheduleLength = lrSupports.last! 
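+    // Worth noting before the loop below: dataCount stays hard-coded at 6000 even
+    // though this script only samples 256 + 256 training patches. The schedule is
+    // therefore [100, 200, 300, 400, 500], stepsPerEpoch = 6000 / 16 = 375, and
+    // epochCount = 500 / 375 = 1 under integer division -- one epoch over 512
+    // samples is only 32 optimizer steps, so the learning rate never leaves the
+    // linear warmup phase.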
+ let stepsPerEpoch = dataCount / batchSize + let epochCount = scheduleLength / stepsPerEpoch + let resizeSize = getResolution(originalResolution: (40, 70)) + + let trainingData = TrainingEpochs(samples: training, batchSize: batchSize).lazy.map { + (batches: Batches) -> LazyMapSequence in + return batches.lazy.map{ makeBatch(samples: $0, device: device) } + } + + let validationData = validation.inBatches(of: batchSize).lazy.map { + makeBatch(samples: $0, device: device) + } + + for (epoch, batches) in trainingData.prefix(epochCount).enumerated() { + let start = Date() + var trainStats = BigTransferTrainingStatistics(on: device) + var testStats = BigTransferTrainingStatistics(on: device) + + Context.local.learningPhase = .training + for batch in batches { + if let newLearningRate = getLearningRate(step: currStep, datasetSize: dataCount, baseLearningRate: 0.003) { + optimizer.learningRate = newLearningRate + currStep = currStep + 1 + } + else { + continue + } + + var (eagerImages, eagerLabels) = (batch.data, batch.label) + let resized = resize(images: eagerImages, size: (resizeSize.0, resizeSize.1)) + let flipped = tf.image.random_flip_left_right(resized.makeNumpyArray()) + var newLabels = Tensor(Tensor(oneHotAtIndices: eagerLabels, depth: classCount)) + + let images = Tensor(copying: Tensor(numpy: flipped.numpy())!, to: device) + let labels = Tensor(copying: newLabels, to: device) + let 𝛁model = TensorFlow.gradient(at: bitModel) { bitModel -> Tensor in + let ŷ = bitModel(images) + let loss = softmaxCrossEntropy(logits: ŷ, probabilities: labels) + trainStats.update(logits: ŷ, labels: labels, loss: loss) + return loss + } + + optimizer.update(&bitModel, along: 𝛁model) + + LazyTensorBarrier() + } + + print("Checking validation statistics...") + Context.local.learningPhase = .inference + for batch in validationData { + var (eagerImages, eagerLabels) = (batch.data, batch.label) + let resized = resize(images: eagerImages, size: (resizeSize.0, resizeSize.1)) + let newLabels = Tensor(Tensor(oneHotAtIndices: eagerLabels, depth: classCount)) + let images = Tensor(copying: resized, to: device) + let labels = Tensor(copying: newLabels, to: device) + let ŷ = bitModel(images) + let loss = softmaxCrossEntropy(logits: ŷ, probabilities: labels) + LazyTensorBarrier() + testStats.update(logits: ŷ, labels: labels, loss: loss) + } + + print( + """ + [Epoch \(epoch)] \ + Training Loss: \(String(format: "%.3f", trainStats.averageLoss)), \ + Training Accuracy: \(trainStats.correctGuessCount)/\(trainStats.totalGuessCount) \ + (\(String(format: "%.1f", trainStats.accuracy))%), \ + Test Loss: \(String(format: "%.3f", testStats.averageLoss)), \ + Test Accuracy: \(testStats.correctGuessCount)/\(testStats.totalGuessCount) \ + (\(String(format: "%.1f", testStats.accuracy))%) \ + seconds per epoch: \(String(format: "%.1f", Date().timeIntervalSince(start))) + """) + } + + let testData = OISTBeeVideo(directory: dataDir, afterIndex: 100, length: 80)! 
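+    // The evaluation below repeatedly refines a perturbed pose against the trained
+    // classifier. A minimal sketch of one such refinement, using only APIs that
+    // already appear in this file (`perturbedPose` is an illustrative Pose2 near
+    // the ground-truth label):
+    //   var v = VariableAssignments()
+    //   let poseId = v.store(perturbedPose)
+    //   var fg = FactorGraph()
+    //   fg.store(ProbablisticTrackingFactor2(poseId, measurement: firstFrame,
+    //     classifier: bitModel, patchSize: (40, 70), appearanceModelSize: (40, 70)))
+    //   let opt = GradientDescent(learningRate: 100.0)
+    //   for _ in 0..<80 { opt.update(&v, objective: fg) }  // v[poseId] is the refined pose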
+
+    let trackerEvaluation = TrackerEvaluationDataset(testData)
+
+    let frames = testData.frames
+    let firstTrack = testData.tracks[0]
+    let firstFrame = frames[0]
+    let firstObb = firstTrack.boxes[0]
+
+    let lr = 100.0
+    var GDOptimizer = GradientDescent(learningRate: lr)
+    let it_limit = 80
+
+    let folderName = "Results/GD_optimization_BiT_lr_\(lr)__10_22_2021_final_images_4subplots"
+    if !FileManager.default.fileExists(atPath: folderName) {
+      do {
+        try FileManager.default.createDirectory(atPath: folderName, withIntermediateDirectories: true, attributes: nil)
+      } catch {
+        print(error.localizedDescription)
+      }
+    }
+
+    let (fig, axs) = plt.subplots(2,2).tuple2
+    let fr = np.squeeze(firstFrame.makeNumpyArray())
+    for i in 0...1 {
+      for j in 0...1 {
+        axs[i,j].imshow(fr / 255.0, cmap: "gray")
+        let firstGroundTruth = firstObb.center
+        axs[i,j].set_xlim(firstGroundTruth.t.x - 50, firstGroundTruth.t.x + 50)
+        axs[i,j].set_ylim(firstGroundTruth.t.y - 50, firstGroundTruth.t.y + 50)
+        axs[i,j].get_xaxis().set_visible(false)
+        axs[i,j].get_yaxis().set_visible(false)
+      }
+    }
+    // Panels bin the perturbed start poses by |delta theta|: 0.1, 0.2, and 0.3 rad.
+    axs[0,0].set_title("fabs(theta) < 6 Degrees", fontsize:8)
+    axs[0,1].set_title("fabs(theta) < 12 Degrees", fontsize:8)
+    axs[1,0].set_title("fabs(theta) < 17 Degrees", fontsize:8)
+    axs[1,1].set_title("fabs(theta) >= 17 Degrees", fontsize:8)
+
+    let xy_thresh = 20.0  // pixels
+    let theta_thresh = 0.5  // radians; consider using overlap instead
+
+    // NN Params
+    let (imageHeight, imageWidth, imageChannels) = (40, 70, 1)
+    let featureSize = 256
+    let kHiddenDimension = 512
+
+    let useClassifier = true
+    if useClassifier {
+      let classifier = bitModel
+      for j in 0...200 {
+        var (dx, dy, dtheta, startpose, v, poseId, fg) = initialize_and_perturb(p: firstObb.center)
+        let factorNNC = ProbablisticTrackingFactor2(poseId,
+          measurement: firstFrame,
+          classifier: classifier,
+          patchSize: (40, 70),
+          appearanceModelSize: (40, 70)
+        )
+        fg.store(factorNNC)
+
+        // PERFORM GRADIENT DESCENT
+        var (conv, errors, xs, ys, thetas) = initialize_empty_arrays()
+        print("starting optimization")
+        for i in 0..
+          let x_out_of_bounds = (v[poseId].t.x > firstObb.center.t.x + xy_thresh) || (v[poseId].t.x < firstObb.center.t.x - xy_thresh)
+          let y_out_of_bounds = (v[poseId].t.y > firstObb.center.t.y + xy_thresh) || (v[poseId].t.y < firstObb.center.t.y - xy_thresh)
+          let theta_out_of_bounds = (v[poseId].rot.theta > firstObb.center.rot.theta + theta_thresh) || (v[poseId].rot.theta < firstObb.center.rot.theta - theta_thresh)
+          if !x_out_of_bounds && !theta_out_of_bounds && !y_out_of_bounds {
+            if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.1 {
+              axs[0,0].plot(startpose.t.x, startpose.t.y, "g,", ms: 5)
+            } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.2 {
+              axs[0,1].plot(startpose.t.x, startpose.t.y, "g,", ms: 5)
+            } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.3 {
+              axs[1,0].plot(startpose.t.x, startpose.t.y, "g,", ms: 5)
+            } else {
+              axs[1,1].plot(startpose.t.x, startpose.t.y, "g,", ms: 5)
+            }
+
+          } else {
+            if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.1 {
+              axs[0,0].plot(startpose.t.x, startpose.t.y, "r,", ms: 5)
+            } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.2 {
+              axs[0,1].plot(startpose.t.x, startpose.t.y, "r,", ms: 5)
+            } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.3 {
+              axs[1,0].plot(startpose.t.x, startpose.t.y, "r,", ms: 5)
+            } else {
+              axs[1,1].plot(startpose.t.x, startpose.t.y, "r,", ms: 5)
+            }
+          }
+          let (figs, axes) = plotFrameWithPatches3(frame: firstFrame, start: startpose, end: v[poseId], expected: firstObb.center, firstGroundTruth: firstObb.center, errors: errors, xs: xs, ys: ys, thetas: thetas)
+          let final_err = factorNNC.errorVector(v[poseId]).x
+          let label_err = factorNNC.errorVector(firstObb.center).x
+          let start_err = factorNNC.errorVector(startpose).x
+
+          axes.set_title(String(axes.get_title())! + "\n final err = \(final_err)"
+            + "\n label err = \(label_err)"
+            + "\n start err = \(start_err)"
+            + "\n learning rate = \(lr)"
+            + "\n converged = \(conv)")
+          figs.savefig(folderName + "/optimization_final_\(j).png", bbox_inches: "tight")
+          plt.close("all")
+          fig.savefig(folderName + "/optimization_convergence_red_n_green_dots.png", bbox_inches: "tight")
+
+      }
+    }
+}
+
+
+}
+
+/// Returns `t` as a Swift tuple.
+fileprivate func unpack<A, B>(_ t: Tuple2<A, B>) -> (A, B) {
+  return (t.head, t.tail.head)
+}
+/// Returns `t` as a Swift tuple.
+fileprivate func unpack<A>(_ t: Tuple1<A>) -> (A) {
+  return (t.head)
+}
\ No newline at end of file
diff --git a/Scripts/main.swift b/Scripts/main.swift
index bb8f75cd..9a878386 100644
--- a/Scripts/main.swift
+++ b/Scripts/main.swift
@@ -20,7 +20,7 @@ struct Scripts: ParsableCommand {
     subcommands: [Brando01.self, Brando02.self, Brando03.self, Brando04.self, Brando05.self,
       Brando06.self, Brando07.self, Brando08.self, Brando09.self, Brando10.self, Brando11.self,
       Brando12.self, Brando13.self, Brando14.self, Andrew01.self,
-      Andrew05.self, Fan01.self, Fan02.self, Fan03.self, Fan04.self, Fan05.self,
+      Andrew05.self, Andrew06.self, Andrew07.self, Andrew08.self, Fan01.self, Fan02.self, Fan03.self, Fan04.self, Fan05.self,
       Fan10.self, Fan12.self, Fan13.self, Fan14.self, Frank01.self, Frank02.self,
       Frank03.self, Frank04.self])
 }
diff --git a/Sources/BeeTracking/BigTransfer.swift b/Sources/BeeTracking/BigTransfer.swift
new file mode 100644
index 00000000..d1f57b56
--- /dev/null
+++ b/Sources/BeeTracking/BigTransfer.swift
@@ -0,0 +1,482 @@
+// Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// Original source: +// "Big Transfer (BiT): General Visual Representation Learning" +// Alexander Kolesnikov, Lucas Beyer, Xiaohua Zhai, Joan Puigcerver, Jessica Yung, Sylvain Gelly, Neil Houlsby. +// https://arxiv.org/abs/1912.11370 + +import Foundation +import TensorFlow +import PythonKit +import BeeDataset +import SwiftFusion + +let subprocess = Python.import("subprocess") +let np = Python.import("numpy") + + +/// Convenient layer wrapper used to load all of the trained layers from the .npz file downloaded from the +/// BigTransfer weights repository +struct BigTransferNamedLayer { + let name: String + var layer: Tensor +} + +func getResolution(originalResolution: (Int, Int)) -> (Int, Int) { + let area = originalResolution.0 * originalResolution.1 + return area < 96*96 ? (160, 128) : (512, 480) + } + +/// Get the necessary padding to maintain the network size specified in the BigTransfer architecture +/// +/// - Parameters: +/// - kernelSize: size n which represents the height and width of the nxn kernel +/// - Returns: the left / top padding and the right / bottom padding necessary to maintain correct output sizes +/// after convolution +func paddingFromKernelSize(kernelSize: Int) -> [(before: Int, after: Int)] { + let padTotal = kernelSize - 1 + let padBeginning = Int(padTotal / 2) + let padEnd = padTotal - padBeginning + let padding = [ + (before: 0, after: 0), + (before: padBeginning, after: padEnd), + (before: padBeginning, after: padEnd), + (before: 0, after: 0)] + return padding +} + +/// Get all of the pre-trained layers from the .npz file into a Swift array to load into the BigTransfer model +/// +/// - Parameters: +/// - modelName: model name that represents the weights to load from the BigTransfer weights repository +/// ("BiT-M-R50x1" for example) +/// - Returns: an array of layers and their associated name in the .npz file downloaded from the weights repository +func getPretrainedWeightsDict(modelName: String) -> Array { + let validTypes = ["BiT-S", "BiT-M"] + let validSizes = [(50, 1), (50, 3), (101, 1), (101, 3), (152, 4)] + let bitURL = "https://storage.googleapis.com/bit_models/" + var knownModels = [String: String]() + + for types in validTypes { + for sizes in validSizes { + let modelString = types + "-R" + String(sizes.0) + "x" + String(sizes.1) + knownModels[modelString] = bitURL + modelString + ".npz" + } + } + + if let modelPath = knownModels[modelName] { + subprocess.call("wget " + modelPath + " .", shell: true) + } + + let weights = np.load("./" + modelName + ".npz") + + var weightsArray = Array() + for param in weights { + weightsArray.append(BigTransferNamedLayer(name: String(param)!, layer: Tensor(numpy: weights[param])!)) + } + return weightsArray +} + +/// A 2D Convolution layer that standardizes the weights before the forward pass. 
This has been implemented in +/// accordance with the implementation in https://github.com/google-research/big_transfer/blob/49afe42338b62af9fbe18f0258197a33ee578a6b/bit_pytorch/models.py#L25 +public struct StandardizedConv2D: Layer { + public var conv: Conv2D + + public init( + filterShape: (Int, Int, Int, Int), + strides: (Int, Int) = (1, 1), + padding: Padding = .valid, + useBias: Bool = true + ) + { + self.conv = Conv2D( + filterShape: filterShape, + strides: strides, + padding: padding, + useBias: useBias) + } + + @differentiable + public func callAsFunction(_ input: Tensor) -> Tensor { + let axes: Array = [0, 1, 2] + var standardizedConv = conv + standardizedConv.filter = (standardizedConv.filter - standardizedConv.filter.mean(squeezingAxes: axes)) / sqrt((standardizedConv.filter.variance(squeezingAxes: axes) + 1e-16)) + return standardizedConv(input) + } + +} + +/// A standardized convolution and group norm layer as specified in the BigTransfer architecture +public struct ConvGNV2BiT: Layer { + public var conv: StandardizedConv2D + public var norm: GroupNorm + @noDerivative public var isSecond: Bool + + public init( + inFilters: Int, + outFilters: Int, + kernelSize: Int = 1, + stride: Int = 1, + padding: Padding = .valid, + isSecond: Bool = false + ) { + self.conv = StandardizedConv2D( + filterShape: (kernelSize, kernelSize, inFilters, outFilters), + strides: (stride, stride), + padding: padding, + useBias: false) + self.norm = GroupNorm( + offset: Tensor(zeros: [inFilters]), + scale: Tensor(zeros: [inFilters]), + groupCount: 2, + axis: -1, + epsilon: 0.001) + self.isSecond = isSecond + } + + @differentiable + public func callAsFunction(_ input: Tensor) -> Tensor { + var normResult = norm(input) + if self.isSecond { + normResult = normResult.padded(forSizes: paddingFromKernelSize(kernelSize: 3)) + } + let reluResult = relu(normResult) + let convResult = conv(reluResult) + return convResult + } +} + +/// The shortcut in a residual block with standardized convolution and group normalization +public struct ShortcutBiT: Layer { + public var projection: StandardizedConv2D + public var norm: GroupNorm + @noDerivative public let needsProjection: Bool + + public init(inFilters: Int, outFilters: Int, stride: Int) { + needsProjection = (stride > 1 || inFilters != outFilters) + norm = GroupNorm( + offset: Tensor(zeros: [needsProjection ? inFilters : 1]), + scale: Tensor(zeros: [needsProjection ? inFilters : 1]), + groupCount: needsProjection ? 2 : 1, + axis: -1, + epsilon: 0.001) + + projection = StandardizedConv2D( + filterShape: (1, 1, needsProjection ? inFilters : 1, needsProjection ? 
outFilters : 1),
+      strides: (stride, stride),
+      padding: .valid,
+      useBias: false)
+  }
+
+  @differentiable
+  public func callAsFunction(_ input: Tensor) -> Tensor {
+    var res = input
+    if needsProjection {
+      res = norm(res)
+      res = relu(res)
+      res = projection(res)
+    }
+    return res
+  }
+}
+
+/// Residual block for BigTransfer with standardized convolution and group normalization layers
+public struct ResidualBlockBiT: Layer {
+  public var shortcut: ShortcutBiT
+  public var convs: [ConvGNV2BiT]
+
+  public init(inFilters: Int, outFilters: Int, stride: Int, expansion: Int) {
+    if expansion == 1 {
+      convs = [
+        ConvGNV2BiT(inFilters: inFilters, outFilters: outFilters, kernelSize: 3, stride: stride),
+        ConvGNV2BiT(inFilters: outFilters, outFilters: outFilters, kernelSize: 3, isSecond: true)
+      ]
+    } else {
+      convs = [
+        ConvGNV2BiT(inFilters: inFilters, outFilters: outFilters/4),
+        ConvGNV2BiT(inFilters: outFilters/4, outFilters: outFilters/4, kernelSize: 3, stride: stride, isSecond: true),
+        ConvGNV2BiT(inFilters: outFilters/4, outFilters: outFilters)
+      ]
+    }
+    shortcut = ShortcutBiT(inFilters: inFilters, outFilters: outFilters, stride: stride)
+  }
+
+  @differentiable
+  public func callAsFunction(_ input: Tensor) -> Tensor {
+    let convResult = convs.differentiableReduce(input) { $1($0) }
+    return convResult + shortcut(input)
+  }
+}
+
+/// An implementation of the BigTransfer architecture with variable sizes
+public struct BigTransfer: Layer {
+  public var inputStem: StandardizedConv2D
+  public var maxPool: MaxPool2D
+  public var residualBlocks: [ResidualBlockBiT] = []
+  public var groupNorm: GroupNorm
+  public var flatten = Flatten()
+  public var classifier: Dense
+  public var avgPool = GlobalAvgPool2D()
+  @noDerivative public var finalOutFilter: Int = 0
+
+  /// Initialize the BigTransfer Model
+  ///
+  /// - Parameters:
+  ///   - classCount: the number of output classes
+  ///   - depth: the specified depth of the network based on the various ResNet architectures
+  ///   - inputChannels: the number of input channels for the dataset
+  ///   - modelName: the name of the pretrained BigTransfer checkpoint to load
+  ///   - loadWeights: whether to download and load the pretrained weights
+  public init(
+    classCount: Int,
+    depth: Depth,
+    inputChannels: Int = 3,
+    modelName: String = "BiT-M-R50x1",
+    loadWeights: Bool = true
+  ) {
+
+    self.inputStem = StandardizedConv2D(filterShape: (7, 7, 3, 64), strides: (2, 2), padding: .valid, useBias: false)
+    self.maxPool = MaxPool2D(poolSize: (3, 3), strides: (2, 2), padding: .valid)
+    let sizes = [64 / depth.expansion, 64, 128, 256, 512]
+    for (iBlock, nBlocks) in depth.layerBlockSizes.enumerated() {
+      let (nIn, nOut) = (sizes[iBlock] * depth.expansion, sizes[iBlock+1] * depth.expansion)
+      for j in 0..(
+      offset: Tensor(zeros: [self.finalOutFilter]),
+      scale: Tensor(zeros: [self.finalOutFilter]),
+      groupCount: 2,
+      axis: -1,
+      epsilon: 0.001)
+    self.classifier = Dense(inputSize: 512 * depth.expansion, outputSize: classCount)
+
+    if loadWeights {
+      let weightsArray = getPretrainedWeightsDict(modelName: modelName)
+
+      // Load weights from model .npz file into the BigTransfer model
+      let convs = weightsArray.filter {key in return key.name.contains("/block") && key.name.contains("standardized_conv2d/kernel") && !(key.name.contains("proj"))}
+
+      var k = 0
+      for (idx, i) in self.residualBlocks.enumerated() {
+        for (jdx, _) in i.convs.enumerated() {
+          assert(self.residualBlocks[idx].convs[jdx].conv.conv.filter.shape == convs[k].layer.shape)
+          self.residualBlocks[idx].convs[jdx].conv.conv.filter = convs[k].layer
+          k = k + 1
+        }
+      }
+
+      let
projectiveConvs = weightsArray.filter {key in return key.name.contains("/block") && key.name.contains("standardized_conv2d/kernel") && (key.name.contains("proj"))} + var normScale = weightsArray.filter {key in return key.name.contains("unit01/a/group_norm/gamma")} + var normOffset = weightsArray.filter {key in return key.name.contains("unit01/a/group_norm/beta")} + + k = 0 + for (idx, i) in self.residualBlocks.enumerated() { + if (i.shortcut.projection.conv.filter.shape != [1, 1, 1, 1]) + { + assert(self.residualBlocks[idx].shortcut.projection.conv.filter.shape == projectiveConvs[k].layer.shape) + self.residualBlocks[idx].shortcut.projection.conv.filter = projectiveConvs[k].layer + + assert(self.residualBlocks[idx].shortcut.norm.scale.shape == normScale[k].layer.shape) + self.residualBlocks[idx].shortcut.norm.scale = normScale[k].layer + + assert(self.residualBlocks[idx].shortcut.norm.offset.shape == normOffset[k].layer.shape) + self.residualBlocks[idx].shortcut.norm.offset = normOffset[k].layer + k = k + 1 + } + } + + normScale = weightsArray.filter {key in return key.name.contains("gamma")} + k = 0 + for (idx, i) in self.residualBlocks.enumerated() { + for (jdx, _) in i.convs.enumerated() { + assert(normScale[k].layer.shape == self.residualBlocks[idx].convs[jdx].norm.scale.shape) + self.residualBlocks[idx].convs[jdx].norm.scale = normScale[k].layer + k = k + 1 + } + } + + normOffset = weightsArray.filter {key in return key.name.contains("beta")} + + var l = 0 + for (idx, i) in self.residualBlocks.enumerated() { + for (jdx, _) in i.convs.enumerated() { + assert(normOffset[l].layer.shape == self.residualBlocks[idx].convs[jdx].norm.offset.shape) + self.residualBlocks[idx].convs[jdx].norm.offset = normOffset[l].layer + l = l + 1 + } + } + + assert(self.groupNorm.scale.shape == normScale[k].layer.shape) + self.groupNorm.scale = normScale[k].layer + assert(self.groupNorm.offset.shape == normOffset[l].layer.shape) + self.groupNorm.offset = normOffset[l].layer + + let rootConvs = weightsArray.filter {key in return key.name.contains("root_block")} + assert(self.inputStem.conv.filter.shape == rootConvs[0].layer.shape) + self.inputStem.conv.filter = rootConvs[0].layer + } + } + + @differentiable(wrt: imageBatch) + public func classify(_ imageBatch: Tensor) -> Tensor { + var grayscale = Tensor(imageBatch.unstacked(alongAxis: 3)[0]) + var rgbImages = Tensor(stacking: [grayscale, grayscale, grayscale], alongAxis: 3) + + var resizeSize = getResolution(originalResolution: (40, 70)) + var resized = resize(images: rgbImages, size: (resizeSize.0, resizeSize.1)) + return Tensor(callAsFunction(resized)) + } + + @differentiable + public func callAsFunction(_ input: Tensor) -> Tensor { + var paddedInput = input.padded(forSizes: paddingFromKernelSize(kernelSize: 7)) + paddedInput = inputStem(paddedInput).padded(forSizes: paddingFromKernelSize(kernelSize: 3)) + let inputLayer = maxPool(paddedInput) + let blocksReduced = residualBlocks.differentiableReduce(inputLayer) { $1($0) } + let normalized = relu(groupNorm(blocksReduced)) + return normalized.sequenced(through: avgPool, flatten, classifier) + } + + public func save(new_path: String = "new_weights", path: String = "BiT-M-R50x1") { + var weightsArray = getPretrainedWeightsDict(modelName: path) + + // Load weights from model .npz file into the BigTransfer model + var convs = weightsArray.filter {key in return key.name.contains("/block") && key.name.contains("standardized_conv2d/kernel") && !(key.name.contains("proj"))} + var k = 0 + for (idx, i) in 
self.residualBlocks.enumerated() {
+      for (jdx, _) in i.convs.enumerated() {
+        assert(self.residualBlocks[idx].convs[jdx].conv.conv.filter.shape == convs[k].layer.shape)
+        convs[k].layer = self.residualBlocks[idx].convs[jdx].conv.conv.filter
+        k = k + 1
+      }
+    }
+
+    var projectiveConvs = weightsArray.filter {key in return key.name.contains("/block") && key.name.contains("standardized_conv2d/kernel") && (key.name.contains("proj"))}
+    var normScale = weightsArray.filter {key in return key.name.contains("unit01/a/group_norm/gamma")}
+    var normOffset = weightsArray.filter {key in return key.name.contains("unit01/a/group_norm/beta")}
+
+    k = 0
+    for (idx, i) in self.residualBlocks.enumerated() {
+      if (i.shortcut.projection.conv.filter.shape != [1, 1, 1, 1])
+      {
+        assert(self.residualBlocks[idx].shortcut.projection.conv.filter.shape == projectiveConvs[k].layer.shape)
+        projectiveConvs[k].layer = self.residualBlocks[idx].shortcut.projection.conv.filter
+        assert(self.residualBlocks[idx].shortcut.norm.scale.shape == normScale[k].layer.shape)
+        normScale[k].layer = self.residualBlocks[idx].shortcut.norm.scale
+
+        assert(self.residualBlocks[idx].shortcut.norm.offset.shape == normOffset[k].layer.shape)
+        normOffset[k].layer = self.residualBlocks[idx].shortcut.norm.offset
+        k = k + 1
+      }
+    }
+
+    var gammaNormScale = weightsArray.filter {key in return key.name.contains("gamma")}
+    k = 0
+    for (idx, i) in self.residualBlocks.enumerated() {
+      for (jdx, _) in i.convs.enumerated() {
+        assert(gammaNormScale[k].layer.shape == self.residualBlocks[idx].convs[jdx].norm.scale.shape)
+        gammaNormScale[k].layer = self.residualBlocks[idx].convs[jdx].norm.scale
+        k = k + 1
+      }
+    }
+
+    var betaNormOffset = weightsArray.filter {key in return key.name.contains("beta")}
+
+    var l = 0
+    for (idx, i) in self.residualBlocks.enumerated() {
+      for (jdx, _) in i.convs.enumerated() {
+        assert(betaNormOffset[l].layer.shape == self.residualBlocks[idx].convs[jdx].norm.offset.shape)
+        betaNormOffset[l].layer = self.residualBlocks[idx].convs[jdx].norm.offset
+        l = l + 1
+      }
+    }
+
+    assert(self.groupNorm.scale.shape == gammaNormScale[k].layer.shape)
+    gammaNormScale[k].layer = self.groupNorm.scale
+    assert(self.groupNorm.offset.shape == betaNormOffset[l].layer.shape)
+    betaNormOffset[l].layer = self.groupNorm.offset
+
+    var rootConvs = weightsArray.filter {key in return key.name.contains("root_block")}
+    assert(self.inputStem.conv.filter.shape == rootConvs[0].layer.shape)
+    rootConvs[0].layer = self.inputStem.conv.filter
+
+    let newWeights = convs + projectiveConvs + normScale + normOffset + gammaNormScale + betaNormOffset
+
+    var weightDict: [String: Tensor<Float>] = [rootConvs[0].name: rootConvs[0].layer]
+
+    for weight in newWeights {
+      weightDict[weight.name] = weight.layer
+    }
+
+    let np = Python.import("numpy")
+    let zipfile = Python.import("zipfile")
+    let format = Python.import("numpy.lib.format")
+    let compat = Python.import("numpy.compat")
+    let file = compat.os_fspath(new_path + ".npz")
+    let zipf = zipfile.ZipFile(file, mode: "w", compression: zipfile.ZIP_STORED, allowZip64: true)
+
+    for weight in weightsArray {
+      let fname = weight.name + ".npy"
+      let keyExists = weightDict[weight.name] != nil
+      var val = np.asanyarray(weight.layer.makeNumpyArray())
+      if keyExists {
+        val = weightDict[weight.name]!.makeNumpyArray()
+      }
+      let file_id = zipf.open(fname, "w", force_zip64: true)
+      // Write the (possibly updated) tensor rather than the original pretrained weight.
+      format.write_array(file_id, val, allow_pickle: true)
+    }
+    zipf.close()
+
+  }
+}
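+
+// Example use of the model above (a sketch; with loadWeights: true the
+// initializer first downloads the BiT-M-R50x1 checkpoint via wget):
+//   var model = BigTransfer(classCount: 2, depth: .resNet50, modelName: "BiT-M-R50x1")
+//   let patches = Tensor<Float>(zeros: [4, 40, 70, 1])  // four grayscale bee patches
+//   let logits = model.classify(patches)                // [4, 2]; classify() re-stacks the
+//                                                       // gray channel to RGB and resizes to 160x128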
+ + + +extension BigTransfer { + public enum Depth { + case resNet18 + case resNet34 + case resNet50 + case resNet101 + case resNet152 + + var expansion: Int { + switch self { + case .resNet18, .resNet34: return 1 + default: return 4 + } + } + + var layerBlockSizes: [Int] { + switch self { + case .resNet18: return [2, 2, 2, 2] + case .resNet34: return [3, 4, 6, 3] + case .resNet50: return [3, 4, 6, 3] + case .resNet101: return [3, 4, 23, 3] + case .resNet152: return [3, 8, 36, 3] + } + } + } +} + +extension BigTransfer: Classifier {} \ No newline at end of file diff --git a/Sources/BeeTracking/TrackingFactorGraph.swift b/Sources/BeeTracking/TrackingFactorGraph.swift index 62942d3a..cca08239 100644 --- a/Sources/BeeTracking/TrackingFactorGraph.swift +++ b/Sources/BeeTracking/TrackingFactorGraph.swift @@ -134,8 +134,8 @@ public struct TrackingConfiguration { ) -> () /// The optimizer to use during inference. - public var optimizer = LM() - // public var optimizer = GradientDescent(learningRate: 1e-5) + // public var optimizer = LM() + public var optimizer = GradientDescent(learningRate: 1e-3) /// Creates an instance. /// @@ -201,57 +201,17 @@ public struct TrackingConfiguration { // Sample from motion model and take best pose var bestError = g.error(at: x) - // var posex = [Double]() - // var posey = [Double]() - // var posetheta = [Double]() - // var error = [Double]() - // var besterror = [Double]() - // time x , time y , time theta , time error - for _ in 0..<2000 { //2000 + for _ in 0..<256 { x[currentPoseID] = x[previousPoseID] - x[currentPoseID].perturbWith(stddev: Vector3(0.3, 8, 4.6)) + x[currentPoseID].perturbWith(stddev: Vector3(0.2, 8, 8)) let candidateError = g.error(at: x) - /// - // print("x", x) - // print("theta", x[currentPoseID].rot.theta, "vector", x[currentPoseID].t.x, x[currentPoseID].t.y) - // print("g.error(at: x)", g.error(at: x)) - // print("frame", i) - /// if candidateError < bestError { bestError = candidateError bestPose = x[currentPoseID] } - - // APPEND CURRENT ERROR - // posex.append(x[currentPoseID].t.x) - // posey.append(x[currentPoseID].t.y) - // posetheta.append(x[currentPoseID].rot.theta) - // error.append(candidateError) - // besterror.append(bestError) } x[currentPoseID] = bestPose - // let np = Python.import("numpy") - // let posex_np = Tensor(posex).makeNumpyArray() - // let posey_np = Tensor(posey).makeNumpyArray() - // let posetheta_np = Tensor(posetheta).makeNumpyArray() - // let error_np = Tensor(error).makeNumpyArray() - // let besterror_np = Tensor(besterror).makeNumpyArray() - - // let folderName = "sampling" - // if !FileManager.default.fileExists(atPath: folderName) { - // do { - // try FileManager.default.createDirectory(atPath: folderName, withIntermediateDirectories: true, attributes: nil) - // } catch { - // print(error.localizedDescription) - // } - // } - - // np.save("./sampling/sampling_frame_\(i)_posex.npy", posex_np) - // np.save("./sampling/sampling_frame_\(i)_posey.npy", posey_np) - // np.save("./sampling/sampling_frame_\(i)_posetheta.npy", posetheta_np) - // np.save("./sampling/sampling_frame_\(i)_error.npy", error_np) - // np.save("./sampling/sampling_frame_\(i)_besterror.npy", besterror_np) } diff --git a/Sources/BeeTracking/TrackingMetrics.swift b/Sources/BeeTracking/TrackingMetrics.swift index d582ee02..ff429128 100644 --- a/Sources/BeeTracking/TrackingMetrics.swift +++ b/Sources/BeeTracking/TrackingMetrics.swift @@ -42,7 +42,7 @@ public struct SubsequenceMetrics: Codable { // Find the first failure frame. 
var NFsa = prediction.count for (index, overlap) in overlaps.enumerated() { - if overlap < 0.1 { + if overlap < 0.05 { NFsa = index break } diff --git a/Sources/BeeTracking/Visualizations.swift b/Sources/BeeTracking/Visualizations.swift index bd643cf0..f032db6b 100644 --- a/Sources/BeeTracking/Visualizations.swift +++ b/Sources/BeeTracking/Visualizations.swift @@ -88,21 +88,11 @@ public func plotPoseDifference(track: [Pose2], withGroundTruth expected: [Pose2] public func plotFrameWithPatches(frame: Tensor, actual: Pose2, expected: Pose2, firstGroundTruth: Pose2) -> (PythonObject, PythonObject) { let plt = Python.import("matplotlib.pyplot") let mpl = Python.import("matplotlib") - // print("plottingFrameWithPatches") - // print("actual Pose", actual, expected) - // print("eh") + let (fig, ax) = plt.subplots(figsize: Python.tuple([8, 4])).tuple2 - // print("printing the frame shape") - // print(frame) - // print(frame.shape) - let np = Python.import("numpy") - let fr = np.squeeze(frame.makeNumpyArray()) - ax.imshow(fr / 255.0, cmap: "gray") - // print("eh2") + ax.imshow(frame.makeNumpyArray() / 255.0, cmap: "gray") let actualBoundingBox = OrientedBoundingBox(center: actual, rows: 40, cols: 70) ax.plot(actualBoundingBox.corners.map{$0.x} + [actualBoundingBox.corners.first!.x], actualBoundingBox.corners.map{$0.y} + [actualBoundingBox.corners.first!.y], "r-") - // print("eh3") - // ax.plot(Python.tuple(actualBoundingBox.rot.) var supportPatch = mpl.patches.RegularPolygon( Python.tuple([actualBoundingBox.center.t.x, actualBoundingBox.center.t.y]), numVertices:3, @@ -112,10 +102,9 @@ public func plotFrameWithPatches(frame: Tensor, actual: Pose2, expected: ) ax.add_patch(supportPatch) - let expectedBoundingBox = OrientedBoundingBox(center: expected, rows: 40, cols: 70) ax.plot(Python.list(expectedBoundingBox.corners.map{$0.x} + [expectedBoundingBox.corners.first!.x]), Python.list(expectedBoundingBox.corners.map{$0.y} + [expectedBoundingBox.corners.first!.y]), "b-") - // print("eh5") + supportPatch = mpl.patches.RegularPolygon( Python.tuple([expectedBoundingBox.center.t.x, expectedBoundingBox.center.t.y]), numVertices:3, @@ -123,11 +112,10 @@ public func plotFrameWithPatches(frame: Tensor, actual: Pose2, expected: color:"b", orientation: expectedBoundingBox.center.rot.theta - (Double.pi / 2) ) - // print("eh6") ax.add_patch(supportPatch) - ax.set_xlim(firstGroundTruth.t.x - 200, firstGroundTruth.t.x + 200) - ax.set_ylim(firstGroundTruth.t.y - 200, firstGroundTruth.t.y + 200) - // print("eh7") + ax.set_xlim(expected.t.x - 100, expected.t.x + 100) + ax.set_ylim(expected.t.y - 100, expected.t.y + 100) + ax.title.set_text("Prediction (Red) vs. Actual (Green)") return (fig, ax) } @@ -204,15 +192,11 @@ public func plotFrameWithPatches3(frame: Tensor, start: Pose2, end: Pose2 let np = Python.import("numpy") let fr = np.squeeze(frame.makeNumpyArray()) ax.imshow(fr / 255.0, cmap: "gray") - // print("eh2") let startBoundingBox = OrientedBoundingBox(center: start, rows: 40, cols: 70) ax.plot(startBoundingBox.corners.map{$0.x} + [startBoundingBox.corners.first!.x], startBoundingBox.corners.map{$0.y} + [startBoundingBox.corners.first!.y], "g-") - // print("eh3") - // ax.plot(Python.tuple(startBoundingBox.rot.) 
let expectedBoundingBox = OrientedBoundingBox(center: expected, rows: 40, cols: 70) ax.plot(Python.list(expectedBoundingBox.corners.map{$0.x} + [expectedBoundingBox.corners.first!.x]), Python.list(expectedBoundingBox.corners.map{$0.y} + [expectedBoundingBox.corners.first!.y]), "b-") - // print("eh5") var supportPatch = mpl.patches.RegularPolygon( Python.tuple([expectedBoundingBox.center.t.x, expectedBoundingBox.center.t.y]), numVertices:3, @@ -220,7 +204,6 @@ public func plotFrameWithPatches3(frame: Tensor, start: Pose2, end: Pose2 color:"b", orientation: expectedBoundingBox.center.rot.theta - (Double.pi / 2) ) - // print("eh6") ax.add_patch(supportPatch) supportPatch = mpl.patches.RegularPolygon( Python.tuple([startBoundingBox.center.t.x, startBoundingBox.center.t.y]), @@ -234,8 +217,6 @@ public func plotFrameWithPatches3(frame: Tensor, start: Pose2, end: Pose2 let endBoundingBox = OrientedBoundingBox(center: end, rows: 40, cols: 70) ax.plot(endBoundingBox.corners.map{$0.x} + [endBoundingBox.corners.first!.x], endBoundingBox.corners.map{$0.y} + [endBoundingBox.corners.first!.y], "r-") - // print("eh3") - // ax.plot(Python.tuple(endBoundingBox.rot.) supportPatch = mpl.patches.RegularPolygon( Python.tuple([endBoundingBox.center.t.x, endBoundingBox.center.t.y]), numVertices:3, @@ -249,7 +230,6 @@ public func plotFrameWithPatches3(frame: Tensor, start: Pose2, end: Pose2 ax.set_xlim(firstGroundTruth.t.x - 200, firstGroundTruth.t.x + 200) ax.set_ylim(firstGroundTruth.t.y - 200, firstGroundTruth.t.y + 200) - // print("eh7") ax.title.set_text("Start (Green), End (Red), vs. Label (Blue)") let ax1 = axs[0][1] @@ -269,7 +249,6 @@ public func plotFrameWithPatches3(frame: Tensor, start: Pose2, end: Pose2 ax5.plot(np.arange(0,xs.count), thetas) ax5.title.set_text("Theta") - // var spec = mpl.gridspec.GridSpec(ncols: 2, nrows: 1, width_ratios: [2, 1]) return (fig, ax) diff --git a/Sources/SwiftFusion/Inference/FactorsStorage.swift b/Sources/SwiftFusion/Inference/FactorsStorage.swift index 0f1c4d78..008ae51f 100644 --- a/Sources/SwiftFusion/Inference/FactorsStorage.swift +++ b/Sources/SwiftFusion/Inference/FactorsStorage.swift @@ -62,9 +62,17 @@ extension ArrayStorage where Element: VectorFactor { let (lFactor, lVars) = factor.linearizableComponent(at: vars) let gradIndices = LVariables.linearized(lFactor.edges) let grads = GradVariables(at: gradIndices, in: GradVariables.withoutMutation(gradBufs)) - let newGrads = grads + gradient(at: lVars) { (lFactor.errorVector(at: $0) as! Vector1).x } - // print("FactorsStorage", lFactor.errorVector(at: lVars)) - // let newGrads = grads + gradient(at: lVars) { lFactor.errorVector(at: $0).squaredNorm } + + var newGrads = grads + if let gradUpdate = lFactor.errorVector(at: lVars) as? Vector3 { + newGrads = newGrads + gradient(at: lVars) { lFactor.errorVector(at: $0).squaredNorm } + } + else { + var currGrads = gradient(at: lVars) { (lFactor.errorVector(at: $0) as! Vector1 + Vector1(1000.0)).squaredNorm } as! PenguinStructures.Tuple + currGrads.head.x = currGrads.head.x / 100.0 + newGrads = newGrads + (currGrads as! 
Element.LinearizableComponent.Variables.TangentVector) + } + newGrads.assign(into: gradIndices, in: gradBufs) } } diff --git a/Sources/SwiftFusion/Optimizers/GradientDescent.swift b/Sources/SwiftFusion/Optimizers/GradientDescent.swift index 90dbbd59..f46582a2 100644 --- a/Sources/SwiftFusion/Optimizers/GradientDescent.swift +++ b/Sources/SwiftFusion/Optimizers/GradientDescent.swift @@ -17,26 +17,72 @@ import _Differentiation public struct GradientDescent { /// The fraction of the gradient to move per step. public var learningRate: Double + public var baseLearningRate: Double /// Creates an instance with the given `learningRate`. public init(learningRate: Double) { self.learningRate = learningRate + self.baseLearningRate = learningRate } - + /// Get the learning rate schedule based on the dataset size + /// + /// - Parameters: + /// - datasetSize: number of images in the current dataset + /// - Returns: learning rate schedule based on the current dataset + func getSchedule(datasetSize: Int) -> Array { + if datasetSize == 100 { + return [3, 6, 10, 100] + } + if datasetSize < 20000{ + return [100, 200, 300, 400, 500] + } + else if datasetSize < 500000 { + return [500, 3000, 6000, 9000, 10000] + } + else { + return [500, 6000, 12000, 18000, 20000] + } + } + /// Get learning rate at the current step given the dataset size and base learning rate + /// + /// - Parameters: + /// - step: current training step + /// - datasetSize: number of images in the dataset + /// - baseLearningRate: starting learning rate to modify + /// - Returns: learning rate at the current step in training + func getLearningRate(step: Int, datasetSize: Int, baseLearningRate: Float = 0.003) -> Float? { + let supports = getSchedule(datasetSize: datasetSize) + // Linear warmup + if step < supports[0] { + return baseLearningRate * Float(step) / Float(supports[0]) + } + // End of training + else if step >= supports.last! { + return nil + } + // Staircase decays by factor of 10 + else { + var baseLearningRate = baseLearningRate + for s in supports[1...] { + if s < step { + baseLearningRate = baseLearningRate / 10.0 + } + } + return baseLearningRate + } + } /// Moves `values` along the gradient of `objective`'s error function for a single gradient /// descent step. public func update(_ values: inout VariableAssignments, objective: FactorGraph) { - // print(objective.errorGradient(at: values)) values.move(along: -learningRate * objective.errorGradient(at: values)) } } extension GradientDescent : Optimizer { public mutating func optimize(graph: FactorGraph, initial: inout VariableAssignments) { - // for _ in 0..<100 { - // self.update(&initial, objective: graph) - // } - print("gd doing nothing") - // self.update(&initial, objective: graph) + for i in 0..<15 { + self.learningRate = Double(getLearningRate(step: i + 1, datasetSize: 100, baseLearningRate: Float(self.baseLearningRate))!) 
+ self.update(&initial, objective: graph) + } } } \ No newline at end of file From e3af5ca5994a315f363a1811e7d9ee60d2c1dcd8 Mon Sep 17 00:00:00 2001 From: icourten3 Date: Thu, 9 Dec 2021 10:48:30 -0500 Subject: [PATCH 12/34] cleaned up the code --- Scripts/Brando01.swift | 42 +- Scripts/Brando02.swift | 18 +- Scripts/Brando03.swift | 72 +- Scripts/Brando04.swift | 121 +-- Scripts/Brando04d1.swift | 121 +++ Scripts/Brando10.swift | 20 - Scripts/Brando12.swift | 12 +- Scripts/Brando14.swift | 61 +- Scripts/Brando15.swift | 91 +++ Scripts/Brando16.swift | 126 +++ Scripts/Brandounittest.swift | 0 Scripts/main.swift | 4 +- .../AppearanceRAE+Serialization.swift | 46 +- Sources/BeeTracking/NNClassifier.swift | 742 +++++++++--------- .../BeeTracking/OISTBeeVideo+Batches.swift | 23 +- .../BeeTracking/ProbabilisticTracker.swift | 2 +- Sources/BeeTracking/TrackingFactorGraph.swift | 51 +- .../Inference/FactorsStorage.swift | 4 +- Sources/SwiftFusion/Optimizers/LM.swift | 4 - Tests/BrandoTests/NNClassifierTests.swift | 107 --- Tests/BrandoTests/NNClassifierTests2.swift | 60 -- Tests/BrandoTests/TrackingTests.swift | 0 22 files changed, 803 insertions(+), 924 deletions(-) create mode 100644 Scripts/Brando04d1.swift create mode 100644 Scripts/Brando15.swift create mode 100644 Scripts/Brando16.swift delete mode 100644 Scripts/Brandounittest.swift delete mode 100644 Tests/BrandoTests/NNClassifierTests.swift delete mode 100644 Tests/BrandoTests/NNClassifierTests2.swift delete mode 100644 Tests/BrandoTests/TrackingTests.swift diff --git a/Scripts/Brando01.swift b/Scripts/Brando01.swift index a4c3cb03..0a4b9340 100644 --- a/Scripts/Brando01.swift +++ b/Scripts/Brando01.swift @@ -9,22 +9,8 @@ import Foundation import PenguinStructures -/// Brando01 Tracker OpenCV +/// Brando01: Tracker OpenCV struct Brando01: ParsableCommand { - // @Option(help: "Run on track number x") - // var trackId: Int = 0 - - // @Option(help: "Run for number of frames") - // var trackLength: Int = 80 - - // @Option(help: "Size of feature space") - // var featureSize: Int = 5 - - // @Option(help: "Pretrained weights") - // var weightsFile: String? - - // Runs RAE tracker on n number of sequences and outputs relevant images and statistics - // Make sure you have a folder `Results/andrew01` before running func run() { let dataDir = URL(fileURLWithPath: "./OIST_Data") @@ -38,26 +24,18 @@ struct Brando01: ParsableCommand { let np = Python.import("numpy") let cv2 = Python.import("cv2") - // print(Python.version) - // print("hello") + let evalTracker: Tracker = {frames, start in let tracker = cv2.TrackerMIL_create() - // var tracker = cv2.Tracker_create("MIL") - // print(frames.first!.makeNumpyArray()) - // BB = (width-35,height-35,70,70) - //leads to an error when BBox area is more than 40*70? 
+ var BB = Python.tuple([Int(start.center.t.x)-20, Int(start.center.t.y)-35, 40, 70]) var smallframe = np.array(frames.first!.makeNumpyArray()) - print("hello2") - // cv2.circle(smallframe, Python.tuple([Int(start.center.t.x),Int(start.center.t.y)]), 10, Python.tuple([255,255,255]), 5) let leftpt = Python.tuple([Int(start.center.t.x)-35, Int(start.center.t.y)-35]) let rgtpt = Python.tuple([Int(start.center.t.x)+35, Int(start.center.t.y)+35]) cv2.rectangle(smallframe, leftpt, rgtpt, Python.tuple([0,150,0]), 5) - print("hello3") cv2.imwrite("./image_new.png", smallframe) - // tracker.init(frames.first!.makeNumpyArray(), BB) tracker[dynamicMember: "init"](frames.first!.makeNumpyArray(), BB) var results = [PythonObject]() for (index, frame) in frames.enumerated() { @@ -67,18 +45,11 @@ struct Brando01: ParsableCommand { if Bool(track_success)! { results.append(newBB) } - // newBB - // let smallframe = frame.makeNumpyArray() - // cv2.rectangle(smallframe, leftpt, rgtpt, Python.tuple([0,150,0]), 5) - // cv2.imshow("SiamMask", smallframe) + } print("printing python BB") - results.map{print($0)} - // print("hello") - // print(type(of: results)) - // print(results) var track = [OrientedBoundingBox]() for result in results { let pythonBB = result.tuple4 @@ -86,7 +57,6 @@ struct Brando01: ParsableCommand { let cols = Int(pythonBB.3)! let rot = Rot2(0) let vect = Vector2(Double(pythonBB.0)!+20, Double(pythonBB.1)!+35) - // let vect = Vector2(Double(pythonBB.0)! + Double(rows)/2, Double(pythonBB.1)! + Double(cols)/2) let center = Pose2(rot, vect) let swiftBB = OrientedBoundingBox(center: center, rows: rows, cols: cols) track.append(swiftBB) @@ -101,7 +71,6 @@ struct Brando01: ParsableCommand { let plt = Python.import("matplotlib.pyplot") let sequenceCount = 1 var results = trackerEvaluation.evaluate(evalTracker, sequenceCount: 1, deltaAnchor: 175, outputFile: "brando01") - // print(results) for (index, value) in results.sequences.prefix(1).enumerated() { var i: Int = 0 zip(value.subsequences.first!.frames, zip(value.subsequences.first!.prediction, value.subsequences.first!.groundTruth)).map { @@ -129,8 +98,5 @@ struct Brando01: ParsableCommand { print("Accuracy for all sequences is \(results.trackerMetrics.accuracy) with Robustness of \(results.trackerMetrics.robustness)") - - } - } \ No newline at end of file diff --git a/Scripts/Brando02.swift b/Scripts/Brando02.swift index 7b271da2..ad027cb2 100644 --- a/Scripts/Brando02.swift +++ b/Scripts/Brando02.swift @@ -9,21 +9,17 @@ import Foundation import PenguinStructures -/// Brando01 OpenCV tracker +/// Brando02 OpenCV tracker struct Brando02: ParsableCommand { func run() { let np = Python.import("numpy") let cv2 = Python.import("cv2") let os = Python.import("os") - // let imutils = Python.import("utils") - print(Python.version) - print(Python.tuple([1,3,4])) let image_names = os.listdir("../OIST_Data/downsampled") let track_names = os.listdir("../OIST_Data/tracks") image_names.sort() track_names.sort() - // let tracker = cv2.TrackerCSRT_create() let track = track_names[10] let frame = cv2.imread("../OIST_Data/downsampled/" + image_names[0]) let centers = Python.list() @@ -37,19 +33,13 @@ struct Brando02: ParsableCommand { continue } i += 1 - // print(type(of: line)) let lineSwift = String(line) - // print(type(of: lineSwift)) - let lineSwift2 = lineSwift ?? 
"" - // print(lineSwift2) let nums = lineSwift2.components(separatedBy: " ") - // print(nums) let height = Float(nums[1]) let width = Float(nums[0]) centers.append(Python.tuple([Python.float(width),Python.float(height)])) } - // print(centers) let width1 = Float(centers[0][0]) @@ -58,9 +48,6 @@ struct Brando02: ParsableCommand { let height = height1 ?? 0 let BB = Python.tuple([Int(width-35),Int(height-35),70,70]) let tracker = cv2.TrackerMIL_create() - // print(frames.first!.makeNumpyArray()) - // BB = (width-35,height-35,70,70) - print(type(of: tracker)) tracker[dynamicMember: "init"](frame, BB) var results = [PythonObject]() for image_name in image_names { @@ -71,9 +58,6 @@ struct Brando02: ParsableCommand { if Bool(track_success)! { results.append(newBB) } - // if Bool(track_success) { - // results.append(BB) - // } } diff --git a/Scripts/Brando03.swift b/Scripts/Brando03.swift index 7f7f9a1e..cc6b4f3b 100644 --- a/Scripts/Brando03.swift +++ b/Scripts/Brando03.swift @@ -9,37 +9,18 @@ import Foundation import PenguinStructures -/// Brando01 SiamMask +/// Brando01 SiamMask Tracker struct Brando03: ParsableCommand { - // @Option(help: "Run on track number x") - // var trackId: Int = 0 - - // @Option(help: "Run for number of frames") - // var trackLength: Int = 80 - - // @Option(help: "Size of feature space") - // var featureSize: Int = 5 - - // @Option(help: "Pretrained weights") - // var weightsFile: String? - // Runs RAE tracker on n number of sequences and outputs relevant images and statistics - // Make sure you have a folder `Results/andrew01` before running func run() { let dataDir = URL(fileURLWithPath: "./OIST_Data") - // let data = OISTBeeVideo(directory: dataDir, length: 100)! let testData = OISTBeeVideo(directory: dataDir, afterIndex: 100, length: 80)! 
- // print("number of frames in training data:", data.labels.count) print("number of frames in testing data", testData.labels.count, "\n\n") let trackerEvaluation = TrackerEvaluationDataset(testData) - // let shpl = Python.import("shapely") let os = Python.import("os") - - // print(os.environ) - // let plt = Python.import("matplotlib") let torch = Python.import("torch") let np = Python.import("numpy") @@ -58,15 +39,11 @@ struct Brando03: ParsableCommand { parser.add_argument("--resume") parser.add_argument("--config") parser.add_argument("--base_path") - // parser.add_argument("--cpu") - // let args = parser.parse_args(["--resume", "../SiamMask/experiments/siammask_sharp/SiamMask_VOT.pth", "--config", "../SiamMask/experiments/siammask_sharp/config_vot.json", "--base_path", "./OIST_Data/downsampled"]) - // let args = parser.parse_args(["--resume", "../SiamMask/checkpoint_e20.pth", "--config", "../SiamMask/experiments/siammask_sharp/config_vot.json", "--base_path", "./OIST_Data/downsampled"]) let args = parser.parse_args(["--resume", "../SiamMask/model_sharp/checkpoint_e20.pth", "--config", "../SiamMask/experiments/siammask_sharp/config_vot.json", "--base_path", "./OIST_Data/downsampled"]) print("ARGUMENTS", args) - // let imutils = Python.import("utils") print(Python.version) print("hello") let evalTracker: Tracker = { frames, start in @@ -78,44 +55,25 @@ struct Brando03: ParsableCommand { // # Setup Model let cfg = cfhelper.load_config(args) let custom = Python.import("SiamMask.experiments.siammask_sharp.custom") - // // from custom import Custom var siammask = custom.Custom(anchors: cfg["anchors"]) - // if args.resume: - // assert isfile(args.resume), 'Please download {} first.'.format(args.resume) siammask = ldhelper.load_pretrain(siammask, args.resume) siammask.eval().to(device) - - // # Parse Image file - // img_files = sorted(glob.glob(join(args.base_path, '*.jp*'))) - // ims = [cv2.imread(imf) for imf in img_files] - - // # Select ROI - // cv2.namedWindow("SiamMask", cv2.WND_PROP_FULLSCREEN) - // # cv2.setWindowProperty("SiamMask", cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN) - // try: - // init_rect = cv2.selectROI('SiamMask', ims[0], False, False) let init_rect = Python.tuple([Int(start.center.t.x)-20, Int(start.center.t.y)-20, 40, 70]) let tup = init_rect.tuple4 let x = tup.0 let y = tup.1 let w = tup.2 let h = tup.3 - // x, y, w, h = init_rect - // // except: - // // exit() - // var toc = 0 var state: PythonObject = 0 var results = [PythonObject]() for (f, im) in frames.enumerated() { - // for f, im in enumerate(ims): - // let tic = cv2.getTickCount() + let im_np = im.makeNumpyArray() let im_3d = np.squeeze(np.stack(Python.tuple([im_np, im_np, im_np]), axis: 2)) - // print("image shape", im_3d.shape) - // cv2.imshow("SiamMask", im_3d) + if f == 0 { // init let target_pos = np.array([x + w / 2, y + h / 2]) let target_sz = np.array([w, h]) @@ -125,21 +83,14 @@ struct Brando03: ParsableCommand { state = smtest.siamese_track(state, im_3d, mask_enable: true, refine_enable: true, device: device) //# track let location = state["ploygon"].flatten() - // cv2.polylines(im_3d, [np.int0(location).reshape(Python.tuple([-1, 1, 2]))], true, Python.tuple([0,255,0]), 3) - // cv2.circle(im_3d, Python.tuple([centx, centy]), 10, Python.tuple([0,255,255]), 5) - // cv2.imwrite("SiamMask"+String(f)+".png", im_3d) - // let mask = state["mask"] > state["p"].seg_thr + results.append(location) - // im[:, :, 2] = (mask > 0) * 255 + (mask == 0) * im[:, :, 2] } } - // results.map{print($0)} - // 
print("hello") - // print(type(of: results)) - // print(results) + var track = [OrientedBoundingBox]() for (i, result) in results.enumerated() { if i > 0 { @@ -177,7 +128,6 @@ struct Brando03: ParsableCommand { if dx != 0 { theta = atan(dy/dx) } - // if locx >= centx && locy >= centy{} if locx >= centx && locy < centy{ theta = -theta @@ -190,7 +140,6 @@ struct Brando03: ParsableCommand { let rot = Rot2(theta) let vect = Vector2(Double(centx), Double(centy)) - // let vect = Vector2(Double(pythonBB.0)! + Double(rows)/2, Double(pythonBB.1)! + Double(cols)/2) print("rotation", rot, "\n\n") let center = Pose2(rot, vect) let swiftBB = OrientedBoundingBox(center: center, rows: rows, cols: cols) @@ -200,26 +149,17 @@ struct Brando03: ParsableCommand { track.append(swiftBB) } } - // print(track) return track } let plt = Python.import("matplotlib.pyplot") let sequenceCount = 20 var eval_results = trackerEvaluation.evaluate(evalTracker, sequenceCount: sequenceCount, deltaAnchor: 175, outputFile: "brando03") - // print(results) print("done evaluating") var total_overlap = eval_results.sequences.prefix(sequenceCount)[0].subsequences.first!.metrics.overlap - // total_overlap += eval_results.sequences.prefix(sequenceCount)[1].subsequences.first!.metrics.overlap for (index, value) in eval_results.sequences.prefix(sequenceCount).enumerated() { - // var i: Int = 0 - // zip(value.subsequences.first!.frames, zip(value.subsequences.first!.prediction, value.subsequences.first!.groundTruth)).map { - // let (fig, axes) = plotFrameWithPatches(frame: $0.0, actual: $0.1.0.center, expected: $0.1.1.center, firstGroundTruth: value.subsequences.first!.groundTruth.first!.center) - // fig.savefig("Results/brando03/sequence\(index)/brando03\(i).png", bbox_inches: "tight") - // plt.close("all") - // i = i + 1 - // } + print("done,", index) let (fig, axes) = plt.subplots(1, 2, figsize: Python.tuple([20, 20])).tuple2 fig.suptitle("Tracking positions and Subsequence Average Overlap with Accuracy \(String(format: "%.2f", value.subsequences.first!.metrics.accuracy)) and Robustness \(value.subsequences.first!.metrics.robustness).") diff --git a/Scripts/Brando04.swift b/Scripts/Brando04.swift index 4c91f009..d1354f30 100644 --- a/Scripts/Brando04.swift +++ b/Scripts/Brando04.swift @@ -14,46 +14,47 @@ import Foundation struct Brando04: ParsableCommand { typealias LikelihoodModel = TrackingLikelihoodModel - @Flag(help: "Training mode") var training: Bool = false - let num_boxes: Int = 3000 + let num_boxes: Int = 10000 func getTrainingDataBG( - from dataset: OISTBeeVideo, - numberForeground: Int = 3000 - ) -> [LikelihoodModel.Datum] { + from dataset: OISTBeeVideo + ) -> (Tensor, Tensor) { print("bg") - // var allBoxes = [LikelihoodModel.Datum]() let bgBoxes = dataset.makeBackgroundBoundingBoxes(patchSize: (40, 70), batchSize: num_boxes).map { - (frame: $0.frame, type: LikelihoodModel.PatchType.bg, obb: $0.obb) + $0.frame!.patch(at: $0.obb) } print("bg2") - - - return bgBoxes + let labels = Tensor(zeros: [num_boxes]) + print("labels done bg") + let patches = Tensor(stacking: bgBoxes.map {$0}) + print("patches done bg") + return (labels, patches) } + + func getTrainingDataFG( - from dataset: OISTBeeVideo, - numberForeground: Int = 3000 - ) -> [LikelihoodModel.Datum] { + from dataset: OISTBeeVideo + ) -> (Tensor, Tensor) { print("fg") - // var allBoxes = [LikelihoodModel.Datum]() - let fgBoxes = dataset.makeForegroundBoundingBoxes(patchSize: (40, 70), batchSize: num_boxes).map { - (frame: $0.frame, type: LikelihoodModel.PatchType.fg, 
obb: $0.obb)
+    let fgBoxes = dataset.makeForegroundBoundingBoxes(patchSize: (40, 70), batchSize: num_boxes).map {
+      $0.frame!.patch(at: $0.obb)
     }
-    print("fg2")
-    
-    return fgBoxes
+    print("fg2")
+    let labels = Tensor(ones: [num_boxes])
+    print("labels done fg")
+    let patches = Tensor(stacking: fgBoxes.map {$0})
+    print("patches done fg")
+    return (labels, patches)
   }
 
-
   // Just runs an RP tracker and saves image to file
   // Make sure you have a folder `Results/fan12` before running
   func run() {
@@ -70,77 +71,37 @@ struct Brando04: ParsableCommand {
 
 
     let dataDir = URL(fileURLWithPath: "./OIST_Data")
-    print("hello")
-
-    // if I call makeBackgroundBoundingBoxes, makeForegroundBoundingBoxes.
     let trainingDataset = OISTBeeVideo(directory: dataDir, length: 100)!
-    print("done")
-    var bgBoxes = getTrainingDataBG(from: trainingDataset)
-    print(bgBoxes.count)
-    // let trainingDataset2 = OISTBeeVideo(directory: dataDir, length: 100)!
-    print("2")
-    var fgBoxes = getTrainingDataFG(from: trainingDataset)
-    print(fgBoxes.count)
+    var (labels_fg, patches_fg) = getTrainingDataFG(from: trainingDataset)
+    var (labels_bg, patches_bg) = getTrainingDataBG(from: trainingDataset)
 
-    // print("all boxes")
-    var allBoxes = [LikelihoodModel.Datum]()
-    for i in 0...(fgBoxes.count-1)/100 {
-      //appending 100 bounding boxes
-      for j in 0...99 {
-        allBoxes.append(bgBoxes[j+i*100])
-      }
-      //appending 100 bounding boxes
-      for j in 0...99 {
-        allBoxes.append(fgBoxes[j+i*100])
-      }
-    }
-    print("total boxes", allBoxes.count)
-    // for i in 0...allBoxes.count-1 {
-    //   print(i)
-    //   print(allBoxes[i].type)
-    //   print(allBoxes[i].obb)
-    // }
-
-
-    let patches = Tensor(stacking: allBoxes.map { $0.frame!.patch(at: $0.obb)})
-    let labels = Tensor(stacking: allBoxes.map { $0.type == TrackingLikelihoodModel.PatchType.bg ? Tensor(0) : Tensor(1)})
+    var patches = Tensor(stacking: patches_bg.unstacked() + patches_fg.unstacked())
+    var labels = Tensor(concatenate(labels_bg, labels_fg))
     print("shape of patches", patches.shape)
     print("shape of labels", labels.shape)
-    // return
-
-    // let trainingData = allBoxes
-    // let trainingData = (images, labels)
-    // print("training data shape", trainingData.shape)
-
     print("training data done")
-    // for featSize in [64,128,256] {
-    // for kHiddenDimension in [256,512] {
+
     let kHiddenDimension = 512
-    let featSize = 256
-    for i in 1...7 {
+    let featSize = 512
+    let iterations = [5,6,7]
+
+    for i in iterations {
+      let path = "./classifiers/classifiers_today/classifier_weight_\(kHiddenDimension)_\(featSize)_\(i)_60000boxes_600epochs.npy"
+      if FileManager.default.fileExists(atPath: path) {
+        print("File Already Exists. Abort training")
+        continue
+      }
       print("Training...")
-      // let rae: PretrainedNNClassifier = PretrainedNNClassifier(
-      //   patches: patches,
-      //   labels: labels,
-      //   given: PretrainedNNClassifier.HyperParameters(hiddenDimension: kHiddenDimension, latentDimension: featSize, weightFile: "")
-      // )
-      // rae.save(to: "./classifiers/classifiers_today/classifier_weight_\(kHiddenDimension)_\(featSize)_\(i).npy")
-      // let rae: PretrainedSmallerNNClassifier = PretrainedSmallerNNClassifier(
-      //   patches: patches,
-      //   labels: labels,
-      //   given: PretrainedSmallerNNClassifier.HyperParameters(latentDimension: featSize, weightFile: "")
-      // )
-      // rae.save(to: "./classifiers/classifiers_today/small_classifier_weight_\(featSize)_\(i).npy")
-      let rae: PretrainedLargerNNClassifier = PretrainedLargerNNClassifier(
+      let rae: PretrainedNNClassifier = PretrainedNNClassifier(
        patches: patches,
        labels: labels,
-        given: PretrainedLargerNNClassifier.HyperParameters(hiddenDimension: kHiddenDimension, latentDimension: featSize, weightFile: "")
+        given: PretrainedNNClassifier.HyperParameters(hiddenDimension: kHiddenDimension, latentDimension: featSize, weightFile: "", learningRate: 1e-3),
+        train_mode: "from_scratch"
      )
-      rae.save(to: "./classifiers/classifiers_today/large_classifier_weight_\(kHiddenDimension)_\(featSize)_\(i).npy")
-      print("saved")
+      rae.save(to: path)
+
     }
-    // }
-    // }
+
 
   }
 
diff --git a/Scripts/Brando04d1.swift b/Scripts/Brando04d1.swift
new file mode 100644
index 00000000..9d225f84
--- /dev/null
+++ b/Scripts/Brando04d1.swift
@@ -0,0 +1,121 @@
+import ArgumentParser
+
+import SwiftFusion
+import BeeDataset
+import BeeTracking
+import TensorFlow
+import PythonKit
+import Foundation
+
+
+
+
+/// Brando04d1: NNClassifier retraining on perturbed bounding boxes
+struct Brando04d1: ParsableCommand {
+  typealias LikelihoodModel = TrackingLikelihoodModel
+
+
+  @Flag(help: "Training mode")
+  var training: Bool = false
+
+  let num_boxes: Int = 10000
+  let pert = Vector3(0.0, 30, 0)
+
+  func getTrainingDataBG(
+    from dataset: OISTBeeVideo
+  ) -> (Tensor, Tensor) {
+    print("bg")
+    let frames_obbs = dataset.makeBackgroundBoundingBoxes(patchSize: (40, 70), batchSize: num_boxes)
+    var bgBoxes = [Tensor]()
+    for i in 0...frames_obbs.count-1 {
+      var obb = frames_obbs[i].obb
+      obb.center.perturbWith(stddev: pert)
+      bgBoxes.append(frames_obbs[i].frame!.patch(at: obb))
+
+    }
+
+    print("bg2")
+    let labels = Tensor(zeros: [num_boxes])
+    print("labels done bg")
+    let patches = Tensor(stacking: bgBoxes.map {$0})
+    print("patches done bg")
+    return (labels, patches)
+  }
+
+
+
+  func getTrainingDataFG(
+    from dataset: OISTBeeVideo
+  ) -> (Tensor, Tensor) {
+    print("fg")
+    let frames_obbs = dataset.makeForegroundBoundingBoxes(patchSize: (40, 70), batchSize: num_boxes)
+    var fgBoxes = [Tensor]()
+    for i in 0...frames_obbs.count-1 {
+      var obb = frames_obbs[i].obb
+      obb.center.perturbWith(stddev: pert)
+      fgBoxes.append(frames_obbs[i].frame!.patch(at: obb))
+
+    }
+
+    print("fg2")
+    let labels = Tensor(ones: [num_boxes])
+    print("labels done fg")
+    let patches = Tensor(stacking: fgBoxes.map {$0})
+    print("patches done fg")
+    return (labels, patches)
+  }
+
+
+
+  func run() {
+    let folderName = "classifiers/classifiers_today"
+    if !FileManager.default.fileExists(atPath: folderName) {
+      do {
+        try FileManager.default.createDirectory(atPath: folderName, withIntermediateDirectories: true, attributes: nil)
+      } catch {
+        print(error.localizedDescription)
+      }
+    } else {
+      print("folder exists")
+    }
+
+
+    let dataDir = URL(fileURLWithPath: "./OIST_Data")
+    let
trainingDataset = OISTBeeVideo(directory: dataDir, length: 100)! + var (labels_fg, patches_fg) = getTrainingDataFG(from: trainingDataset) + var (labels_bg, patches_bg) = getTrainingDataBG(from: trainingDataset) + + + var patches = Tensor(stacking: patches_bg.unstacked() + patches_fg.unstacked()) + var labels = Tensor(concatenate(labels_bg, labels_fg)) + print("shape of patches", patches.shape) + print("shape of labels", labels.shape) + + let kHiddenDimension = 512 + let featSize = 512 + let iterations = [1] + + + let lr = Float(1e-6) + for i in iterations { + let pretrained_weights = "./classifiers/classifiers_today/classifier_weight_\(kHiddenDimension)_\(featSize)_\(i)_20000boxes_300epochs_retrained(0.0, 30, 0)_lr=\(lr).npy" + let path = "./classifiers/classifiers_today/classifier_weight_\(kHiddenDimension)_\(featSize)_\(i)_20000boxes_300epochs_retrained(0.0, 30, 0)_lr=\(lr)_2nd_iter.npy" + if FileManager.default.fileExists(atPath: path) { + print("File Already Exists. Abort training") + continue + } + print("Training...") + let rae: PretrainedNNClassifier = PretrainedNNClassifier( + patches: patches, + labels: labels, + given: PretrainedNNClassifier.HyperParameters(hiddenDimension: kHiddenDimension, latentDimension: featSize, weightFile: pretrained_weights, learningRate: lr), + train_mode: "pretrained" + ) + rae.save(to: path) + + } + + + + } +} diff --git a/Scripts/Brando10.swift b/Scripts/Brando10.swift index 903fad9d..827f8dcd 100644 --- a/Scripts/Brando10.swift +++ b/Scripts/Brando10.swift @@ -44,11 +44,6 @@ struct Brando10: ParsableCommand { var (figs, axs) = plt.subplots(1,1, figsize: Python.tuple([10, 4])).tuple2 - // axs[0].plot(t,posex_np, linewidth: 1) - // axs[0].set_title("x and y coordinates") - // axs[0].plot(t,posey_np, linewidth: 1) - // axs[1].set_title("theta") - // axs[1].plot(t,posetheta_np, linewidth: 1) axs[0].plot(t,error_np, linewidth: 1) axs[0].set_title("error") plt.subplots_adjust(left:0.1, @@ -57,7 +52,6 @@ struct Brando10: ParsableCommand { top:0.9, wspace:0.4, hspace:0.4) - // axs[2].setylim(-200,50) figs.savefig(folderName + "/sampling_figure_\(i).png") plt.close("all") @@ -66,19 +60,5 @@ struct Brando10: ParsableCommand { - // for featureSize in featureSizes { - // for kHiddenDimension in kHiddenDimensions { - // for j in iterations { - - - - - - // } - // } - // } - - - } } \ No newline at end of file diff --git a/Scripts/Brando12.swift b/Scripts/Brando12.swift index e4474f5b..83509c84 100644 --- a/Scripts/Brando12.swift +++ b/Scripts/Brando12.swift @@ -7,7 +7,7 @@ import PythonKit import Foundation import PenguinStructures -/// Brando12: OPTIMIZATION CONVERGENCE VISUALIZATION +/// Brando12: OPTIMIZATION CONVERGENCE VISUALIZATION [red & green dots] struct Brando12: ParsableCommand { @Option(help: "Run for number of frames") var trackLength: Int = 80 @@ -51,10 +51,8 @@ struct Brando12: ParsableCommand { let data = OISTBeeVideo(directory: dataDir, length: trainingDatasetSize)! 
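    // This script seeds many start poses around the ground-truth box, runs the
    // optimizer from each seed, and plots the start point green when the
    // optimum lands within (xy_thresh, theta_thresh) of the label and red
    // otherwise, giving a picture of the optimizer's basin of attraction.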
let frames = testData.frames let firstTrack = testData.tracks[0] - // let firstTrack = testData.tracks[5] let firstFrame = frames[0] let firstObb = firstTrack.boxes[0] - // let firstObb = firstTrack.boxes[5] //OPTIMIZER GRADIENT DESCENT @@ -91,7 +89,6 @@ struct Brando12: ParsableCommand { for j in 0...1 { axs[i,j].imshow(fr / 255.0, cmap: "gray") let firstGroundTruth = firstObb.center - // axs[i,j].plot(firstObb.corners.map{$0.x} + [firstObb.corners.first!.x], firstObb.corners.map{$0.y} + [firstObb.corners.first!.y], "b-") axs[i,j].set_xlim(firstGroundTruth.t.x - 50, firstGroundTruth.t.x + 50) axs[i,j].set_ylim(firstGroundTruth.t.y - 50, firstGroundTruth.t.y + 50) axs[i,j].get_xaxis().set_visible(false) @@ -159,9 +156,7 @@ struct Brando12: ParsableCommand { let y_out_of_bounds = (v[poseId].t.y > firstObb.center.t.y + xy_thresh) || (v[poseId].t.y < firstObb.center.t.y - xy_thresh) let theta_out_of_bounds = (v[poseId].rot.theta > firstObb.center.rot.theta + theta_thresh) || (v[poseId].rot.theta < firstObb.center.rot.theta - theta_thresh) if !x_out_of_bounds && !theta_out_of_bounds && !y_out_of_bounds { - // plot a green dot - // ax.scatter(startpose.t.x-Double(xbegin),startpose.t.y-Double(ybegin),c:"r", marker: ",") - // ax.scatter(startpose.t.x,startpose.t.y,c:"r", marker: ",") + if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.1 { axs[0,0].plot(startpose.t.x,startpose.t.y,"g,", ms: 1) } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.2 { @@ -173,8 +168,7 @@ struct Brando12: ParsableCommand { } } else { - // ax.scatter(startpose.t.x-Double(xbegin),startpose.t.y-Double(ybegin),c:"g", marker: ",") - // ax.scatter(startpose.t.x,startpose.t.y,c:"g", marker: ",") + if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.1 { axs[0,0].plot(startpose.t.x,startpose.t.y,"r,", ms: 1) } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.2 { diff --git a/Scripts/Brando14.swift b/Scripts/Brando14.swift index 53944f01..70d64f2b 100644 --- a/Scripts/Brando14.swift +++ b/Scripts/Brando14.swift @@ -1,9 +1,3 @@ -// NN Classifier -// Load 1st image -// Load Classifier -// take the error value at each pixel in cropped image -// plot the error value on the image from white to red - import ArgumentParser import SwiftFusion import BeeDataset @@ -13,7 +7,7 @@ import PythonKit import Foundation import PenguinStructures -/// Brando12: OPTIMIZATION CONVERGENCE VISUALIZATION +/// Brando14: ERRORVALUE over entire image struct Brando14: ParsableCommand { @Option(help: "Run for number of frames") var trackLength: Int = 80 @@ -33,16 +27,14 @@ struct Brando14: ParsableCommand { let data = OISTBeeVideo(directory: dataDir, length: trainingDatasetSize)! 
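    // Brando14 sweeps a `range` x `range` grid of pixel offsets around the
    // ground-truth center, evaluates the chosen factor's errorVector at each
    // offset, and draws the resulting error surface next to the frame, making
    // flat regions and spurious minima of the likelihood easy to spot.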
let frames = testData.frames let firstTrack = testData.tracks[0] - // let firstTrack = testData.tracks[5] let firstFrame = frames[0] let firstObb = firstTrack.boxes[0] - // let firstObb = firstTrack.boxes[5] let range = 100.0 // NN Params let (imageHeight, imageWidth, imageChannels) = (40, 70, 1) - let featureSize = 256 + let featureSize = 512 let kHiddenDimension = 512 @@ -53,8 +45,8 @@ struct Brando14: ParsableCommand { } else { str = "RAE" } - // let folderName = "Results/ErrorValueVizualized_\(str)_\(kHiddenDimension)_\(featureSize)_5" - let folderName = "Results/ErrorValueVizualized_\(str)Small_\(featureSize)_1" + let lr = 1e-6 + let folderName = "Results/ErrorValueVizualized_\(str)_20000boxes_300epochs_retrained(0.0, 30, 0)_lr=\(lr)_2nd_iter.npy" if !FileManager.default.fileExists(atPath: folderName) { do { try FileManager.default.createDirectory(atPath: folderName, withIntermediateDirectories: true, attributes: nil) @@ -77,8 +69,6 @@ struct Brando14: ParsableCommand { axs[0].set_xlim(firstGroundTruth.t.x - range/2, firstGroundTruth.t.x + range/2) axs[0].set_ylim(firstGroundTruth.t.y - range/2, firstGroundTruth.t.y + range/2) - // axs[1].set_xlim(firstGroundTruth.t.x - range/2, firstGroundTruth.t.x + range/2) - // axs[1].set_ylim(firstGroundTruth.t.y - range/2, firstGroundTruth.t.y + range/2) axs[1].set_xlim(0, range) axs[1].set_ylim(0, range) @@ -90,18 +80,13 @@ struct Brando14: ParsableCommand { var values = Tensor(zeros: [Int(range), Int(range)]) - // var values = Tensor(zeros:firstFrame.shape) print("printing tensor",values) if useClassifier { - // var classifier = NNClassifier( - // imageHeight: imageHeight, imageWidth: imageWidth, imageChannels: imageChannels, hiddenDimension: kHiddenDimension, latentDimension: featureSize - // ) - var classifier = SmallerNNClassifier( - imageHeight: imageHeight, imageWidth: imageWidth, imageChannels: imageChannels, latentDimension: featureSize - ) - // classifier.load(weights: np.load("./classifiers/classifiers_today/classifier_weight_\(kHiddenDimension)_\(featureSize)_5.npy", allow_pickle: true)) - classifier.load(weights: np.load("./classifiers/classifiers_today/small_classifier_weight_\(featureSize)_1.npy", allow_pickle: true)) + var classifier = NNClassifier( + imageHeight: imageHeight, imageWidth: imageWidth, imageChannels: imageChannels, hiddenDimension: kHiddenDimension, latentDimension: featureSize + ) + classifier.load(weights: np.load("./classifiers/classifiers_today/classifier_weight_512_512_1_20000boxes_300epochs_retrained(0.0, 30, 0)_lr=\(lr)_2nd_iter.npy", allow_pickle: true)) print("done loading") for i in 0...Int(range)-1 { @@ -121,11 +106,8 @@ struct Brando14: ParsableCommand { appearanceModelSize: (40, 70) ) fg.store(factorNNC) - // print("values at ij", values[i,j], factorNNC.errorVector(v[poseId]).x) - // print("error vector", Tensor([factorNNC.errorVector(v[poseId]).x])) - // print("value", (values[Int(x-range/2)+i,Int(y-range/2)+j])) - // values[Int(x-range/2)+i,Int(y-range/2)+j] = Tensor([factorNNC.errorVector(v[poseId]).x]) values[i,j] = Tensor(factorNNC.errorVector(v[poseId]).x) + // print(Tensor(factorNNC.errorVector(v[poseId]).x)) @@ -134,7 +116,6 @@ struct Brando14: ParsableCommand { } print("row", i) } - // print(values[0...,0]) let min_val = values.min() if Double(min_val)! < 0 { values = values-min_val @@ -143,18 +124,6 @@ struct Brando14: ParsableCommand { print(values[0...,0]) print(values.shape) axs[1].imshow(values.makeNumpyArray()) - - - - // axes.set_title(String(axes.get_title())! 
+ "\n final err = \(final_err)" - // + "\n label err = \(label_err).x)" - // + "\n start err = \(start_err)" - // + "\n learning rate = \(lr)" - // + "\n converged = \(conv)") - // figs.savefig(folderName + "/optimization_final_\(j).png", bbox_inches: "tight") - // // let (figs2, axes2) = plotXYandTheta(xs: xs, ys: ys, thetas: thetas) - // // figs2.savefig(folderName + "/optimization_final_\(j)_XYtheta.png", bbox_inches: "tight") - // plt.close("all") fig.savefig(folderName + "/vizual_NNC.png", bbox_inches: "tight") @@ -201,7 +170,6 @@ struct Brando14: ParsableCommand { maxPossibleNegativity: 1e7 ) fg.store(factorRAE) - // print("values at ij", values[i,j], factorNNC.errorVector(v[poseId]).x) values[i,j] = Tensor(factorRAE.errorVector(v[poseId]).x) @@ -220,17 +188,6 @@ struct Brando14: ParsableCommand { print(values.shape) axs[1].imshow(values.makeNumpyArray()) - - - // axes.set_title(String(axes.get_title())! + "\n final err = \(final_err)" - // + "\n label err = \(label_err).x)" - // + "\n start err = \(start_err)" - // + "\n learning rate = \(lr)" - // + "\n converged = \(conv)") - // figs.savefig(folderName + "/optimization_final_\(j).png", bbox_inches: "tight") - // // let (figs2, axes2) = plotXYandTheta(xs: xs, ys: ys, thetas: thetas) - // // figs2.savefig(folderName + "/optimization_final_\(j)_XYtheta.png", bbox_inches: "tight") - // plt.close("all") fig.savefig(folderName + "/vizual_RAE.png", bbox_inches: "tight") diff --git a/Scripts/Brando15.swift b/Scripts/Brando15.swift new file mode 100644 index 00000000..59b9ccf9 --- /dev/null +++ b/Scripts/Brando15.swift @@ -0,0 +1,91 @@ +import ArgumentParser + +import SwiftFusion +import BeeDataset +import BeeTracking +import TensorFlow +import PythonKit +import Foundation + + + + +/// Brando15 SAVE PATCHES FOR LATER USE +struct Brando15: ParsableCommand { + typealias LikelihoodModel = TrackingLikelihoodModel + + + @Flag(help: "Training mode") + var training: Bool = false + + let num_boxes: Int = 10000 + + func getTrainingDataBG( + from dataset: OISTBeeVideo + ) -> (Tensor, Tensor) { + print("bg") + + // var allBoxes = [LikelihoodModel.Datum]() + let bgBoxes = dataset.makeBackgroundBoundingBoxes(patchSize: (40, 70), batchSize: num_boxes).map { + // (frame: $0.frame, type: LikelihoodModel.PatchType.bg, obb: $0.obb) + $0.frame!.patch(at: $0.obb) + } + print("bg2") + let labels = Tensor(zeros: [num_boxes]) + print("labels done bg") + let patches = Tensor(stacking: bgBoxes.map {$0}) + print("patches done bg") + return (labels, patches) + } + + + func getTrainingDataFG( + from dataset: OISTBeeVideo + ) -> (Tensor, Tensor) { + print("fg") + // var allBoxes = [LikelihoodModel.Datum]() + let fgBoxes = dataset.makeForegroundBoundingBoxes(patchSize: (40, 70), batchSize: num_boxes).map { + // (frame: $0.frame, type: LikelihoodModel.PatchType.bg, obb: $0.obb) + $0.frame!.patch(at: $0.obb) + } + print("bg2") + let labels = Tensor(ones: [num_boxes]) + print("labels done bg") + let patches = Tensor(stacking: fgBoxes.map {$0}) + print("patches done bg") + return (labels, patches) + } + + + + + // Just runs an RP tracker and saves image to file + // Make sure you have a folder `Results/fan12` before running + func run() { + let folderName = "classifiers/classifiers_today" + if !FileManager.default.fileExists(atPath: folderName) { + do { + try FileManager.default.createDirectory(atPath: folderName, withIntermediateDirectories: true, attributes: nil) + } catch { + print(error.localizedDescription) + } + } else { + print("folder exists") + } + + + let dataDir 
= URL(fileURLWithPath: "./OIST_Data") + let trainingDataset = OISTBeeVideo(directory: dataDir, length: 100)! + var (labels_bg, patches_bg) = getTrainingDataBG(from: trainingDataset) + let np = Python.import("numpy") + np.save("Patches_bg_\(num_boxes).npy", patches_bg.makeNumpyArray()) + var (labels_fg, patches_fg) = getTrainingDataFG(from: trainingDataset) + + // var patches = concatenate(patches_bg, patches_fg) + var patches = Tensor(stacking: patches_bg.unstacked() + patches_fg.unstacked()) + var labels = Tensor(concatenate(labels_bg, labels_fg)) + print("shape of patches", patches.shape) + print("shape of labels", labels.shape) + np.save("Patches_bg_fg_\(num_boxes).npy", patches.makeNumpyArray()) + } +} diff --git a/Scripts/Brando16.swift b/Scripts/Brando16.swift new file mode 100644 index 00000000..43343228 --- /dev/null +++ b/Scripts/Brando16.swift @@ -0,0 +1,126 @@ +import ArgumentParser + +import SwiftFusion +import BeeDataset +import BeeTracking +import TensorFlow +import PythonKit +import Foundation + +import PenguinStructures + +/// PCA tests +struct Brando16: ParsableCommand { + typealias LikelihoodModel = TrackingLikelihoodModel + + @Option(help: "Run for number of frames") + var trackLength: Int = 80 + + + func getTrainingData( + from dataset: OISTBeeVideo, + numberForeground: Int = 4500 + ) -> [LikelihoodModel.Datum] { + let fgBoxes = dataset.makeForegroundBoundingBoxes(patchSize: (40, 70), batchSize: numberForeground).map { + (frame: $0.frame, type: LikelihoodModel.PatchType.fg, obb: $0.obb) + } + + return fgBoxes + } + + // Runs RAE tracker on n number of sequences and outputs relevant images and statistics + // Make sure you have a folder `Results/andrew01` before running + func run() { + let np = Python.import("numpy") + let pickle = Python.import("pickle") + // used to be 512 + + let (imageHeight, imageWidth, imageChannels) = + (40, 70, 1) + + + var kHiddenDimension = [16, 64, 256] + for dim in kHiddenDimension { + let dataDir = URL(fileURLWithPath: "./OIST_Data") + + let trainingDataset = OISTBeeVideo(directory: dataDir, length: 100)! + + let trainingData = Tensor(stacking: getTrainingData(from: trainingDataset).map { $0.frame!.patch(at: $0.obb) }) + let testData = OISTBeeVideo(directory: dataDir, afterIndex: 100, length: trackLength)! 
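+      // The mean and standard deviation below appear to be precomputed
+      // statistics of the OIST training frames, hard-coded so that every run
+      // standardizes patches identically; they would need to be recomputed for
+      // a different dataset.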
+ + + var statistics = FrameStatistics(Tensor(0.0)) + statistics.mean = Tensor(62.26806976644069) + statistics.standardDeviation = Tensor(37.44683834503672) + let trainingBatch = trainingDataset.makeBatch(statistics: statistics, appearanceModelSize: (imageHeight, imageWidth), batchSize: 4500) + let rae = PCAEncoder(from: trainingBatch, given: dim) + + + + let trackerEvaluation = TrackerEvaluationDataset(testData) + print("s1") + let evalTracker: Tracker = {frames, start in + var tracker = trainProbabilisticTracker( + trainingData: trainingDataset, + encoder: rae, + frames: frames, + boundingBoxSize: (40, 70), + withFeatureSize: dim, + fgRandomFrameCount: 100, + bgRandomFrameCount: 100 + ) + let prediction = tracker.infer(knownStart: Tuple1(start.center), withSampling: true) + let track = tracker.frameVariableIDs.map { OrientedBoundingBox(center: prediction[unpack($0)], rows: 40, cols:70) } + + return track + } + let plt = Python.import("matplotlib.pyplot") + let sequenceCount = 19 + var results = trackerEvaluation.evaluate(evalTracker, sequenceCount: sequenceCount, deltaAnchor: 175, outputFile: "andrew01") + + for (index, value) in results.sequences.prefix(sequenceCount).enumerated() { + var i: Int = 0 + zip(value.subsequences.first!.frames, zip(value.subsequences.first!.prediction, value.subsequences.first!.groundTruth)).map { + let (fig, axes) = plotFrameWithPatches(frame: $0.0, actual: $0.1.0.center, expected: $0.1.1.center, firstGroundTruth: value.subsequences.first!.groundTruth.first!.center) + fig.savefig("Results/ppca_\(dim)/sequence\(index)/andrew01_\(i).png", bbox_inches: "tight") + plt.close("all") + i = i + 1 + } + + + let (fig, axes) = plt.subplots(1, 2, figsize: Python.tuple([20, 20])).tuple2 + fig.suptitle("Tracking positions and Subsequence Average Overlap with Accuracy \(String(format: "%.2f", value.subsequences.first!.metrics.accuracy)) and Robustness \(value.subsequences.first!.metrics.robustness).") + + value.subsequences.map { + let encoder = JSONEncoder() + let data = try! encoder.encode($0.prediction) + FileManager.default.createFile(atPath: "Results/ppca_\(dim)/prediction_ppca_\(dim)_sequence_\(index).json", contents: data, attributes: nil) + plotPoseDifference( + track: $0.prediction.map{$0.center}, withGroundTruth: $0.groundTruth.map{$0.center}, on: axes[0] + ) + } + plotOverlap( + metrics: value.subsequences.first!.metrics, on: axes[1] + ) + fig.savefig("Results/ppca_\(dim)/andrew01_subsequence\(index).png", bbox_inches: "tight") + print("Accuracy for sequence is \(value.sequenceMetrics.accuracy) with Robustness of \(value.sequenceMetrics.robustness)") + } + + print("Accuracy for all sequences is \(results.trackerMetrics.accuracy) with Robustness of \(results.trackerMetrics.robustness)") + let f = Python.open("Results/ppca_\(dim)/EAO/rp_\(dim).data", "wb") + pickle.dump(results.expectedAverageOverlap.curve, f) + + + } + + } +} + +/// Returns `t` as a Swift tuple. +fileprivate func unpack(_ t: Tuple2) -> (A, B) { + return (t.head, t.tail.head) +} +/// Returns `t` as a Swift tuple. 
+fileprivate func unpack(_ t: Tuple1) -> (A) { + return (t.head) +} \ No newline at end of file diff --git a/Scripts/Brandounittest.swift b/Scripts/Brandounittest.swift deleted file mode 100644 index e69de29b..00000000 diff --git a/Scripts/main.swift b/Scripts/main.swift index bb8f75cd..fe45d6ad 100644 --- a/Scripts/main.swift +++ b/Scripts/main.swift @@ -17,9 +17,9 @@ import PenguinParallelWithFoundation struct Scripts: ParsableCommand { static var configuration = CommandConfiguration( - subcommands: [Brando01.self, Brando02.self, Brando03.self, Brando04.self, + subcommands: [Brando01.self, Brando02.self, Brando03.self, Brando04.self, Brando04d1.self, Brando05.self, Brando06.self, Brando07.self, Brando08.self, Brando09.self, - Brando10.self, Brando11.self, Brando12.self, Brando13.self, Brando14.self, Andrew01.self, + Brando10.self, Brando11.self, Brando12.self, Brando13.self, Brando14.self, Brando15.self, Brando16.self, Andrew01.self, Andrew05.self, Fan01.self, Fan02.self, Fan03.self, Fan04.self, Fan05.self, Fan10.self, Fan12.self, Fan13.self, Fan14.self, Frank01.self, Frank02.self, Frank03.self, Frank04.self]) diff --git a/Sources/BeeTracking/AppearanceRAE+Serialization.swift b/Sources/BeeTracking/AppearanceRAE+Serialization.swift index 66895563..dc3116d6 100644 --- a/Sources/BeeTracking/AppearanceRAE+Serialization.swift +++ b/Sources/BeeTracking/AppearanceRAE+Serialization.swift @@ -93,48 +93,4 @@ extension NNClassifier { self.encoder3.numpyWeights ].reduce([], +) } -} - - -extension SmallerNNClassifier { - /// Loads model weights from the numpy arrays in `weights`. - public mutating func load(weights: PythonObject) { - self.encoder_conv1.load(weights: weights[0..<2]) - self.encoder1.load(weights: weights[2..<4]) - self.encoder2.load(weights: weights[4..<6]) - } - - /// The model weights as numpy arrays. - public var numpyWeights: PythonObject { - [ - self.encoder_conv1.numpyWeights, - self.encoder1.numpyWeights, - self.encoder2.numpyWeights, - ].reduce([], +) - } -} - - - -extension LargerNNClassifier { - /// Loads model weights from the numpy arrays in `weights`. - public mutating func load(weights: PythonObject) { - self.encoder_conv1.load(weights: weights[0..<2]) - self.encoder1.load(weights: weights[2..<4]) - self.encoder2.load(weights: weights[4..<6]) - self.encoder3.load(weights: weights[6..<8]) - self.encoder4.load(weights: weights[8..<10]) - - } - - /// The model weights as numpy arrays. - public var numpyWeights: PythonObject { - [ - self.encoder_conv1.numpyWeights, - self.encoder1.numpyWeights, - self.encoder2.numpyWeights, - self.encoder3.numpyWeights, - self.encoder4.numpyWeights - ].reduce([], +) - } -} +} \ No newline at end of file diff --git a/Sources/BeeTracking/NNClassifier.swift b/Sources/BeeTracking/NNClassifier.swift index c508099c..cf49f0b7 100644 --- a/Sources/BeeTracking/NNClassifier.swift +++ b/Sources/BeeTracking/NNClassifier.swift @@ -23,14 +23,14 @@ import BeeDataset public struct BeeBatch { let patch: Tensor - let label: Tensor + let label: Tensor } /// Conform `IrisBatch` to `Collatable` so that we can load it into a `TrainingEpoch`. 
extension BeeBatch: Collatable { public init(collating samples: BatchSamples) where BatchSamples.Element == Self { patch = Tensor(stacking: samples.map{$0.patch}) - label = Tensor(stacking: samples.map{$0.label}) + label = Tensor(stacking: samples.map{$0.label}) } } @@ -92,7 +92,7 @@ public struct NNClassifier: Layer{ encoder_conv1 = Conv2D(filterShape: (3, 3, imageChannels, imageChannels), padding: .same, activation: relu) encoder_pool1 = MaxPool2D(poolSize: (2, 2), strides: (2, 2), padding: .same) - + encoder1 = Dense( inputSize: imageHeight * imageWidth * imageChannels / 4, outputSize: hiddenDimension, @@ -110,22 +110,26 @@ public struct NNClassifier: Layer{ } /// Initialize given an image batch - public typealias HyperParameters = (hiddenDimension: Int, latentDimension: Int) + public typealias HyperParameters = (hiddenDimension: Int, latentDimension: Int, weightFile: String, learningRate: Float) // public init(from imageBatch: Tensor, given parameters: HyperParameters? = nil) { - public init(patches patches: Tensor, labels labels: Tensor, given parameters: HyperParameters? = nil) { + public init(patches patches: Tensor, labels labels: Tensor, given parameters: HyperParameters? = nil, train_mode: String) { print("init from image batch") - // let shape = imageBatch.shape - // precondition(imageBatch.rank == 4, "Wrong image shape \(shape)") - // let (_, H_, W_, C_) = (shape[0], shape[1], shape[2], shape[3]) - // print("sizes", H_, W_, C_) - // training data shape [600, 40, 70, 1] let (H_, W_, C_) = (patches.shape[1], patches.shape[2], 1) - let (h,d) = parameters ?? (100,10) + let h = parameters!.hiddenDimension + let d = parameters!.latentDimension var model = NNClassifier(imageHeight: H_, imageWidth: W_, imageChannels: C_, hiddenDimension: h, latentDimension: d) + if train_mode == "pretrained" { + print("PRETRAINED") + let np = Python.import("numpy") + print("loading pretrained weights") + model.load(weights: np.load(parameters!.weightFile, allow_pickle: true)) + } + + let optimizer = Adam(for: model) - optimizer.learningRate = 1e-3 + optimizer.learningRate = parameters!.learningRate let lossFunc = NNClassifierLoss() // Issues I came across: TrainingEpochs function was scrambling the order @@ -134,36 +138,19 @@ public struct NNClassifier: Layer{ // Thread-local variable that model layers read to know their mode Context.local.learningPhase = .training - // print("Shape of imagebatch", imageBatch.shape) - // print("Shape of imagebatch", imageBatch.unstacked().count) let trainingData : [BeeBatch] = (zip(patches.unstacked(), labels.unstacked()).map{BeeBatch(patch: $0.0, label: $0.1)}) let epochs = TrainingEpochs(samples: trainingData, batchSize: 200) // this is an array // var trainLossResults: [Double] = [] - let epochCount = 600 + let epochCount = 100 for (epochIndex, epoch) in epochs.prefix(epochCount).enumerated() { - // print("epoch") - // print(epochIndex) var epochLoss: Double = 0 var batchCount: Int = 0 - // epoch is a Slices object, see below - // print("encoder 1", model.encoder1) - // print("encoder 2", model.encoder2) - // print("encoder 3", model.encoder3) for batchSamples in epoch { - // print(".") let batch = batchSamples.collated - // let batch = Tensor(stacking: batchSamples.map { $0.frame!.patch(at: $0.obb) }) - // let type = [Int32](batchSamples.map { $0.type == TrackingLikelihoodModel.PatchType.bg ? 
0 : 1}) - // print("..") let (loss, grad) = valueWithGradient(at: model) { lossFunc($0, batch) } - // print("...") optimizer.update(&model, along: grad) - // print("....") - // print("encoder 1", model.encoder1) - // print("encoder 2", model.encoder2) - // print("encoder 3", model.encoder3) epochLoss += loss.scalarized() batchCount += 1 } @@ -199,217 +186,12 @@ public struct NNClassifier: Layer{ } } -/// [1] https://openreview.net/forum?id=S1g7tpEYDS -public struct SmallerNNClassifier: Layer{ - @noDerivative public let imageHeight: Int - @noDerivative public let imageWidth: Int - @noDerivative public let imageChannels: Int - @noDerivative public let latentDimension: Int - public var encoder_conv1: Conv2D - var encoder_pool1: MaxPool2D - public var encoder1: Dense - public var encoder2: Dense - - public init( - imageHeight: Int, imageWidth: Int, imageChannels: Int, latentDimension: Int - ) { - self.imageHeight = imageHeight - self.imageWidth = imageWidth - self.imageChannels = imageChannels - self.latentDimension = latentDimension - - encoder_conv1 = Conv2D(filterShape: (3, 3, imageChannels, imageChannels), padding: .same, activation: relu) - - encoder_pool1 = MaxPool2D(poolSize: (2, 2), strides: (2, 2), padding: .same) - - encoder1 = Dense( - inputSize: imageHeight * imageWidth * imageChannels / 4, - outputSize: latentDimension, - activation: relu) - - encoder2 = Dense( - inputSize: latentDimension, - outputSize: 2) - - } - - /// Initialize given an image batch - public init(patches patches: Tensor, labels labels: Tensor, given latentDimension: Int? = nil) { - print("init from image batch") - let (H_, W_, C_) = (patches.shape[1], patches.shape[2], 1) - let d = latentDimension ?? 10 - var model = SmallerNNClassifier(imageHeight: H_, imageWidth: W_, imageChannels: C_, latentDimension: d) - - let optimizer = Adam(for: model) - optimizer.learningRate = 1e-3 - - let lossFunc = NNClassifierLoss() - Context.local.learningPhase = .training - let trainingData : [BeeBatch] = (zip(patches.unstacked(), labels.unstacked()).map{BeeBatch(patch: $0.0, label: $0.1)}) - let epochs = TrainingEpochs(samples: trainingData, batchSize: 200) // this is an array - var trainLossResults: [Double] = [] - let epochCount = 300 - for (epochIndex, epoch) in epochs.prefix(epochCount).enumerated() { - var epochLoss: Double = 0 - var batchCount: Int = 0 - for batchSamples in epoch { - let batch = batchSamples.collated - let (loss, grad) = valueWithGradient(at: model) { lossFunc($0, batch) } - optimizer.update(&model, along: grad) - epochLoss += loss.scalarized() - batchCount += 1 - } - epochLoss /= Double(batchCount) - trainLossResults.append(epochLoss) - // if epochIndex % 50 == 0 { - print("Epoch \(epochIndex): Loss: \(epochLoss)") - // } - } - - self = model - } - - /// Differentiable encoder - @differentiable(wrt: imageBatch) - public func classify(_ imageBatch: Tensor) -> Tensor { - let batchSize = imageBatch.shape[0] - let expectedShape: TensorShape = [batchSize, imageHeight, imageWidth, imageChannels] - precondition( - imageBatch.shape == expectedShape, - "input shape is \(imageBatch.shape), but expected \(expectedShape)") - return imageBatch - .sequenced(through: encoder_conv1, encoder_pool1).reshaped(to: [batchSize, imageHeight * imageWidth * imageChannels / 4]) - .sequenced(through: encoder1, encoder2) - } - - /// Standard: add syntactic sugar to apply model as a function call. 
- @differentiable - public func callAsFunction(_ imageBatch: Tensor) -> Tensor { - let output = classify(imageBatch) - return output - } -} - -public struct LargerNNClassifier: Layer{ - @noDerivative public let imageHeight: Int - @noDerivative public let imageWidth: Int - @noDerivative public let imageChannels: Int - @noDerivative public let hiddenDimension: Int - @noDerivative public let latentDimension: Int - public var encoder_conv1: Conv2D - var encoder_pool1: MaxPool2D - public var encoder1: Dense - public var encoder2: Dense - public var encoder3: Dense - public var encoder4: Dense - public init( - imageHeight: Int, imageWidth: Int, imageChannels: Int, - hiddenDimension: Int, latentDimension: Int - ) { - self.imageHeight = imageHeight - self.imageWidth = imageWidth - self.imageChannels = imageChannels - self.hiddenDimension = hiddenDimension - self.latentDimension = latentDimension - - encoder_conv1 = Conv2D(filterShape: (3, 3, imageChannels, imageChannels), padding: .same, activation: relu) - - encoder_pool1 = MaxPool2D(poolSize: (2, 2), strides: (2, 2), padding: .same) - - encoder1 = Dense( - inputSize: imageHeight * imageWidth * imageChannels / 4, - outputSize: hiddenDimension, - activation: relu) - - encoder2 = Dense( - inputSize: hiddenDimension, - outputSize: hiddenDimension, - activation: relu) - - encoder3 = Dense( - inputSize: hiddenDimension, - outputSize: latentDimension, - activation: relu) - - encoder4 = Dense( - inputSize: latentDimension, - outputSize: 2) - - } - - /// Initialize given an image batch - public typealias HyperParameters = (hiddenDimension: Int, latentDimension: Int) - // public init(from imageBatch: Tensor, given parameters: HyperParameters? = nil) { - public init(patches patches: Tensor, labels labels: Tensor, given parameters: HyperParameters? = nil) { - print("init from image batch") - let (H_, W_, C_) = (patches.shape[1], patches.shape[2], 1) - let (h,d) = parameters ?? 
(100,10) - var model = LargerNNClassifier(imageHeight: H_, imageWidth: W_, imageChannels: C_, - hiddenDimension: h, latentDimension: d) - let optimizer = Adam(for: model) - optimizer.learningRate = 1e-3 - let lossFunc = NNClassifierLoss() - Context.local.learningPhase = .training - let trainingData : [BeeBatch] = (zip(patches.unstacked(), labels.unstacked()).map{BeeBatch(patch: $0.0, label: $0.1)}) - let epochs = TrainingEpochs(samples: trainingData, batchSize: 200) // this is an array - // - var trainLossResults: [Double] = [] - let epochCount = 600 - for (epochIndex, epoch) in epochs.prefix(epochCount).enumerated() { - var epochLoss: Double = 0 - var batchCount: Int = 0 - for batchSamples in epoch { - let batch = batchSamples.collated - let (loss, grad) = valueWithGradient(at: model) { lossFunc($0, batch) } - optimizer.update(&model, along: grad) - epochLoss += loss.scalarized() - batchCount += 1 - } - epochLoss /= Double(batchCount) - trainLossResults.append(epochLoss) - if epochIndex % 5 == 0 { - print("\nEpoch \(epochIndex):", terminator:"") - } - print(" \(epochLoss),", terminator: "") - } - - // if NSFileManager.fileExistsAtPath(path) { - // print("File exists") - // } else { - // print("File does not exist") - // } - // np.save("epochloss\()", Tensor(trainLossResults).makeNumpyArray()) - - self = model - } - - /// Differentiable encoder - @differentiable(wrt: imageBatch) - public func classify(_ imageBatch: Tensor) -> Tensor { - let batchSize = imageBatch.shape[0] - let expectedShape: TensorShape = [batchSize, imageHeight, imageWidth, imageChannels] - precondition( - imageBatch.shape == expectedShape, - "input shape is \(imageBatch.shape), but expected \(expectedShape)") - return imageBatch - .sequenced(through: encoder_conv1, encoder_pool1).reshaped(to: [batchSize, imageHeight * imageWidth * imageChannels / 4]) - .sequenced(through: encoder1, encoder2, encoder3, encoder4) - } - - /// Standard: add syntactic sugar to apply model as a function call. - @differentiable - public func callAsFunction(_ imageBatch: Tensor) -> Tensor { - let output = classify(imageBatch) - return output - } -} /// The loss function for the `DenseRAE`. public struct NNClassifierLoss { /// Return the loss of `model` on `imageBatch`. - /// /// Parameter printLoss: Whether to print the loss and its components. 
@differentiable public func callAsFunction( @@ -417,70 +199,30 @@ public struct NNClassifierLoss { ) -> Tensor { let batchSize = imageBatch.patch.shape[0] let output = model(imageBatch.patch) - let totalLoss = softmaxCrossEntropy(logits: output, labels: imageBatch.label) + let totalLoss = softmaxCrossEntropy(logits: output, labels: Tensor(imageBatch.label)) return totalLoss } - @differentiable - public func callAsFunction( - _ model: LargerNNClassifier, _ imageBatch: BeeBatch, printLoss: Bool = false - ) -> Tensor { - let batchSize = imageBatch.patch.shape[0] - let output = model(imageBatch.patch) - let totalLoss = softmaxCrossEntropy(logits: output, labels: imageBatch.label) - return totalLoss - } - - - @differentiable - public func callAsFunction( - _ model: SmallerNNClassifier, _ imageBatch: BeeBatch, printLoss: Bool = false - ) -> Tensor { - let batchSize = imageBatch.patch.shape[0] - let output = model(imageBatch.patch) - let totalLoss = softmaxCrossEntropy(logits: output, labels: imageBatch.label) - return totalLoss - } } extension NNClassifier: Classifier {} -extension SmallerNNClassifier : Classifier {} -extension LargerNNClassifier: Classifier {} public struct PretrainedNNClassifier : Classifier{ public var inner: NNClassifier - /// The constructor that only does loading of the pretrained weights. - public init(from imageBatch: Tensor, given: HyperParameters?) { - let shape = imageBatch.shape - precondition(imageBatch.rank == 4, "Wrong image shape \(shape)") - let (_, H_, W_, C_) = (shape[0], shape[1], shape[2], shape[3]) - if let params = given { - var encoder = NNClassifier( - imageHeight: H_, imageWidth: W_, imageChannels: 1, - hiddenDimension: params.hiddenDimension, latentDimension: params.latentDimension - ) - - let np = Python.import("numpy") - - encoder.load(weights: np.load(params.weightFile, allow_pickle: true)) - inner = encoder - } else { - inner = NNClassifier( - imageHeight: H_, imageWidth: W_, imageChannels: 1, - hiddenDimension: 1, latentDimension: 1 - ) - fatalError("Must provide hyperparameters to pretrained network") - } - } - /// Constructor that does training of the network - public init(patches patches: Tensor, labels labels: Tensor, given: HyperParameters?) { + public init(patches patches: Tensor, labels labels: Tensor, given: HyperParameters, train_mode: String) { inner = NNClassifier( - patches: patches, labels: labels, given: (given != nil) ? (hiddenDimension: given!.hiddenDimension, latentDimension: given!.latentDimension) : nil + patches: patches, labels: labels, given: (given != nil) ? + (hiddenDimension: given.hiddenDimension, + latentDimension: given.latentDimension, + weightFile: given.weightFile, + learningRate: given.learningRate) : nil, train_mode: train_mode ) + + } /// Save the weight to file @@ -495,111 +237,371 @@ public struct PretrainedNNClassifier : Classifier{ } /// Initialize given an image batch - public typealias HyperParameters = (hiddenDimension: Int, latentDimension: Int, weightFile: String) + public typealias HyperParameters = (hiddenDimension: Int, latentDimension: Int, weightFile: String, learningRate: Float) } -public struct PretrainedSmallerNNClassifier : Classifier{ - public var inner: SmallerNNClassifier - - /// The constructor that only does loading of the pretrained weights. - public init(from imageBatch: Tensor, given: HyperParameters?) 
{ - let shape = imageBatch.shape - precondition(imageBatch.rank == 4, "Wrong image shape \(shape)") - let (_, H_, W_, C_) = (shape[0], shape[1], shape[2], shape[3]) - if let params = given { - var encoder = SmallerNNClassifier( - imageHeight: H_, imageWidth: W_, imageChannels: 1, latentDimension: params.latentDimension - ) - let np = Python.import("numpy") - encoder.load(weights: np.load(params.weightFile, allow_pickle: true)) - inner = encoder - } else { - inner = SmallerNNClassifier( - imageHeight: H_, imageWidth: W_, imageChannels: 1, latentDimension: 1 - ) - fatalError("Must provide hyperparameters to pretrained network") - } - } + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +// /// [1] https://openreview.net/forum?id=S1g7tpEYDS +// public struct SmallerNNClassifier: Layer{ +// @noDerivative public let imageHeight: Int +// @noDerivative public let imageWidth: Int +// @noDerivative public let imageChannels: Int +// @noDerivative public let latentDimension: Int +// public var encoder_conv1: Conv2D +// var encoder_pool1: MaxPool2D +// public var encoder1: Dense +// public var encoder2: Dense + +// public init( +// imageHeight: Int, imageWidth: Int, imageChannels: Int, latentDimension: Int +// ) { +// self.imageHeight = imageHeight +// self.imageWidth = imageWidth +// self.imageChannels = imageChannels +// self.latentDimension = latentDimension + +// encoder_conv1 = Conv2D(filterShape: (3, 3, imageChannels, imageChannels), padding: .same, activation: relu) + +// encoder_pool1 = MaxPool2D(poolSize: (2, 2), strides: (2, 2), padding: .same) + +// encoder1 = Dense( +// inputSize: imageHeight * imageWidth * imageChannels / 4, +// outputSize: latentDimension, +// activation: relu) + +// encoder2 = Dense( +// inputSize: latentDimension, +// outputSize: 2) + +// } + +// /// Initialize given an image batch +// public init(patches patches: Tensor, labels labels: Tensor, given latentDimension: Int? = nil) { +// print("init from image batch") +// let (H_, W_, C_) = (patches.shape[1], patches.shape[2], 1) +// let d = latentDimension ?? 
10 +// var model = SmallerNNClassifier(imageHeight: H_, imageWidth: W_, imageChannels: C_, latentDimension: d) + +// let optimizer = Adam(for: model) +// optimizer.learningRate = 1e-3 + +// let lossFunc = NNClassifierLoss() +// Context.local.learningPhase = .training +// let trainingData : [BeeBatch] = (zip(patches.unstacked(), labels.unstacked()).map{BeeBatch(patch: $0.0, label: $0.1)}) +// let epochs = TrainingEpochs(samples: trainingData, batchSize: 200) // this is an array +// var trainLossResults: [Double] = [] +// let epochCount = 600 +// for (epochIndex, epoch) in epochs.prefix(epochCount).enumerated() { +// var epochLoss: Double = 0 +// var batchCount: Int = 0 +// for batchSamples in epoch { +// let batch = batchSamples.collated +// let (loss, grad) = valueWithGradient(at: model) { lossFunc($0, batch) } +// optimizer.update(&model, along: grad) +// epochLoss += loss.scalarized() +// batchCount += 1 +// } +// epochLoss /= Double(batchCount) +// trainLossResults.append(epochLoss) +// // if epochIndex % 50 == 0 { +// print("Epoch \(epochIndex): Loss: \(epochLoss)") +// // } +// } + +// self = model +// } + +// /// Differentiable encoder +// @differentiable(wrt: imageBatch) +// public func classify(_ imageBatch: Tensor) -> Tensor { +// let batchSize = imageBatch.shape[0] +// let expectedShape: TensorShape = [batchSize, imageHeight, imageWidth, imageChannels] +// precondition( +// imageBatch.shape == expectedShape, +// "input shape is \(imageBatch.shape), but expected \(expectedShape)") +// return imageBatch +// .sequenced(through: encoder_conv1, encoder_pool1).reshaped(to: [batchSize, imageHeight * imageWidth * imageChannels / 4]) +// .sequenced(through: encoder1, encoder2) +// } + +// /// Standard: add syntactic sugar to apply model as a function call. 
+// @differentiable +// public func callAsFunction(_ imageBatch: Tensor) -> Tensor { +// let output = classify(imageBatch) +// return output +// } +// } + +// public struct LargerNNClassifier: Layer{ +// @noDerivative public let imageHeight: Int +// @noDerivative public let imageWidth: Int +// @noDerivative public let imageChannels: Int +// @noDerivative public let hiddenDimension: Int +// @noDerivative public let latentDimension: Int +// public var encoder_conv1: Conv2D +// var encoder_pool1: MaxPool2D +// public var encoder1: Dense +// public var encoder2: Dense +// public var encoder3: Dense +// public var encoder4: Dense +// public init( +// imageHeight: Int, imageWidth: Int, imageChannels: Int, +// hiddenDimension: Int, latentDimension: Int +// ) { +// self.imageHeight = imageHeight +// self.imageWidth = imageWidth +// self.imageChannels = imageChannels +// self.hiddenDimension = hiddenDimension +// self.latentDimension = latentDimension + +// encoder_conv1 = Conv2D(filterShape: (3, 3, imageChannels, imageChannels), padding: .same, activation: relu) + +// encoder_pool1 = MaxPool2D(poolSize: (2, 2), strides: (2, 2), padding: .same) + +// encoder1 = Dense( +// inputSize: imageHeight * imageWidth * imageChannels / 4, +// outputSize: hiddenDimension, +// activation: relu) + +// encoder2 = Dense( +// inputSize: hiddenDimension, +// outputSize: hiddenDimension, +// activation: relu) + +// encoder3 = Dense( +// inputSize: hiddenDimension, +// outputSize: latentDimension, +// activation: relu) + +// encoder4 = Dense( +// inputSize: latentDimension, +// outputSize: 2) + +// } + +// /// Initialize given an image batch +// public typealias HyperParameters = (hiddenDimension: Int, latentDimension: Int) +// // public init(from imageBatch: Tensor, given parameters: HyperParameters? = nil) { +// public init(patches patches: Tensor, labels labels: Tensor, given parameters: HyperParameters? = nil) { +// print("init from image batch") +// let (H_, W_, C_) = (patches.shape[1], patches.shape[2], 1) +// let (h,d) = parameters ?? 
(100,10) +// var model = LargerNNClassifier(imageHeight: H_, imageWidth: W_, imageChannels: C_, +// hiddenDimension: h, latentDimension: d) +// let optimizer = Adam(for: model) +// optimizer.learningRate = 1e-3 +// let lossFunc = NNClassifierLoss() +// Context.local.learningPhase = .training +// let trainingData : [BeeBatch] = (zip(patches.unstacked(), labels.unstacked()).map{BeeBatch(patch: $0.0, label: $0.1)}) +// let epochs = TrainingEpochs(samples: trainingData, batchSize: 200) // this is an array +// // +// var trainLossResults: [Double] = [] +// let epochCount = 600 +// for (epochIndex, epoch) in epochs.prefix(epochCount).enumerated() { +// var epochLoss: Double = 0 +// var batchCount: Int = 0 +// for batchSamples in epoch { +// let batch = batchSamples.collated +// let (loss, grad) = valueWithGradient(at: model) { lossFunc($0, batch) } +// optimizer.update(&model, along: grad) +// epochLoss += loss.scalarized() +// batchCount += 1 +// } +// epochLoss /= Double(batchCount) +// trainLossResults.append(epochLoss) +// if epochIndex % 5 == 0 { +// print("\nEpoch \(epochIndex):", terminator:"") +// } +// print(" \(epochLoss),", terminator: "") +// } + +// // if NSFileManager.fileExistsAtPath(path) { +// // print("File exists") +// // } else { +// // print("File does not exist") +// // } +// // np.save("epochloss\()", Tensor(trainLossResults).makeNumpyArray()) + +// self = model +// } + +// /// Differentiable encoder +// @differentiable(wrt: imageBatch) +// public func classify(_ imageBatch: Tensor) -> Tensor { +// let batchSize = imageBatch.shape[0] +// let expectedShape: TensorShape = [batchSize, imageHeight, imageWidth, imageChannels] +// precondition( +// imageBatch.shape == expectedShape, +// "input shape is \(imageBatch.shape), but expected \(expectedShape)") +// return imageBatch +// .sequenced(through: encoder_conv1, encoder_pool1).reshaped(to: [batchSize, imageHeight * imageWidth * imageChannels / 4]) +// .sequenced(through: encoder1, encoder2, encoder3, encoder4) +// } + +// /// Standard: add syntactic sugar to apply model as a function call. +// @differentiable +// public func callAsFunction(_ imageBatch: Tensor) -> Tensor { +// let output = classify(imageBatch) +// return output +// } +// } + + + + + + +// public struct PretrainedSmallerNNClassifier : Classifier{ +// public var inner: SmallerNNClassifier - /// Constructor that does training of the network - public init(patches patches: Tensor, labels labels: Tensor, given: HyperParameters?) { - inner = SmallerNNClassifier( - patches: patches, labels: labels, given: (given != nil) ? (given!.latentDimension) : nil - ) - } +// /// The constructor that only does loading of the pretrained weights. +// public init(from imageBatch: Tensor, given: HyperParameters?) 
{ +// let shape = imageBatch.shape +// precondition(imageBatch.rank == 4, "Wrong image shape \(shape)") +// let (_, H_, W_, C_) = (shape[0], shape[1], shape[2], shape[3]) +// if let params = given { +// var encoder = SmallerNNClassifier( +// imageHeight: H_, imageWidth: W_, imageChannels: 1, latentDimension: params.latentDimension +// ) + +// let np = Python.import("numpy") + +// encoder.load(weights: np.load(params.weightFile, allow_pickle: true)) +// inner = encoder +// } else { +// inner = SmallerNNClassifier( +// imageHeight: H_, imageWidth: W_, imageChannels: 1, latentDimension: 1 +// ) +// fatalError("Must provide hyperparameters to pretrained network") +// } +// } - /// Save the weight to file - public func save(to path: String) { - let np = Python.import("numpy") - np.save(path, np.array(inner.numpyWeights, dtype: Python.object)) - } - - @differentiable - public func classify(_ imageBatch: Tensor) -> Tensor { - inner.classify(imageBatch) - } +// /// Constructor that does training of the network +// public init(patches patches: Tensor, labels labels: Tensor, given: HyperParameters?) { +// inner = SmallerNNClassifier( +// patches: patches, labels: labels, given: (given != nil) ? (given!.latentDimension) : nil +// ) +// } +// /// Save the weight to file +// public func save(to path: String) { +// let np = Python.import("numpy") +// np.save(path, np.array(inner.numpyWeights, dtype: Python.object)) +// } + +// @differentiable +// public func classify(_ imageBatch: Tensor) -> Tensor { +// inner.classify(imageBatch) +// } - /// Initialize given an image batch - public typealias HyperParameters = (latentDimension: Int, weightFile: String) -} + +// /// Initialize given an image batch +// public typealias HyperParameters = (latentDimension: Int, weightFile: String) +// } -public struct PretrainedLargerNNClassifier : Classifier{ - public var inner: LargerNNClassifier +// public struct PretrainedLargerNNClassifier : Classifier{ +// public var inner: LargerNNClassifier - /// The constructor that only does loading of the pretrained weights. - public init(from imageBatch: Tensor, given: HyperParameters?) { - let shape = imageBatch.shape - precondition(imageBatch.rank == 4, "Wrong image shape \(shape)") - let (_, H_, W_, C_) = (shape[0], shape[1], shape[2], shape[3]) - if let params = given { - var encoder = LargerNNClassifier( - imageHeight: H_, imageWidth: W_, imageChannels: 1, - hiddenDimension: params.hiddenDimension, latentDimension: params.latentDimension - ) - - let np = Python.import("numpy") - - encoder.load(weights: np.load(params.weightFile, allow_pickle: true)) - inner = encoder - } else { - inner = LargerNNClassifier( - imageHeight: H_, imageWidth: W_, imageChannels: 1, - hiddenDimension: 1, latentDimension: 1 - ) - fatalError("Must provide hyperparameters to pretrained network") - } - } +// /// The constructor that only does loading of the pretrained weights. +// public init(from imageBatch: Tensor, given: HyperParameters?) 
{ +// let shape = imageBatch.shape +// precondition(imageBatch.rank == 4, "Wrong image shape \(shape)") +// let (_, H_, W_, C_) = (shape[0], shape[1], shape[2], shape[3]) +// if let params = given { +// var encoder = LargerNNClassifier( +// imageHeight: H_, imageWidth: W_, imageChannels: 1, +// hiddenDimension: params.hiddenDimension, latentDimension: params.latentDimension +// ) + +// let np = Python.import("numpy") + +// encoder.load(weights: np.load(params.weightFile, allow_pickle: true)) +// inner = encoder +// } else { +// inner = LargerNNClassifier( +// imageHeight: H_, imageWidth: W_, imageChannels: 1, +// hiddenDimension: 1, latentDimension: 1 +// ) +// fatalError("Must provide hyperparameters to pretrained network") +// } +// } - /// Constructor that does training of the network - public init(patches patches: Tensor, labels labels: Tensor, given: HyperParameters?) { - inner = LargerNNClassifier( - patches: patches, labels: labels, given: (given != nil) ? (hiddenDimension: given!.hiddenDimension, latentDimension: given!.latentDimension) : nil - ) - } +// /// Constructor that does training of the network +// public init(patches patches: Tensor, labels labels: Tensor, given: HyperParameters?) { +// inner = LargerNNClassifier( +// patches: patches, labels: labels, given: (given != nil) ? (hiddenDimension: given!.hiddenDimension, latentDimension: given!.latentDimension) : nil +// ) +// } - /// Save the weight to file - public func save(to path: String) { - let np = Python.import("numpy") - np.save(path, np.array(inner.numpyWeights, dtype: Python.object)) - } - - @differentiable - public func classify(_ imageBatch: Tensor) -> Tensor { - inner.classify(imageBatch) - } +// /// Save the weight to file +// public func save(to path: String) { +// let np = Python.import("numpy") +// np.save(path, np.array(inner.numpyWeights, dtype: Python.object)) +// } + +// @differentiable +// public func classify(_ imageBatch: Tensor) -> Tensor { +// inner.classify(imageBatch) +// } - /// Initialize given an image batch - public typealias HyperParameters = (hiddenDimension: Int, latentDimension: Int, weightFile: String) -} \ No newline at end of file +// /// Initialize given an image batch +// public typealias HyperParameters = (hiddenDimension: Int, latentDimension: Int, weightFile: String) +// } \ No newline at end of file diff --git a/Sources/BeeTracking/OISTBeeVideo+Batches.swift b/Sources/BeeTracking/OISTBeeVideo+Batches.swift index f9bd7e19..ad31671f 100644 --- a/Sources/BeeTracking/OISTBeeVideo+Batches.swift +++ b/Sources/BeeTracking/OISTBeeVideo+Batches.swift @@ -83,16 +83,23 @@ extension OISTBeeVideo { patchSize: (Int, Int), batchSize: Int = 200 ) -> [(frame: Tensor?, obb: OrientedBoundingBox)] { - print("hello0") /// Anything not completely overlapping labels var deterministicEntropy = ARC4RandomNumberGenerator(seed: 42) let frames = self.randomFrames(self.frames.count, using: &deterministicEntropy) - print("hello0.5") + + print("OISTBeeVideo+Batches.swift. MakeForegroundBoundingBoxes") + let label = frames[0].1.labels[0] + + + + var label_count = 0 + for i in 0...(frames.count-1) { + label_count += frames[i].1.labels.count + } // We need `batchSize / frames.count` patches from each frame, plus the remainder of the // integer division. 
var patchesPerFrame = Array(repeating: batchSize / frames.count, count: frames.count) patchesPerFrame[0] += batchSize % frames.count - print("hello1") /// Samples bounding boxes randomly from each frame /// returns array of (ref to frame, oriented bounding box) @@ -105,8 +112,6 @@ extension OISTBeeVideo { rows: patchSize.0, cols: patchSize.1)) } } - print("hello2") - return obbs } @@ -121,6 +126,14 @@ extension OISTBeeVideo { var deterministicEntropy = ARC4RandomNumberGenerator(seed: 42) let frames = self.randomFrames(self.frames.count, using: &deterministicEntropy) + print("OISTBeeVideo+Batches.swift. MakeBackgroundBoundingBoxes") + let label = frames[0].1.labels[0] + + + var label_count = 0 + for i in 0...(frames.count-1) { + label_count += frames[i].1.labels.count + } // We need `batchSize / frames.count` patches from each frame, plus the remainder of the // integer division. diff --git a/Sources/BeeTracking/ProbabilisticTracker.swift b/Sources/BeeTracking/ProbabilisticTracker.swift index 05b55c98..f1506132 100644 --- a/Sources/BeeTracking/ProbabilisticTracker.swift +++ b/Sources/BeeTracking/ProbabilisticTracker.swift @@ -228,7 +228,7 @@ public func makeProbabilisticTracker< addFixedBetweenFactor: { (values, variables, graph) -> () in let (prior) = unpack(values) let (poseID) = unpack(variables) - graph.store(WeightedPriorFactorPose2SD(poseID, prior, sdX: 8, sdY: 8, sdTheta:0.4)) + graph.store(WeightedPriorFactorPose2SD(poseID, prior, sdX: 8, sdY: 4.6, sdTheta: 0.3)) }) } diff --git a/Sources/BeeTracking/TrackingFactorGraph.swift b/Sources/BeeTracking/TrackingFactorGraph.swift index 62942d3a..9bc8bc09 100644 --- a/Sources/BeeTracking/TrackingFactorGraph.swift +++ b/Sources/BeeTracking/TrackingFactorGraph.swift @@ -174,9 +174,9 @@ public struct TrackingConfiguration { self.addFixedBetweenFactor = addFixedBetweenFactor! // For LM - // self.optimizer.precision = 1e-1 - // self.optimizer.max_iteration = 100 - // self.optimizer.cgls_precision = 1e-5 + self.optimizer.precision = 1e-1 + self.optimizer.max_iteration = 100 + self.optimizer.cgls_precision = 1e-5 } /// Returns a `FactorGraph` for the tracking problem on the frames at `frameIndices`. 
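
A note on the hunk below: the loop it trims is a best-of-N initialization for
each new frame: resample the previous pose with tangent-space noise, score
each candidate by the factor-graph error, and keep the lowest-error sample.
The hunk also cuts the sample count from 2000 to 256, trading some
initialization quality for speed. A minimal standalone sketch of the pattern
(`g`, `x`, and the pose IDs mirror the names in the hunk; treat it as an
illustration under those assumptions, not the exact SwiftFusion code):

    import SwiftFusion

    /// Perturbs `currentPoseID` around `previousPoseID` `sampleCount` times
    /// and keeps the candidate with the lowest factor-graph error.
    func initializeBySampling(
      g: FactorGraph, x: inout VariableAssignments,
      previousPoseID: TypedID<Pose2>, currentPoseID: TypedID<Pose2>,
      sampleCount: Int = 256
    ) {
      var bestPose = x[previousPoseID]
      var bestError = g.error(at: x)
      for _ in 0..<sampleCount {
        x[currentPoseID] = x[previousPoseID]
        // Tangent-space stddev is (theta, x, y), matching the tracker's
        // motion-model settings above.
        x[currentPoseID].perturbWith(stddev: Vector3(0.3, 8, 4.6))
        let candidateError = g.error(at: x)
        if candidateError < bestError {
          bestError = candidateError
          bestPose = x[currentPoseID]
        }
      }
      x[currentPoseID] = bestPose
    }
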
@@ -201,57 +201,16 @@ public struct TrackingConfiguration { // Sample from motion model and take best pose var bestError = g.error(at: x) - // var posex = [Double]() - // var posey = [Double]() - // var posetheta = [Double]() - // var error = [Double]() - // var besterror = [Double]() - // time x , time y , time theta , time error - for _ in 0..<2000 { //2000 + for _ in 0..<256 { //2000 x[currentPoseID] = x[previousPoseID] x[currentPoseID].perturbWith(stddev: Vector3(0.3, 8, 4.6)) let candidateError = g.error(at: x) - /// - // print("x", x) - // print("theta", x[currentPoseID].rot.theta, "vector", x[currentPoseID].t.x, x[currentPoseID].t.y) - // print("g.error(at: x)", g.error(at: x)) - // print("frame", i) - - /// if candidateError < bestError { bestError = candidateError bestPose = x[currentPoseID] } - - // APPEND CURRENT ERROR - // posex.append(x[currentPoseID].t.x) - // posey.append(x[currentPoseID].t.y) - // posetheta.append(x[currentPoseID].rot.theta) - // error.append(candidateError) - // besterror.append(bestError) } x[currentPoseID] = bestPose - // let np = Python.import("numpy") - // let posex_np = Tensor(posex).makeNumpyArray() - // let posey_np = Tensor(posey).makeNumpyArray() - // let posetheta_np = Tensor(posetheta).makeNumpyArray() - // let error_np = Tensor(error).makeNumpyArray() - // let besterror_np = Tensor(besterror).makeNumpyArray() - - // let folderName = "sampling" - // if !FileManager.default.fileExists(atPath: folderName) { - // do { - // try FileManager.default.createDirectory(atPath: folderName, withIntermediateDirectories: true, attributes: nil) - // } catch { - // print(error.localizedDescription) - // } - // } - - // np.save("./sampling/sampling_frame_\(i)_posex.npy", posex_np) - // np.save("./sampling/sampling_frame_\(i)_posey.npy", posey_np) - // np.save("./sampling/sampling_frame_\(i)_posetheta.npy", posetheta_np) - // np.save("./sampling/sampling_frame_\(i)_error.npy", error_np) - // np.save("./sampling/sampling_frame_\(i)_besterror.npy", besterror_np) } @@ -292,7 +251,7 @@ public struct TrackingConfiguration { // Initialize the variables one frame at a time. Each iteration intializes the `i+1`-th // variable. for i in 0..<(frames.count - 1) { - print("Inferring for frame \(i + 1) of \(frames.count - 1)") + print("Infr \(i + 1)/\(frames.count - 1) ", terminator: "") extendTrack(x: &x, fromFrame:i, withSampling:samplingFlag) } diff --git a/Sources/SwiftFusion/Inference/FactorsStorage.swift b/Sources/SwiftFusion/Inference/FactorsStorage.swift index 0f1c4d78..3561c2af 100644 --- a/Sources/SwiftFusion/Inference/FactorsStorage.swift +++ b/Sources/SwiftFusion/Inference/FactorsStorage.swift @@ -62,9 +62,9 @@ extension ArrayStorage where Element: VectorFactor { let (lFactor, lVars) = factor.linearizableComponent(at: vars) let gradIndices = LVariables.linearized(lFactor.edges) let grads = GradVariables(at: gradIndices, in: GradVariables.withoutMutation(gradBufs)) - let newGrads = grads + gradient(at: lVars) { (lFactor.errorVector(at: $0) as! Vector1).x } + // let newGrads = grads + gradient(at: lVars) { (lFactor.errorVector(at: $0) as! 
Vector1).x }
       // print("FactorsStorage", lFactor.errorVector(at: lVars))
-      // let newGrads = grads + gradient(at: lVars) { lFactor.errorVector(at: $0).squaredNorm }
+      let newGrads = grads + gradient(at: lVars) { lFactor.errorVector(at: $0).squaredNorm }
       newGrads.assign(into: gradIndices, in: gradBufs)
     }
   }
diff --git a/Sources/SwiftFusion/Optimizers/LM.swift b/Sources/SwiftFusion/Optimizers/LM.swift
index 65633e11..7beed981 100644
--- a/Sources/SwiftFusion/Optimizers/LM.swift
+++ b/Sources/SwiftFusion/Optimizers/LM.swift
@@ -82,11 +82,7 @@ public struct LM {
     var inner_iter_step = 0
     var inner_success = false
     var all_done = false
-    var i = 0
     for _ in 0..<max_iteration {
diff --git a/Tests/BrandoTests/NNClassifierTests.swift b/Tests/BrandoTests/NNClassifierTests.swift
deleted file mode 100644
--- a/Tests/BrandoTests/NNClassifierTests.swift
+++ /dev/null
-    let fgpatches = Tensor(stacking: fgBoxes.map { $0.frame!.patch(at: $0.obb)})
-    let bgpatches = Tensor(stacking: fgBoxes.map { $0.frame!.patch(at: $0.obb)})
-
-    let outfg = classifier.classify(fgpatches)
-    let outbg = classifier.classify(bgpatches)
-    let shapefg = outfg.shape
-    let shapebg = outbg.shape
-    print("fg", outfg)
-    print("bg", outbg)
-    XCTAssertEqual(outfg.shape, outbg.shape)
-    XCTAssertEqual(outbg.shape, [batchSize, 2])
-
-    var fgsum0 = 0.0
-    var fgsum1 = 0.0
-    var bgsum0 = 0.0
-    var bgsum1 = 0.0
-    for i in 0...batchSize-1 {
-      fgsum0 += Double(outfg[i,0])!
-      fgsum1 += Double(outfg[i,1])!
-      bgsum0 += Double(outbg[i,0])!
-      bgsum1 += Double(outbg[i,1])!
-    }
-    // Make sure classifier is working better than 50%
-    XCTAssertGreaterThan(fgsum1,fgsum0)
-    XCTAssertGreaterThan(bgsum0,bgsum1)
-
-
-
-
-  }
-
-
-
-}
diff --git a/Tests/BrandoTests/NNClassifierTests2.swift b/Tests/BrandoTests/NNClassifierTests2.swift
deleted file mode 100644
index a4e0edae..00000000
--- a/Tests/BrandoTests/NNClassifierTests2.swift
+++ /dev/null
@@ -1,60 +0,0 @@
-// import TensorFlow
-// import XCTest
-// import PythonKit
-
-// import BeeTracking
-
-// class NNClassifierTests2: XCTestCase {
-//   /// Test that the hand-coded Jacobian for the decode method gives the same results as the
-//   /// AD-generated Jacobian.
-//   func testClassifier() {
-//     // Size of the images.
-//     let np = Python.import("numpy")
-//     let kHiddenDimension = 2
-//     let featureSize = 2
-//     // used to be 512
-//     print(softmax(Tensor([5,-5,10,-10])))
-
-//     let (imageHeight, imageWidth, imageChannels) =
-//       (8, 8, 1)
-//     var images: Tensor = .init(zeros: [6000, 8, 8, 1])
-//     images[3000...6000, 0..., 0...8, 0...1] = .init(ones: [3000,8,8,1])
-//     // print("image at index", images[3000,0...,0...,0...])
-//     var labels: Tensor = .init(zeros: [6000])
-//     labels[3000...6000] = .init(ones: [3000])
-
-
-//     var classifier = NNClassifier(
-//       imageHeight: imageHeight, imageWidth: imageWidth, imageChannels: imageChannels,
-//       hiddenDimension: kHiddenDimension, latentDimension: featureSize
-//     )
-//     print("training data done")
-
-//     print("Training...")
-//     let rae: PretrainedNNClassifier = PretrainedNNClassifier(
-//       patches: images,
-//       labels: labels,
-//       given: PretrainedNNClassifier.HyperParameters(hiddenDimension: kHiddenDimension, latentDimension: featureSize, weightFile: "")
-//     )
-//     rae.save(to: "./classifier_weight_test_\(featureSize).npy")
-//     print("saved")
-
-
-
-
-
-
-
-
-
-//     //Tests: does it classify between 1 and 0.
-//     //Tests: does it classify an 8by8 white vs black images. feature size = 1 latent dim = 1.
-//     //Tests: does it classify bees correctly.
-//     //Tracking factor: train classifier for a 3by3 image. 8by8.
-//     //Swift run
-
-//     // Pass all the unit vectors throught the AD-generated pullback function and check that the
-//     // results match the hand-coded Jacobian.
- -// } -// } diff --git a/Tests/BrandoTests/TrackingTests.swift b/Tests/BrandoTests/TrackingTests.swift deleted file mode 100644 index e69de29b..00000000 From f7ed2d0a81c5e5deea9e05e31cef736e7b407ae2 Mon Sep 17 00:00:00 2001 From: icourten3 Date: Thu, 9 Dec 2021 10:50:36 -0500 Subject: [PATCH 13/34] removed BrandoTests --- Package.swift | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/Package.swift b/Package.swift index b4b75551..762940f8 100644 --- a/Package.swift +++ b/Package.swift @@ -99,16 +99,6 @@ let package = Package( path: "Scripts", exclude: ["README.md"] ), - .testTarget( - name: "BrandoTests", - dependencies: [ - "SwiftFusion", - "BeeDataset", - "BeeTracking", - .product(name: "PenguinTesting", package: "Penguin"), - "ModelSupport", - ] - ), .testTarget( name: "SwiftFusionTests", dependencies: [ From 778f89b4db1759cf425dc1dc42ed72fd70456562 Mon Sep 17 00:00:00 2001 From: brandoDecu <43319223+brandoDecu@users.noreply.github.com> Date: Thu, 9 Dec 2021 11:13:13 -0500 Subject: [PATCH 14/34] Update Andrew01.swift --- Scripts/Andrew01.swift | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Scripts/Andrew01.swift b/Scripts/Andrew01.swift index 2119ce05..40676829 100644 --- a/Scripts/Andrew01.swift +++ b/Scripts/Andrew01.swift @@ -16,7 +16,6 @@ struct Andrew01: ParsableCommand { @Option(help: "Size of feature space") var featureSize: Int = 256 - // used to be 256 @Option(help: "Pretrained weights") var weightsFile: String? @@ -26,7 +25,6 @@ struct Andrew01: ParsableCommand { func run() { let np = Python.import("numpy") let kHiddenDimension = 512 - // used to be 512 let (imageHeight, imageWidth, imageChannels) = (40, 70, 1) @@ -116,4 +114,4 @@ fileprivate func unpack(_ t: Tuple2) -> (A, B) { /// Returns `t` as a Swift tuple. fileprivate func unpack(_ t: Tuple1) -> (A) { return (t.head) -} \ No newline at end of file +} From 909a094d0a1996584b7820a47f10b1190287a983 Mon Sep 17 00:00:00 2001 From: brandoDecu <43319223+brandoDecu@users.noreply.github.com> Date: Thu, 9 Dec 2021 11:14:55 -0500 Subject: [PATCH 15/34] Update Andrew01.swift --- Scripts/Andrew01.swift | 3 --- 1 file changed, 3 deletions(-) diff --git a/Scripts/Andrew01.swift b/Scripts/Andrew01.swift index 40676829..051aabb1 100644 --- a/Scripts/Andrew01.swift +++ b/Scripts/Andrew01.swift @@ -40,7 +40,6 @@ struct Andrew01: ParsableCommand { } else { rae.load(weights: np.load("./oist_rae_weight_\(featureSize).npy", allow_pickle: true)) } - print("s") // let (imageHeight, imageWidth, imageChannels) = // (40, 70, 1) @@ -54,7 +53,6 @@ struct Andrew01: ParsableCommand { let testData = OISTBeeVideo(directory: dataDir, afterIndex: trainingDatasetSize, length: trackLength)! 
let trackerEvaluation = TrackerEvaluationDataset(testData) - print("s1") let evalTracker: Tracker = {frames, start in var tracker = trainProbabilisticTracker( trainingData: data, @@ -70,7 +68,6 @@ struct Andrew01: ParsableCommand { return track } - print("s2") let plt = Python.import("matplotlib.pyplot") let sequenceCount = 1 var results = trackerEvaluation.evaluate(evalTracker, sequenceCount: sequenceCount, deltaAnchor: 175, outputFile: "andrew01") From 2a145c2898878629f2ed0f476664a5def0a79c9c Mon Sep 17 00:00:00 2001 From: brandoDecu <43319223+brandoDecu@users.noreply.github.com> Date: Thu, 9 Dec 2021 11:17:56 -0500 Subject: [PATCH 16/34] Delete Brando09.swift --- Scripts/Brando09.swift | 138 ----------------------------------------- 1 file changed, 138 deletions(-) delete mode 100644 Scripts/Brando09.swift diff --git a/Scripts/Brando09.swift b/Scripts/Brando09.swift deleted file mode 100644 index d747ed02..00000000 --- a/Scripts/Brando09.swift +++ /dev/null @@ -1,138 +0,0 @@ -import ArgumentParser -import SwiftFusion -import BeeDataset -import BeeTracking -import TensorFlow -import PythonKit -import Foundation -import PenguinStructures - -/// Brando09: OPTIMIZATION VISUALIZATION -struct Brando09: ParsableCommand { - @Option(help: "Run for number of frames") - var trackLength: Int = 80 - - func run() { -// let np = Python.import("numpy") -// let plt = Python.import("matplotlib.pyplot") -// let trainingDatasetSize = 100 - -// // LOAD THE IMAGE AND THE GROUND TRUTH ORIENTED BOUNDING BOX -// let dataDir = URL(fileURLWithPath: "./OIST_Data") -// let testData = OISTBeeVideo(directory: dataDir, afterIndex: trainingDatasetSize, length: trackLength)! -// let frames = testData.frames -// let firstTrack = testData.tracks[0] -// // let firstTrack = testData.tracks[5] -// let firstFrame = frames[0] -// let firstObb = firstTrack.boxes[0] -// // let firstObb = firstTrack.boxes[5] - - -// // CREATE A PLACEHOLDER FOR POSE -// var v = VariableAssignments() - - -// // LOAD THE CLASSIFIER -// let (imageHeight, imageWidth, imageChannels) = -// (40, 70, 1) -// let featureSize = 512 -// let kHiddenDimension = 512 -// // var classifier = SmallerNNClassifier( -// // imageHeight: imageHeight, imageWidth: imageWidth, imageChannels: imageChannels, latentDimension: featureSize -// // ) -// var classifier = NNClassifier( -// imageHeight: imageHeight, imageWidth: imageWidth, imageChannels: imageChannels, hiddenDimension: kHiddenDimension, latentDimension: featureSize -// ) -// // classifier.load(weights: np.load("./classifiers/classifiers_today/small_classifier_weight_\(featureSize)_2.npy", allow_pickle: true)) -// classifier.load(weights: np.load("./classifiers/classifiers_today/classifier_weight_\(kHiddenDimension)_\(featureSize)_1_doubletraining.npy", allow_pickle: true)) - - - -// //OPTIMIZER GRADIENT DESCENT -// let lr = 1e-4 -// var optimizer = GradientDescent(learningRate: lr) - -// //CREATE A FOLDER TO CONTAIN THE END-RESULT IMAGES OF THE OPTIMIZATION -// let folderName = "Results/GD_optimization_lr_\(lr)_final_images" -// if !FileManager.default.fileExists(atPath: folderName) { -// do { -// try FileManager.default.createDirectory(atPath: folderName, withIntermediateDirectories: true, attributes: nil) -// } catch { -// print(error.localizedDescription) -// } -// } - -// //PERFORM THIS OPTIMIZATION J TIMES -// for j in 0..<20 { - -// // RANDOMLY PERTURB THE GROUND TRUTH POSE AND CALCULATE THE PERTURBATION -// let poseId = v.store(firstObb.center) -// v[poseId].perturbWith(stddev: Vector3(0.3, 8, 4.6)) -// let 
dx = v[poseId].t.x - firstObb.center.t.x
-//     let dy = v[poseId].t.y - firstObb.center.t.y
-//     let dtheta = v[poseId].rot.theta - firstObb.center.rot.theta
-//     let startpose = v[poseId]
-
-//     // CREATE THE FACTOR AND FACTOR GRAPH
-//     var fg = FactorGraph()
-//     let factor = ProbablisticTrackingFactor2(poseId,
-//       measurement: firstFrame,
-//       classifier: classifier,
-//       patchSize: (40, 70),
-//       appearanceModelSize: (40, 70)
-//     )
-//     fg.store(factor)
-
-
-//     // CREATE A FOLDER FOR EACH OPTIMIZATION ROUND.
-//     // let folderName = "Results/GD_optimization_lr_\(lr)_\(j)"
-//     // if !FileManager.default.fileExists(atPath: folderName) {
-//     //   do {
-//     //     try FileManager.default.createDirectory(atPath: folderName, withIntermediateDirectories: true, attributes: nil)
-//     //   } catch {
-//     //     print(error.localizedDescription)
-//     //   }
-//     // }
-
-//     // MAX ITERATIONS FOR OPTIMIZATION
-//     let it_limit = 1000
-//     print("\(j)) Starting Optimization from: \(dx), \(dy), \(dtheta)")
-
-
-//     // PERFORM GRADIENT DESCENT
-//     for i in 0..<it_limit {
From: brandoDecu <43319223+brandoDecu@users.noreply.github.com>
Date: Thu, 9 Dec 2021 11:18:12 -0500
Subject: [PATCH 17/34] Delete Brando04d1.swift

---
 Scripts/Brando04d1.swift | 121 ---------------------------------------
 1 file changed, 121 deletions(-)
 delete mode 100644 Scripts/Brando04d1.swift

diff --git a/Scripts/Brando04d1.swift b/Scripts/Brando04d1.swift
deleted file mode 100644
index 9d225f84..00000000
--- a/Scripts/Brando04d1.swift
+++ /dev/null
@@ -1,121 +0,0 @@
-import ArgumentParser
-
-import SwiftFusion
-import BeeDataset
-import BeeTracking
-import TensorFlow
-import PythonKit
-import Foundation
-
-
-
-
-/// Brando04: NNClassifier training
-struct Brando04d1: ParsableCommand {
-  typealias LikelihoodModel = TrackingLikelihoodModel
-
-
-  @Flag(help: "Training mode")
-  var training: Bool = false
-
-  let num_boxes: Int = 10000
-  let pert = Vector3(0.0, 30, 0)
-
-  func getTrainingDataBG(
-    from dataset: OISTBeeVideo
-  ) -> (Tensor, Tensor) {
-    print("bg")
-    let frames_obbs = dataset.makeBackgroundBoundingBoxes(patchSize: (40, 70), batchSize: num_boxes)
-    var bgBoxes = [Tensor]()
-    for i in 0...frames_obbs.count-1 {
-      var obb = frames_obbs[i].obb
-      obb.center.perturbWith(stddev: pert)
-      bgBoxes.append(frames_obbs[i].frame!.patch(at: obb))
-
-    }
-
-    print("bg2")
-    let labels = Tensor(ones: [num_boxes])
-    print("labels done bg")
-    let patches = Tensor(stacking: bgBoxes.map {$0})
-    print("patches done bg")
-    return (labels, patches)
-  }
-
-
-
-  func getTrainingDataFG(
-    from dataset: OISTBeeVideo
-  ) -> (Tensor, Tensor) {
-    print("fg")
-    let frames_obbs = dataset.makeForegroundBoundingBoxes(patchSize: (40, 70), batchSize: num_boxes)
-    var fgBoxes = [Tensor]()
-    for i in 0...frames_obbs.count-1 {
-      var obb = frames_obbs[i].obb
-      obb.center.perturbWith(stddev: pert)
-      fgBoxes.append(frames_obbs[i].frame!.patch(at: obb))
-
-    }
-
-    print("bg2")
-    let labels = Tensor(ones: [num_boxes])
-    print("labels done bg")
-    let patches = Tensor(stacking: fgBoxes.map {$0})
-    print("patches done bg")
-    return (labels, patches)
-  }
-
-
-
-  func run() {
-    let folderName = "classifiers/classifiers_today"
-    if !FileManager.default.fileExists(atPath: folderName) {
-      do {
-        try FileManager.default.createDirectory(atPath: folderName, withIntermediateDirectories: true, attributes: nil)
-      } catch {
-        print(error.localizedDescription)
-      }
-    } else {
-      print("folder exists")
-    }
-
-
-    let dataDir = URL(fileURLWithPath: "./OIST_Data")
-    let trainingDataset = OISTBeeVideo(directory: dataDir, length: 100)!
- var (labels_fg, patches_fg) = getTrainingDataFG(from: trainingDataset) - var (labels_bg, patches_bg) = getTrainingDataBG(from: trainingDataset) - - - var patches = Tensor(stacking: patches_bg.unstacked() + patches_fg.unstacked()) - var labels = Tensor(concatenate(labels_bg, labels_fg)) - print("shape of patches", patches.shape) - print("shape of labels", labels.shape) - - let kHiddenDimension = 512 - let featSize = 512 - let iterations = [1] - - - let lr = Float(1e-6) - for i in iterations { - let pretrained_weights = "./classifiers/classifiers_today/classifier_weight_\(kHiddenDimension)_\(featSize)_\(i)_20000boxes_300epochs_retrained(0.0, 30, 0)_lr=\(lr).npy" - let path = "./classifiers/classifiers_today/classifier_weight_\(kHiddenDimension)_\(featSize)_\(i)_20000boxes_300epochs_retrained(0.0, 30, 0)_lr=\(lr)_2nd_iter.npy" - if FileManager.default.fileExists(atPath: path) { - print("File Already Exists. Abort training") - continue - } - print("Training...") - let rae: PretrainedNNClassifier = PretrainedNNClassifier( - patches: patches, - labels: labels, - given: PretrainedNNClassifier.HyperParameters(hiddenDimension: kHiddenDimension, latentDimension: featSize, weightFile: pretrained_weights, learningRate: lr), - train_mode: "pretrained" - ) - rae.save(to: path) - - } - - - - } -} From d1dcc67e8c952910b679204b23e7333b267d1428 Mon Sep 17 00:00:00 2001 From: brandoDecu <43319223+brandoDecu@users.noreply.github.com> Date: Thu, 9 Dec 2021 11:19:04 -0500 Subject: [PATCH 18/34] Update main.swift --- Scripts/main.swift | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Scripts/main.swift b/Scripts/main.swift index f355074b..c8337fea 100644 --- a/Scripts/main.swift +++ b/Scripts/main.swift @@ -17,8 +17,8 @@ import PenguinParallelWithFoundation struct Scripts: ParsableCommand { static var configuration = CommandConfiguration( - subcommands: [Brando01.self, Brando02.self, Brando03.self, Brando04.self, Brando04d1.self, - Brando05.self, Brando06.self, Brando07.self, Brando08.self, Brando09.self, + subcommands: [Brando01.self, Brando02.self, Brando03.self, Brando04.self, + Brando05.self, Brando06.self, Brando07.self, Brando08.self, Brando10.self, Brando11.self, Brando12.self, Brando13.self, Brando14.self, Brando15.self, Brando16.self, Andrew01.self, Andrew05.self, Andrew06.self, Andrew07.self, Andrew08.self, Fan01.self, Fan02.self, Fan03.self, Fan04.self, Fan05.self, Fan10.self, Fan12.self, Fan13.self, Fan14.self, From af643ad49e1ceb784ede6d1e37886a055b25921e Mon Sep 17 00:00:00 2001 From: brandoDecu <43319223+brandoDecu@users.noreply.github.com> Date: Thu, 9 Dec 2021 11:20:02 -0500 Subject: [PATCH 19/34] Update Brando05.swift --- Scripts/Brando05.swift | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/Scripts/Brando05.swift b/Scripts/Brando05.swift index d46aaf70..b8a30e9e 100644 --- a/Scripts/Brando05.swift +++ b/Scripts/Brando05.swift @@ -19,7 +19,6 @@ struct Brando05: ParsableCommand { let np = Python.import("numpy") let featureSizes = [256] let kHiddenDimensions = [512] - // let iterations = [1,2,3,4,5,6,7] let iterations = [1] let trainingDatasetSize = 100 @@ -37,16 +36,11 @@ struct Brando05: ParsableCommand { let (imageHeight, imageWidth, imageChannels) = (40, 70, 1) - // var classifier = SmallerNNClassifier( - // imageHeight: imageHeight, imageWidth: imageWidth, imageChannels: imageChannels, latentDimension: featureSize - // ) var classifier = NNClassifier( imageHeight: imageHeight, imageWidth: imageWidth, imageChannels: imageChannels, 
hiddenDimension: kHiddenDimension, latentDimension: featureSize ) // LOAD THE CLASSIFIER - // classifier.load(weights: np.load("./classifiers/classifiers_today/classifier_weight_\(kHiddenDimension)_\(featureSize)_\(j)_doubletraining.npy", allow_pickle: true)) classifier.load(weights: np.load("./classifiers/classifiers_today/large_classifier_weight_\(kHiddenDimension)_\(featureSize)_\(j).npy", allow_pickle: true)) - // classifier.load(weights: np.load("./classifiers/classifiers_today/small_classifier_weight_\(featureSize)_\(j).npy", allow_pickle: true)) let evalTracker: Tracker = {frames, start in var tracker = makeProbabilisticTracker2( @@ -59,8 +53,6 @@ struct Brando05: ParsableCommand { return track } - // print(evalTracker) - // return let plt = Python.import("matplotlib.pyplot") let sequenceCount = 1 @@ -73,9 +65,7 @@ struct Brando05: ParsableCommand { if !FileManager.default.fileExists(atPath: folderName) { do { try FileManager.default.createDirectory(atPath: folderName, withIntermediateDirectories: true, attributes: nil) - // print("here") try FileManager.default.createDirectory(atPath: folderName + "/sequence0", withIntermediateDirectories: true, attributes: nil) - // print("here2") } catch { print(error.localizedDescription) } @@ -126,4 +116,4 @@ fileprivate func unpack(_ t: Tuple2) -> (A, B) { /// Returns `t` as a Swift tuple. fileprivate func unpack(_ t: Tuple1) -> (A) { return (t.head) -} \ No newline at end of file +} From d30edbb5247bf6acc3828a83de5048cede22ab78 Mon Sep 17 00:00:00 2001 From: brandoDecu <43319223+brandoDecu@users.noreply.github.com> Date: Thu, 9 Dec 2021 11:21:16 -0500 Subject: [PATCH 20/34] Update Brando06.swift --- Scripts/Brando06.swift | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/Scripts/Brando06.swift b/Scripts/Brando06.swift index 67390d73..811469a4 100644 --- a/Scripts/Brando06.swift +++ b/Scripts/Brando06.swift @@ -14,21 +14,16 @@ import PenguinStructures struct Brando06: ParsableCommand { func run() { - // let featSizes = [8,16,64,128,256] let dataDir = URL(fileURLWithPath: "./OIST_Data") let testData = OISTBeeVideo(directory: dataDir, afterIndex: 100, length: 80)! 
let batchSize = 3000 - // print("tests here1") let fgBoxes = testData.makeForegroundBoundingBoxes(patchSize: (40, 70), batchSize: batchSize) - // print("here 1.5") let bgBoxes = testData.makeBackgroundBoundingBoxes(patchSize: (40, 70), batchSize: batchSize) - // print("tests here2") let fgpatches = Tensor(stacking: fgBoxes.map { $0.frame!.patch(at: $0.obb)}) let bgpatches = Tensor(stacking: bgBoxes.map { $0.frame!.patch(at: $0.obb)}) let np = Python.import("numpy") let kHiddenDimensions = [512] let featSizes = [512] - print("uu") var plt = Python.import("matplotlib.pyplot") @@ -51,7 +46,6 @@ struct Brando06: ParsableCommand { if let weightsFile = weightsFile { classifier.load(weights: np.load(weightsFile, allow_pickle: true)) } else { - // classifier.load(weights: np.load("./classifiers/classifiers_today/classifier_weight_\(kHiddenDimension)_\(featureSize)_\(num).npy", allow_pickle: true)) classifier.load(weights: np.load("./classifiers/classifiers_today/classifier_weight_512_512_1_doubletraining.npy", allow_pickle: true)) } @@ -59,9 +53,6 @@ struct Brando06: ParsableCommand { let outbg = classifier.classify(bgpatches) let softmaxfg = softmax(outfg) let softmaxbg = softmax(outbg) - // print(outfg[0...3]) - // print("printing foreground:", softmaxfg[0...10]) - // print("printing background:", softmaxbg[0...10]) let folderName = "Results/brando06/classified_images" if !FileManager.default.fileExists(atPath: folderName) { do { @@ -91,8 +82,6 @@ struct Brando06: ParsableCommand { let shapefg = outfg.shape let shapebg = outbg.shape - // print("fg", outfg) - // print("bg", outbg) var fgsum0 = 0.0 var fgsum1 = 0.0 @@ -123,7 +112,6 @@ struct Brando06: ParsableCommand { var (figs, axs) = plt.subplots(2,2).tuple2 print("asda") - // plt.GridSpec(2, 2, wspace: 0.1, hspace: 0.8) plt.subplots_adjust(left:0.1, bottom:0.1, @@ -133,7 +121,6 @@ struct Brando06: ParsableCommand { hspace:0.4) - // var (fig, ax1) = plt.subplots().tuple2 var ax1 = axs[1,0] ax1.hist(fg0_arr, range: Python.tuple([-1,1]), bins: 50) var mean = fgsum0/Double(batchSize) @@ -143,7 +130,6 @@ struct Brando06: ParsableCommand { } ax1.set_title("Foreground. Output response for background. \n Mean = \(String(format: "%.2f", mean)) and SD = \(sd).", fontsize:8) - // (fig, ax1) = plt.subplots().tuple2 ax1 = axs[0,0] ax1.hist(fg1_arr, range: Python.tuple([-1,1]), bins: 50) mean = fgsum1/Double(batchSize) @@ -154,7 +140,6 @@ struct Brando06: ParsableCommand { ax1.set_title("Foreground. Output response for foreground. 
\n Mean = \(String(format: "%.2f", mean)) and SD = \(sd).", fontsize:8) ax1 = axs[1,1] - // (fig, ax1) = plt.subplots().tuple2 ax1.hist(bg0_arr, range: Python.tuple([-1,1]), bins: 50) mean = bgsum0/Double(batchSize) sd = 0.0 @@ -165,7 +150,6 @@ struct Brando06: ParsableCommand { ax1 = axs[0,1] - // (fig, ax1) = plt.subplots().tuple2 ax1.hist(bg1_arr, range: Python.tuple([-1,1]), bins: 50) mean = bgsum1/Double(batchSize) sd = 0.0 @@ -193,4 +177,4 @@ struct Brando06: ParsableCommand { } -} \ No newline at end of file +} From d196b13d7764db4adad9cbbc493b0dfca4e97b41 Mon Sep 17 00:00:00 2001 From: Andrew Marmon Date: Thu, 9 Dec 2021 11:21:42 -0500 Subject: [PATCH 21/34] Update Fan05.swift remove unnecessary comments --- Scripts/Fan05.swift | 2 -- 1 file changed, 2 deletions(-) diff --git a/Scripts/Fan05.swift b/Scripts/Fan05.swift index 44cf8bf3..1082d207 100644 --- a/Scripts/Fan05.swift +++ b/Scripts/Fan05.swift @@ -28,11 +28,9 @@ struct Fan05: ParsableCommand { let bgBoxes = dataset.makeBackgroundBoundingBoxes(patchSize: (40, 70), batchSize: numberBackground).map { (frame: $0.frame, type: LikelihoodModel.PatchType.bg, obb: $0.obb) } - print("d1") let fgBoxes = dataset.makeForegroundBoundingBoxes(patchSize: (40, 70), batchSize: numberForeground).map { (frame: $0.frame, type: LikelihoodModel.PatchType.fg, obb: $0.obb) } - print("done") return fgBoxes + bgBoxes } From b76fd19936a2eae53670127c2d8d30693f0e443a Mon Sep 17 00:00:00 2001 From: Andrew Marmon Date: Thu, 9 Dec 2021 11:25:57 -0500 Subject: [PATCH 22/34] Update main.swift code cleanup --- Examples/BeeTrackingTool/main.swift | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Examples/BeeTrackingTool/main.swift b/Examples/BeeTrackingTool/main.swift index cc76e0aa..ac2712f6 100644 --- a/Examples/BeeTrackingTool/main.swift +++ b/Examples/BeeTrackingTool/main.swift @@ -142,7 +142,7 @@ struct InferTrackRAE: ParsableCommand { frames: videoSlice.frames, targetSize: (video.track[0].rows, video.track[0].cols)) - // if verbose { tracker.optimizer.verbosity = .SUMMARY } FOR LM Optimizer + if verbose { tracker.optimizer.verbosity = .SUMMARY } let startPose = videoSlice.track[0].center let startPatch = Tensor(videoSlice.frames[0].patch( @@ -185,7 +185,7 @@ struct InferTrackRawPixels: ParsableCommand { var tracker = makeRawPixelTracker(frames: videoSlice.frames, target: startPatch) - // if verbose { tracker.optimizer.verbosity = .SUMMARY } FOR LM Optimizer + if verbose { tracker.optimizer.verbosity = .SUMMARY } let prediction = tracker.infer(knownStart: Tuple1(startPose)) From 4ff8195f2acfe637f9ac78ce2653a4d4f75d8187 Mon Sep 17 00:00:00 2001 From: brandoDecu <43319223+brandoDecu@users.noreply.github.com> Date: Thu, 9 Dec 2021 11:26:59 -0500 Subject: [PATCH 23/34] Update Brando07.swift --- Scripts/Brando07.swift | 27 +-------------------------- 1 file changed, 1 insertion(+), 26 deletions(-) diff --git a/Scripts/Brando07.swift b/Scripts/Brando07.swift index e15940b8..e2e55e31 100644 --- a/Scripts/Brando07.swift +++ b/Scripts/Brando07.swift @@ -16,16 +16,13 @@ struct Brando07: ParsableCommand { @Option(help: "Size of feature space") var featureSize: Int = 256 - // used to be 256 @Option(help: "Pretrained weights") var weightsFile: String? 
- // Runs RAE tracker on n number of sequences and outputs relevant images and statistics func run() { let np = Python.import("numpy") let kHiddenDimension = 512 - // used to be 512 let (imageHeight, imageWidth, imageChannels) = (40, 70, 1) @@ -47,13 +44,9 @@ struct Brando07: ParsableCommand { let dataDir = URL(fileURLWithPath: "./OIST_Data") let numberOfTrainingSamples = 3000 - // let fgRandomFrameCount = 10 - // let bgRandomFrameCount = 10 - // let boundingBoxSize = (40, 70) let dataset = OISTBeeVideo(directory: dataDir, length: 100)! // calling this twice caused the Killed to happen let batchSize = 3000 - // print("tests here1") let fgBoxes = dataset.makeForegroundBoundingBoxes(patchSize: (40, 70), batchSize: batchSize) print("here 1.5") let bgBoxes = dataset.makeBackgroundBoundingBoxes(patchSize: (40, 70), batchSize: batchSize) @@ -62,21 +55,11 @@ struct Brando07: ParsableCommand { let bgpatches = Tensor(stacking: bgBoxes.map { $0.frame!.patch(at: $0.obb)}) print("patches complete") - // let (fg, bg, _) = getTrainingBatches( - // dataset: dataset, boundingBoxSize: boundingBoxSize, - // fgBatchSize: numberOfTrainingSamples, - // bgBatchSize: numberOfTrainingSamples, - // fgRandomFrameCount: fgRandomFrameCount, - // bgRandomFrameCount: bgRandomFrameCount, - // useCache: true - // ) let batchPositive = rae.encode(fgpatches) print("shape batch positive", batchPositive.shape) - // let foregroundModel = GaussianNB(from:batchPositive, regularizer: 1e-3) let foregroundModel = MultivariateGaussian(from:batchPositive, regularizer: 1e-3) let batchNegative = rae.encode(bgpatches) - // let backgroundModel = GaussianNB(from: batchNegative, regularizer: 1e-3) let backgroundModel = MultivariateGaussian(from: batchNegative, regularizer: 1e-3) var outfg0 = [Double]() @@ -89,15 +72,12 @@ struct Brando07: ParsableCommand { for i in 0...numberOfTrainingSamples-1 { outfg0.append(backgroundModel.probability(batchPositive[i,0...])) - // print("probability", backgroundModel.probability(batchPositive[i,0...])) outfg1.append(foregroundModel.probability(batchPositive[i,0...])) outbg0.append(backgroundModel.probability(batchNegative[i,0...])) outbg1.append(foregroundModel.probability(batchNegative[i,0...])) } - // print(outfg0) - // print(outfg1) - // let batchSize = numberOfTrainingSamples + var plt = Python.import("matplotlib.pyplot") @@ -130,7 +110,6 @@ struct Brando07: ParsableCommand { var (figs, axs) = plt.subplots(2,2).tuple2 print("asda") - // plt.GridSpec(2, 2, wspace: 0.1, hspace: 0.8) plt.subplots_adjust(left:0.1, bottom:0.1, @@ -140,7 +119,6 @@ struct Brando07: ParsableCommand { hspace:0.4) - // var (fig, ax1) = plt.subplots().tuple2 var ax1 = axs[1,0] ax1.hist(fg0_arr, range: Python.tuple([-1,1]), bins: 50) var mean = fgsum0/Double(batchSize) @@ -150,7 +128,6 @@ struct Brando07: ParsableCommand { } ax1.set_title("Foreground. Output response for background. \n Mean = \(String(format: "%.2f", mean)) and SD = \(sd).", fontsize:8) - // (fig, ax1) = plt.subplots().tuple2 ax1 = axs[0,0] ax1.hist(fg1_arr, range: Python.tuple([-1,1]), bins: 50) mean = fgsum1/Double(batchSize) @@ -161,7 +138,6 @@ struct Brando07: ParsableCommand { ax1.set_title("Foreground. Output response for foreground. 
\n Mean = \(String(format: "%.2f", mean)) and SD = \(sd).", fontsize:8) ax1 = axs[1,1] - // (fig, ax1) = plt.subplots().tuple2 ax1.hist(bg0_arr, range: Python.tuple([-1,1]), bins: 50) mean = bgsum0/Double(batchSize) sd = 0.0 @@ -172,7 +148,6 @@ struct Brando07: ParsableCommand { ax1 = axs[0,1] - // (fig, ax1) = plt.subplots().tuple2 ax1.hist(bg1_arr, range: Python.tuple([-1,1]), bins: 50) mean = bgsum1/Double(batchSize) sd = 0.0 From 9edb36b936905dc008dc22b44782f9dd58f7b707 Mon Sep 17 00:00:00 2001 From: brandoDecu <43319223+brandoDecu@users.noreply.github.com> Date: Thu, 9 Dec 2021 11:28:56 -0500 Subject: [PATCH 24/34] Update Brando08.swift --- Scripts/Brando08.swift | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/Scripts/Brando08.swift b/Scripts/Brando08.swift index dd5648c3..f4d414c1 100644 --- a/Scripts/Brando08.swift +++ b/Scripts/Brando08.swift @@ -14,21 +14,14 @@ import PenguinStructures struct Brando08: ParsableCommand { func run() { - // let featSizes = [8,16,64,128,256] let dataDir = URL(fileURLWithPath: "./OIST_Data") let dataset = OISTBeeVideo(directory: dataDir, length: 100)! let batchSize = 300 - // print("tests here1") let fgBoxes = dataset.makeForegroundBoundingBoxes(patchSize: (40, 70), batchSize: batchSize) - // print("here 1.5") let bgBoxes = dataset.makeBackgroundBoundingBoxes(patchSize: (40, 70), batchSize: batchSize) - // print("tests here2") let fgpatches = Tensor(stacking: fgBoxes.map { $0.frame!.patch(at: $0.obb)}) let bgpatches = Tensor(stacking: bgBoxes.map { $0.frame!.patch(at: $0.obb)}) let np = Python.import("numpy") - // let kHiddenDimensions = [256,512] - // let featSizes = [64,128,256] - // print("uu") var plt = Python.import("matplotlib.pyplot") let mpl = Python.import("matplotlib") @@ -51,4 +44,4 @@ struct Brando08: ParsableCommand { } } -} \ No newline at end of file +} From 218a0ef0bcfd2d4163e5dbf599b0dede6e38a4a0 Mon Sep 17 00:00:00 2001 From: Andrew Marmon Date: Thu, 9 Dec 2021 11:31:10 -0500 Subject: [PATCH 25/34] Update FactorBoilerplate.swift --- Sources/SwiftFusion/Inference/FactorBoilerplate.swift | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Sources/SwiftFusion/Inference/FactorBoilerplate.swift b/Sources/SwiftFusion/Inference/FactorBoilerplate.swift index e2abf769..f3d563f3 100644 --- a/Sources/SwiftFusion/Inference/FactorBoilerplate.swift +++ b/Sources/SwiftFusion/Inference/FactorBoilerplate.swift @@ -267,8 +267,7 @@ extension LinearizableFactor2 { // Implements the error as half the squared norm of the error vector. public func error(at x: Variables) -> Double { - return errorVector(at: x).squaredNorm - // return 0.5 * errorVector(at: x).squaredNorm + return 0.5 * errorVector(at: x).squaredNorm } // Forwarding implementation. 
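
The half factor restored above brings `error(at:)` back in line with its doc
comment and with the usual nonlinear least-squares convention: for
E(x) = ½‖r(x)‖² the gradient is ∇E(x) = J(x)ᵀ r(x), so no stray factor of 2
enters the Gauss-Newton/LM normal equations JᵀJ δ = −Jᵀr. As a one-dimensional
check, r(x) = a·x gives E(x) = ½a²x² and E′(x) = a²x = J·r with J = a.
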
From 06f530bba7aa1dcc1f1cad68f9c8a397ec0a4b25 Mon Sep 17 00:00:00 2001 From: Andrew Marmon Date: Thu, 9 Dec 2021 11:32:21 -0500 Subject: [PATCH 26/34] Update MultivariateGaussian.swift --- Sources/SwiftFusion/Probability/MultivariateGaussian.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Sources/SwiftFusion/Probability/MultivariateGaussian.swift b/Sources/SwiftFusion/Probability/MultivariateGaussian.swift index 894d59e6..33885a73 100644 --- a/Sources/SwiftFusion/Probability/MultivariateGaussian.swift +++ b/Sources/SwiftFusion/Probability/MultivariateGaussian.swift @@ -6,7 +6,7 @@ // // http://www.apache.org/licenses/LICENSE-2.0 // -// Unless required by applicable law or agreed to in writing, softwarew +// Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and From 469e6d1b95c9301faa04bc083fd3f76c4e2f7da8 Mon Sep 17 00:00:00 2001 From: Andrew Marmon Date: Thu, 9 Dec 2021 11:33:14 -0500 Subject: [PATCH 27/34] Update GradientDescentTests.swift --- .../Optimizers/GradientDescentTests.swift | 42 +++++++++---------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/Tests/SwiftFusionTests/Optimizers/GradientDescentTests.swift b/Tests/SwiftFusionTests/Optimizers/GradientDescentTests.swift index bc782dd2..de38d895 100644 --- a/Tests/SwiftFusionTests/Optimizers/GradientDescentTests.swift +++ b/Tests/SwiftFusionTests/Optimizers/GradientDescentTests.swift @@ -17,28 +17,28 @@ import SwiftFusion import XCTest final class GradientDescentTests: XCTestCase { - /// Test convergence for a simple Pose2SLAM graph. - // func testPose2SLAM() { - // var x = VariableAssignments() - // let pose1ID = x.store(Pose2(Rot2(0.2), Vector2(0.5, 0.0))) - // let pose2ID = x.store(Pose2(Rot2(-0.2), Vector2(2.3, 0.1))) - // let pose3ID = x.store(Pose2(Rot2(.pi / 2), Vector2(4.1, 0.1))) - // let pose4ID = x.store(Pose2(Rot2(.pi), Vector2(4.0, 2.0))) - // let pose5ID = x.store(Pose2(Rot2(-.pi / 2), Vector2(2.1, 2.1))) + // Test convergence for a simple Pose2SLAM graph. 
+ func testPose2SLAM() { + var x = VariableAssignments() + let pose1ID = x.store(Pose2(Rot2(0.2), Vector2(0.5, 0.0))) + let pose2ID = x.store(Pose2(Rot2(-0.2), Vector2(2.3, 0.1))) + let pose3ID = x.store(Pose2(Rot2(.pi / 2), Vector2(4.1, 0.1))) + let pose4ID = x.store(Pose2(Rot2(.pi), Vector2(4.0, 2.0))) + let pose5ID = x.store(Pose2(Rot2(-.pi / 2), Vector2(2.1, 2.1))) - // var graph = FactorGraph() - // graph.store(BetweenFactor(pose2ID, pose1ID, Pose2(2.0, 0.0, .pi / 2))) - // graph.store(BetweenFactor(pose3ID, pose2ID, Pose2(2.0, 0.0, .pi / 2))) - // graph.store(BetweenFactor(pose4ID, pose3ID, Pose2(2.0, 0.0, .pi / 2))) - // graph.store(BetweenFactor(pose5ID, pose4ID, Pose2(2.0, 0.0, .pi / 2))) - // graph.store(PriorFactor(pose1ID, Pose2(0, 0, 0))) + var graph = FactorGraph() + graph.store(BetweenFactor(pose2ID, pose1ID, Pose2(2.0, 0.0, .pi / 2))) + graph.store(BetweenFactor(pose3ID, pose2ID, Pose2(2.0, 0.0, .pi / 2))) + graph.store(BetweenFactor(pose4ID, pose3ID, Pose2(2.0, 0.0, .pi / 2))) + graph.store(BetweenFactor(pose5ID, pose4ID, Pose2(2.0, 0.0, .pi / 2))) + graph.store(PriorFactor(pose1ID, Pose2(0, 0, 0))) - // let optimizer = GradientDescent(learningRate: 1e-2) - // for _ in 0..<10000 { - // optimizer.update(&x, objective: graph) - // } + let optimizer = GradientDescent(learningRate: 1e-2) + for _ in 0..<10000 { + optimizer.update(&x, objective: graph) + } - // // Test condition: pose 5 should be identical to pose 1 (close loop). - // XCTAssertEqual(between(x[pose1ID], x[pose5ID]).t.norm, 0.0, accuracy: 1e-2) - // } + // Test condition: pose 5 should be identical to pose 1 (close loop). + XCTAssertEqual(between(x[pose1ID], x[pose5ID]).t.norm, 0.0, accuracy: 1e-2) + } } From 715854cf67f0571ab0cb8fd0e7d7f7cb6d020955 Mon Sep 17 00:00:00 2001 From: Andrew Marmon Date: Thu, 9 Dec 2021 11:34:10 -0500 Subject: [PATCH 28/34] Update ProbablisticTrackingFactor.swift --- Sources/SwiftFusion/Inference/ProbablisticTrackingFactor.swift | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Sources/SwiftFusion/Inference/ProbablisticTrackingFactor.swift b/Sources/SwiftFusion/Inference/ProbablisticTrackingFactor.swift index 01eca8d2..b5f4ae49 100644 --- a/Sources/SwiftFusion/Inference/ProbablisticTrackingFactor.swift +++ b/Sources/SwiftFusion/Inference/ProbablisticTrackingFactor.swift @@ -122,7 +122,6 @@ public struct ProbablisticTrackingFactor2< @differentiable public func errorVector(_ pose: Pose2) -> Vector1 { - // print("errorVector") let region = OrientedBoundingBox(center: pose, rows: patchSize.0, cols: patchSize.1) let patch = Tensor(measurement.patch(at: region, outputSize: appearanceModelSize).tensor) let output = classifier.classify(patch.expandingShape(at: 0)).squeezingShape(at: 0) @@ -133,4 +132,4 @@ public struct ProbablisticTrackingFactor2< var result = loglikelihood.scalarized() return Vector1(result) } -} \ No newline at end of file +} From 9001b2cfc5cb526165f1b5c2a01d38e8439443cc Mon Sep 17 00:00:00 2001 From: brandoDecu <43319223+brandoDecu@users.noreply.github.com> Date: Thu, 9 Dec 2021 11:34:41 -0500 Subject: [PATCH 29/34] Update Brando12.swift --- Scripts/Brando12.swift | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/Scripts/Brando12.swift b/Scripts/Brando12.swift index 83509c84..fad930f2 100644 --- a/Scripts/Brando12.swift +++ b/Scripts/Brando12.swift @@ -195,8 +195,6 @@ struct Brando12: ParsableCommand { + "\n learning rate = \(lr)" + "\n converged = \(conv)") figs.savefig(folderName + "/optimization_final_\(j).png", 
bbox_inches: "tight") - // let (figs2, axes2) = plotXYandTheta(xs: xs, ys: ys, thetas: thetas) - // figs2.savefig(folderName + "/optimization_final_\(j)_XYtheta.png", bbox_inches: "tight") plt.close("all") fig.savefig(folderName + "/optimization_covergence_red_n_green_dots.png", bbox_inches: "tight") @@ -267,9 +265,7 @@ struct Brando12: ParsableCommand { let y_out_of_bounds = (v[poseId].t.y > firstObb.center.t.y + xy_thresh) || (v[poseId].t.y < firstObb.center.t.y - xy_thresh) let theta_out_of_bounds = (v[poseId].rot.theta > firstObb.center.rot.theta + theta_thresh) || (v[poseId].rot.theta < firstObb.center.rot.theta - theta_thresh) if !x_out_of_bounds && !theta_out_of_bounds && !y_out_of_bounds { - // plot a green dot - // ax.scatter(startpose.t.x-Double(xbegin),startpose.t.y-Double(ybegin),c:"r", marker: ",") - // ax.scatter(startpose.t.x,startpose.t.y,c:"r", marker: ",") + if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.1 { axs[0,0].plot(startpose.t.x,startpose.t.y,"g,", ms: 1) } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.2 { @@ -281,8 +277,6 @@ struct Brando12: ParsableCommand { } } else { - // ax.scatter(startpose.t.x-Double(xbegin),startpose.t.y-Double(ybegin),c:"g", marker: ",") - // ax.scatter(startpose.t.x,startpose.t.y,c:"g", marker: ",") if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.1 { axs[0,0].plot(startpose.t.x,startpose.t.y,"r,", ms: 1) } else if fabs(startpose.rot.theta - firstObb.center.rot.theta) < 0.2 { @@ -308,12 +302,10 @@ struct Brando12: ParsableCommand { + "\n learning rate = \(lr)" + "\n converged = \(conv)") figs.savefig(folderName + "/optimization_final_\(j).png", bbox_inches: "tight") - // let (figs2, axes2) = plotXYandTheta(xs: xs, ys: ys, thetas: thetas) - // figs2.savefig(folderName + "/optimization_final_\(j)_XYtheta.png", bbox_inches: "tight") plt.close("all") fig.savefig(folderName + "/optimization_covergence_red_n_green_dots.png", bbox_inches: "tight") } } } -} \ No newline at end of file +} From 584f18643fea87a4e42223dd2d7dfe7e09ab785e Mon Sep 17 00:00:00 2001 From: Andrew Marmon Date: Thu, 9 Dec 2021 11:35:56 -0500 Subject: [PATCH 30/34] Update TrackingMetrics.swift --- Sources/BeeTracking/TrackingMetrics.swift | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/Sources/BeeTracking/TrackingMetrics.swift b/Sources/BeeTracking/TrackingMetrics.swift index ff429128..2cdfb529 100644 --- a/Sources/BeeTracking/TrackingMetrics.swift +++ b/Sources/BeeTracking/TrackingMetrics.swift @@ -189,13 +189,11 @@ extension TrackerEvaluationDataset { deltaAnchor: Int, outputFile: String ) -> TrackerEvaluationResults { - // print("yooo") let sequenceEvaluations = sequences.prefix(sequenceCount).enumerated().map { (i, sequence) -> SequenceEvaluationResults in print("Evaluating sequence \(i + 1) of \(sequenceCount)") return sequence.evaluate(tracker, deltaAnchor: deltaAnchor, outputFile: "\(outputFile)-sequence\(i)") } - // print("yooo2") let result = TrackerEvaluationResults( sequences: sequenceEvaluations, @@ -252,25 +250,15 @@ extension TrackerEvaluationSequence { else { continue } - // print("a") let subsequence = subsequences[i] print("Evaluating subsequence \(i + 1) of \(subsequences.count)") - // print("gggg") - // print(buf.baseAddress) - // if i print subsequence.frames it infinite loops - // print(subsequence) - // print(subsequence.groundTruth[0]) - // print(tracker(subsequence.frames, subsequence.groundTruth[0])) (buf.baseAddress! 
+ i).initialize(to: tracker(subsequence.frames, subsequence.groundTruth[0])) - // print("d") } } - // print("b") actualCount = subsequences.count } - // print("c") let subsequenceEvaluations = zip(subsequences, subsequencePredictions).map { SubsequenceEvaluationResults( @@ -279,11 +267,9 @@ extension TrackerEvaluationSequence { groundTruth: $0.0.groundTruth, frames: $0.0.frames) } - // print("e") let result = SequenceEvaluationResults( subsequences: subsequenceEvaluations, sequenceMetrics: SequenceMetrics(subsequenceEvaluations.map { $0.metrics })) - // print("f") return result } From 0db61ed7c3720f1e83fea86f60c1e8a57e33a9d9 Mon Sep 17 00:00:00 2001 From: Andrew Marmon Date: Thu, 9 Dec 2021 11:37:19 -0500 Subject: [PATCH 31/34] Update TrackingFactorGraph.swift --- Sources/BeeTracking/TrackingFactorGraph.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Sources/BeeTracking/TrackingFactorGraph.swift b/Sources/BeeTracking/TrackingFactorGraph.swift index aef651a7..89dd760e 100644 --- a/Sources/BeeTracking/TrackingFactorGraph.swift +++ b/Sources/BeeTracking/TrackingFactorGraph.swift @@ -252,7 +252,7 @@ public struct TrackingConfiguration { // Initialize the variables one frame at a time. Each iteration intializes the `i+1`-th // variable. for i in 0..<(frames.count - 1) { - print("Infr \(i + 1)/\(frames.count - 1) ", terminator: "") + print("Inferring for frame \(i + 1) of \(frames.count - 1)") extendTrack(x: &x, fromFrame:i, withSampling:samplingFlag) } From 54d589f0c50c7438dd97b12db87b86fb83224c06 Mon Sep 17 00:00:00 2001 From: Andrew Marmon Date: Thu, 9 Dec 2021 11:38:05 -0500 Subject: [PATCH 32/34] Update ProbabilisticTracker.swift --- Sources/BeeTracking/ProbabilisticTracker.swift | 2 -- 1 file changed, 2 deletions(-) diff --git a/Sources/BeeTracking/ProbabilisticTracker.swift b/Sources/BeeTracking/ProbabilisticTracker.swift index f1506132..f1ae8938 100644 --- a/Sources/BeeTracking/ProbabilisticTracker.swift +++ b/Sources/BeeTracking/ProbabilisticTracker.swift @@ -150,12 +150,10 @@ public func trainProbabilisticTracker( useCache: true ) let batchPositive = encoder.encode(fg) - // let foregroundModel = GaussianNB(from:batchPositive, regularizer: 1e-3) let foregroundModel = MultivariateGaussian(from:batchPositive, regularizer: 1e-3) let batchNegative = encoder.encode(bg) - // let backgroundModel = GaussianNB(from: batchNegative, regularizer: 1e-3) let backgroundModel = MultivariateGaussian(from: batchNegative, regularizer: 1e-3) let tracker = makeProbabilisticTracker( From d44eb44735ce390b5f5c1b1d9183c76c5a9d3511 Mon Sep 17 00:00:00 2001 From: Andrew Marmon Date: Thu, 9 Dec 2021 11:39:06 -0500 Subject: [PATCH 33/34] Update OISTBeeVideo+Batches.swift --- .../BeeTracking/OISTBeeVideo+Batches.swift | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/Sources/BeeTracking/OISTBeeVideo+Batches.swift b/Sources/BeeTracking/OISTBeeVideo+Batches.swift index ad31671f..5b7ac722 100644 --- a/Sources/BeeTracking/OISTBeeVideo+Batches.swift +++ b/Sources/BeeTracking/OISTBeeVideo+Batches.swift @@ -86,16 +86,7 @@ extension OISTBeeVideo { /// Anything not completely overlapping labels var deterministicEntropy = ARC4RandomNumberGenerator(seed: 42) let frames = self.randomFrames(self.frames.count, using: &deterministicEntropy) - - print("OISTBeeVideo+Batches.swift. 
MakeForegroundBoundingBoxes") - let label = frames[0].1.labels[0] - - - - var label_count = 0 - for i in 0...(frames.count-1) { - label_count += frames[i].1.labels.count - } + // We need `batchSize / frames.count` patches from each frame, plus the remainder of the // integer division. var patchesPerFrame = Array(repeating: batchSize / frames.count, count: frames.count) @@ -126,14 +117,6 @@ extension OISTBeeVideo { var deterministicEntropy = ARC4RandomNumberGenerator(seed: 42) let frames = self.randomFrames(self.frames.count, using: &deterministicEntropy) - print("OISTBeeVideo+Batches.swift. MakeBackgroundBoundingBoxes") - let label = frames[0].1.labels[0] - - - var label_count = 0 - for i in 0...(frames.count-1) { - label_count += frames[i].1.labels.count - } // We need `batchSize / frames.count` patches from each frame, plus the remainder of the // integer division. From eedd730f876ccbc8042c8d9d0db97e67e696dd62 Mon Sep 17 00:00:00 2001 From: Andrew Marmon Date: Thu, 9 Dec 2021 11:44:29 -0500 Subject: [PATCH 34/34] Update FactorGraphTests.swift --- .../Inference/FactorGraphTests.swift | 36 +++++++++---------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/Tests/SwiftFusionTests/Inference/FactorGraphTests.swift b/Tests/SwiftFusionTests/Inference/FactorGraphTests.swift index 45661a8c..88fa31d2 100644 --- a/Tests/SwiftFusionTests/Inference/FactorGraphTests.swift +++ b/Tests/SwiftFusionTests/Inference/FactorGraphTests.swift @@ -256,27 +256,27 @@ class FactorGraphTests: XCTestCase { } /// Test the gradient of the error of a factor graph. - // func testGradient() { - // var vars = VariableAssignments() - // let v1ID = vars.store(Vector2(1, 2)) - // let v2ID = vars.store(Vector2(3, 4)) - // let v3ID = vars.store(Vector3(5, 6, 7)) + func testGradient() { + var vars = VariableAssignments() + let v1ID = vars.store(Vector2(1, 2)) + let v2ID = vars.store(Vector2(3, 4)) + let v3ID = vars.store(Vector3(5, 6, 7)) - // var graph = FactorGraph() - // graph.store(ScalarJacobianFactor(edges: Tuple1(v1ID), scalar: 1)) - // graph.store(ScalarJacobianFactor(edges: Tuple1(v1ID), scalar: 2)) - // graph.store(ScalarJacobianFactor(edges: Tuple1(v2ID), scalar: 5)) - // graph.store(ScalarJacobianFactor(edges: Tuple1(v3ID), scalar: 10)) + var graph = FactorGraph() + graph.store(ScalarJacobianFactor(edges: Tuple1(v1ID), scalar: 1)) + graph.store(ScalarJacobianFactor(edges: Tuple1(v1ID), scalar: 2)) + graph.store(ScalarJacobianFactor(edges: Tuple1(v2ID), scalar: 5)) + graph.store(ScalarJacobianFactor(edges: Tuple1(v3ID), scalar: 10)) - // let grad = graph.errorGradient(at: vars) + let grad = graph.errorGradient(at: vars) - // // gradient of ||1 * v1||^2 + ||2 * v1||^2 at v1 = (1, 2) - // XCTAssertEqual(grad[v1ID], Vector2(10, 20)) + // gradient of ||1 * v1||^2 + ||2 * v1||^2 at v1 = (1, 2) + XCTAssertEqual(grad[v1ID], Vector2(10, 20)) - // // gradient of ||5 * v2||^2 at v2 = (3, 4) - // XCTAssertEqual(grad[v2ID], Vector2(150, 200)) + // gradient of ||5 * v2||^2 at v2 = (3, 4) + XCTAssertEqual(grad[v2ID], Vector2(150, 200)) - // // gradient of ||10 * v3||^2 at v3 = (5, 6, 7) - // XCTAssertEqual(grad[v3ID], Vector3(1000, 1200, 1400)) - // } + // gradient of ||10 * v3||^2 at v3 = (5, 6, 7) + XCTAssertEqual(grad[v3ID], Vector3(1000, 1200, 1400)) + } }
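
For reference, the expected values in the re-enabled `testGradient` are the
gradients of the full squared norm: a `ScalarJacobianFactor` with scalar s
contributes E(v) = ‖s·v‖², so ∇E = 2s²·v, summed over the factors attached to
each variable:

    v1 (scalars 1 and 2): (2·1² + 2·2²)·(1, 2) = 10·(1, 2) = (10, 20)
    v2 (scalar 5):        2·5²·(3, 4) = 50·(3, 4) = (150, 200)
    v3 (scalar 10):       2·10²·(5, 6, 7) = 200·(5, 6, 7) = (1000, 1200, 1400)

all matching the assertions above. Note these expectations use the full
squared norm rather than the ½‖·‖² convention that patch 25 restores for
`LinearizableFactor2` errors.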