[WIP-Do not MERGE] Marcrasi running experiments #227

Open · wants to merge 13 commits into feature/ppca
Examples/BeeTrackingTool/main.swift (155 changes: 135 additions & 20 deletions)
@@ -9,7 +9,7 @@ import TensorFlow
 
 struct BeeTrackingTool: ParsableCommand {
   static var configuration = CommandConfiguration(
-    subcommands: [TrainRAE.self, InferTrackRAE.self, InferTrackRawPixels.self])
+    subcommands: [TrainRAE.self, InferTrackRAE.self, InferTrackRawPixels.self, NaiveRae.self])
 }
 
 /// The dimension of the hidden layer in the appearance model.
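For readers new to swift-argument-parser: the CommandConfiguration(subcommands:) list above is what makes the new NaiveRae subcommand reachable from the command line. A minimal, self-contained sketch of the dispatch pattern (DemoTool and Hello are illustrative names, not part of this PR):

import ArgumentParser

struct DemoTool: ParsableCommand {
  // Listing a type here is all that is needed to route `demo-tool hello` to it.
  static var configuration = CommandConfiguration(
    subcommands: [Hello.self])
}

struct Hello: ParsableCommand {
  @Option(help: "Who to greet")
  var name: String = "world"

  func run() {
    print("Hello, \(name)!")
  }
}

DemoTool.main()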
@@ -164,35 +164,150 @@ struct InferTrackRAE: ParsableCommand {
 
 /// Infers a track on a VOT video, using the raw pixel tracker.
 struct InferTrackRawPixels: ParsableCommand {
-  @Option(help: "Base directory of the VOT dataset")
-  var votBaseDirectory: String
-
-  @Option(help: "Name of the VOT video to use")
-  var videoName: String
-
-  @Option(help: "How many frames to track")
-  var frameCount: Int = 50
-
-  @Flag(help: "Print progress information")
-  var verbose: Bool = false
-
-  func run() {
-    let video = VOTVideo(votBaseDirectory: votBaseDirectory, videoName: videoName)!
-    let videoSlice = video[0..<min(video.frames.count, frameCount)]
-
-    let startPose = videoSlice.track[0].center
-    let startPatch = videoSlice.frames[0].patch(at: videoSlice.track[0])
-
-    var tracker = makeRawPixelTracker(frames: videoSlice.frames, target: startPatch)
-
-    if verbose { tracker.optimizer.verbosity = .SUMMARY }
-
-    let prediction = tracker.infer(knownStart: Tuple1(startPose))
-
-    let boxes = tracker.frameVariableIDs.map { frameVariableIDs -> OrientedBoundingBox in
-      let poseID = frameVariableIDs.head
-      return OrientedBoundingBox(
-        center: prediction[poseID], rows: video.track[0].rows, cols: video.track[0].cols)
-    }
-
-    print(boxes.count)
+  func run() {
+    func rawPixelTracker(_ frames: [Tensor<Float>], _ start: OrientedBoundingBox) -> [OrientedBoundingBox] {
+      var tracker = makeRawPixelTracker(frames: frames, target: frames[0].patch(at: start))
+      tracker.optimizer.precision = 1e0
+      let prediction = tracker.infer(knownStart: Tuple1(start.center))
+      return tracker.frameVariableIDs.map { varIds in
+        let poseId = varIds.head
+        return OrientedBoundingBox(center: prediction[poseId], rows: start.rows, cols: start.cols)
+      }
+    }
+
+    var dataset = OISTBeeVideo()!
+    // Only do inference on the interesting tracks.
+    dataset.tracks = [3, 5, 6, 7].map { dataset.tracks[$0] }
+    let trackerEvaluationDataset = TrackerEvaluationDataset(dataset)
+    let eval = trackerEvaluationDataset.evaluate(
+      rawPixelTracker, sequenceCount: dataset.tracks.count, deltaAnchor: 100, outputFile: "rawpixel.json")
+    print(eval.trackerMetrics.accuracy)
+    print(eval.trackerMetrics.robustness)
+  }
+}
+
+/// Tracking with a Naive Bayes with RAE
+struct NaiveRae: ParsableCommand {
+  @Option(help: "Where to load the RAE weights")
+  var loadWeights: String
+
+  @Option(help: "The dimension of the latent code in the RAE appearance model")
+  var kLatentDimension: Int
+
+  @Option(help: "The dimension of the hidden code in the RAE appearance model")
+  var kHiddenDimension = 100
+
+  @Flag
+  var verbose: Bool = false
+
+  @Option
+  var outputFile: String
+
+  @Option
+  var truncate: Int
+
+  /// Runs the naive-Bayes RAE tracker on `dataset_` and prints evaluation metrics.
+  func naiveRaeTrack(dataset dataset_: OISTBeeVideo) {
+    var dataset = dataset_
+    dataset.labels = dataset.labels.map {
+      $0.filter({ $0.label == .Body })
+    }
+    // Make a batch of foreground patches and set the frame statistics used for normalization.
+    let (batch, _) = dataset.makeBatch(appearanceModelSize: (40, 70), batchSize: 200)
+    var statistics = FrameStatistics(batch)
+    statistics.mean = Tensor(62.26806976644069)
+    statistics.standardDeviation = Tensor(37.44683834503672)
+
+    let backgroundBatch = dataset.makeBackgroundBatch(
+      patchSize: (40, 70), appearanceModelSize: (40, 70),
+      statistics: statistics,
+      batchSize: 300
+    )
+
+    let (imageHeight, imageWidth, imageChannels) =
+      (batch.shape[1], batch.shape[2], batch.shape[3])
+
+    if verbose { print("Loading RAE model, \(batch.shape)...") }
+
+    let np = Python.import("numpy")
+
+    var rae = DenseRAE(
+      imageHeight: imageHeight, imageWidth: imageWidth, imageChannels: imageChannels,
+      hiddenDimension: kHiddenDimension, latentDimension: kLatentDimension
+    )
+    rae.load(weights: np.load(loadWeights, allow_pickle: true))
+
+    if verbose { print("Fitting Naive Bayes model") }
+
+    var (foregroundModel, backgroundModel) = (
+      MultivariateGaussian(
+        dims: TensorShape([kLatentDimension]),
+        regularizer: 1e-3
+      ), GaussianNB(
+        dims: TensorShape([kLatentDimension]),
+        regularizer: 1e-3
+      )
+    )
+
+    let batchPositive = rae.encode(batch)
+    foregroundModel.fit(batchPositive)
+
+    let batchNegative = rae.encode(backgroundBatch)
+    backgroundModel.fit(batchNegative)
+
+    if verbose {
+      print("Foreground: \(foregroundModel)")
+      print("Background: \(backgroundModel)")
+    }
+
+    func tracker(_ frames: [Tensor<Float>], _ start: OrientedBoundingBox) -> [OrientedBoundingBox] {
+      var tracker = makeNaiveBayesAETracker(
+        model: rae,
+        statistics: statistics,
+        frames: frames,
+        targetSize: (start.rows, start.cols),
+        foregroundModel: foregroundModel, backgroundModel: backgroundModel
+      )
+      tracker.optimizer.cgls_precision = 1e-5
+      tracker.optimizer.precision = 1e-3
+      tracker.optimizer.max_iteration = 200
+      let prediction = tracker.infer(knownStart: Tuple1(start.center))
+      return tracker.frameVariableIDs.map { varIds in
+        let poseId = varIds.head
+        return OrientedBoundingBox(center: prediction[poseId], rows: start.rows, cols: start.cols)
+      }
+    }
+
+    // Only do inference on the interesting tracks.
+    var evalDataset = OISTBeeVideo(truncate: truncate)!
+    evalDataset.tracks = [3, 5, 6, 7].map { evalDataset.tracks[$0] }
+    let trackerEvaluationDataset = TrackerEvaluationDataset(evalDataset)
+    let eval = trackerEvaluationDataset.evaluate(
+      tracker, sequenceCount: evalDataset.tracks.count, deltaAnchor: 500, outputFile: outputFile)
+    print(eval.trackerMetrics.accuracy)
+    print(eval.trackerMetrics.robustness)
+  }
+
+  func run() {
+    if verbose {
+      print("Loading dataset...")
+    }
+
+    startTimer("DATASET_LOAD")
+    let dataset: OISTBeeVideo = OISTBeeVideo(deferLoadingFrames: true)!
+    stopTimer("DATASET_LOAD")
+
+    if verbose {
+      print("Tracking...")
+    }
+
+    startTimer("RAE_TRACKING")
+    naiveRaeTrack(dataset: dataset)
+    stopTimer("RAE_TRACKING")
+
+    if verbose {
+      printTimers()
+    }
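The NaiveRae subcommand above classifies candidate patches by comparing the likelihood of their RAE encoding under the foreground and background models. The sketch below shows that log-likelihood-ratio rule in plain, self-contained Swift; DiagonalGaussian is an illustrative stand-in for SwiftFusion's MultivariateGaussian/GaussianNB, not the repo's API:

import Foundation

/// A diagonal Gaussian fit by maximum likelihood; an illustrative stand-in
/// for the foreground/background models used in the PR.
struct DiagonalGaussian {
  var mean: [Double]
  var variance: [Double]

  init(fitting samples: [[Double]], regularizer: Double = 1e-3) {
    let d = samples[0].count
    let n = Double(samples.count)
    // Per-dimension sample mean and (regularized) variance.
    let mu = (0..<d).map { j in samples.reduce(0) { $0 + $1[j] } / n }
    mean = mu
    variance = (0..<d).map { j in
      samples.reduce(0) { $0 + pow($1[j] - mu[j], 2) } / n + regularizer
    }
  }

  func logLikelihood(_ x: [Double]) -> Double {
    var ll = 0.0
    for j in 0..<x.count {
      ll -= 0.5 * (pow(x[j] - mean[j], 2) / variance[j] + log(2 * Double.pi * variance[j]))
    }
    return ll
  }
}

/// Scores an encoded patch: positive means more likely foreground than background.
func foregroundLogOdds(_ code: [Double], fg: DiagonalGaussian, bg: DiagonalGaussian) -> Double {
  fg.logLikelihood(code) - bg.logLikelihood(code)
}

let fg = DiagonalGaussian(fitting: [[0.9, 1.1], [1.1, 0.9]])
let bg = DiagonalGaussian(fitting: [[-1.0, 0.1], [-0.8, -0.1]])
print(foregroundLogOdds([1.0, 1.0], fg: fg, bg: bg) > 0)  // true: looks like foreground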
Examples/OISTVisualizationTool/main.swift (142 changes: 68 additions & 74 deletions)
@@ -24,7 +24,7 @@ import Foundation
 
 struct OISTVisualizationTool: ParsableCommand {
   static var configuration = CommandConfiguration(
-    subcommands: [VisualizeTrack.self, ViewFrame.self, RawTrack.self, PpcaTrack.self, NaiveRae.self, TrainRAE.self, NaivePca.self])
+    subcommands: [VisualizePrediction.self, VisualizeTrack.self, ViewFrame.self, RawTrack.self, PpcaTrack.self, NaiveRae.self, TrainRAE.self, NaivePca.self])
 }
 
 /// View a frame with bounding boxes
@@ -510,26 +510,23 @@ struct TrainRAE: ParsableCommand {
 ///
 /// Tracking with a Naive Bayes with RAE
 struct NaivePca: ParsableCommand {
-  @Option(help: "Where to load the RAE weights")
-  var loadWeights: String = "./oist_rae_weight.npy"
-
-  @Option(help: "Which bounding box to track")
-  var boxId: Int = 0
-
-  @Option(help: "Track for how many frames")
-  var trackFrames: Int = 10
-
-  @Option(help: "Track the target from frame x")
-  var trackStartFrame: Int = 250
-
   @Option(help: "The dimension of the latent code in the RAE appearance model")
-  var kLatentDimension = 10
+  var kLatentDimension = 20
 
   @Flag(help: "Print progress information")
   var verbose: Bool = false
 
+  @Flag(help: "Use random projections instead of learned PPCA vectors")
+  var randomProjections: Bool = false
+
+  @Option
+  var outputFile: String
+
+  @Option
+  var truncate: Int
+
   /// Returns predictions for `videoName` using the raw pixel tracker.
-  func naivePpcaTrack(dataset dataset_: OISTBeeVideo, length: Int, startFrom: Int) -> [OrientedBoundingBox] {
+  func naivePpcaTrack(dataset dataset_: OISTBeeVideo) {
     var dataset = dataset_
     dataset.labels = dataset.labels.map {
       $0.filter({ $0.label == .Body })
@@ -549,7 +546,11 @@ struct NaivePca: ParsableCommand {
     var ppca = PPCA(latentSize: kLatentDimension)
 
     ppca.train(images: batch)
 
+    if randomProjections {
+      ppca.W_inv = Tensor(randomNormal: ppca.W_inv!.shape)
+    }
+
     if verbose { print("Fitting Naive Bayes model") }
 
     var (foregroundModel, backgroundModel) = (
@@ -573,48 +574,32 @@ struct NaivePca: ParsableCommand {
       print("Background: \(backgroundModel)")
     }
 
-    if verbose { print("Loading video frames...") }
-    startTimer("VIDEO_LOAD")
-    // Load the video and take a slice of it.
-    let videos = (0..<length).map { (i) -> Tensor<Float> in
-      return withDevice(.cpu) { dataset.loadFrame(dataset.frameIds[startFrom + i])! }
-    }
-    stopTimer("VIDEO_LOAD")
-
-    let startPose = dataset.labels[startFrom][boxId].location.center
-
-    if verbose {
-      print("Creating tracker, startPose = \(startPose)")
-    }
-
-    startTimer("MAKE_GRAPH")
-    var tracker = makeNaiveBayesPCATracker(
-      model: ppca,
-      statistics: statistics,
-      frames: videos,
-      targetSize: (dataset.labels[startFrom][boxId].location.rows, dataset.labels[startFrom][boxId].location.cols),
-      foregroundModel: foregroundModel, backgroundModel: backgroundModel
-    )
-    stopTimer("MAKE_GRAPH")
-
-    if verbose { print("Starting Optimization...") }
-    if verbose { tracker.optimizer.verbosity = .SUMMARY }
-
-    tracker.optimizer.cgls_precision = 1e-7
-    tracker.optimizer.precision = 1e-4
-    tracker.optimizer.max_iteration = 200
-
-    startTimer("GRAPH_INFER")
-    let prediction = tracker.infer(knownStart: Tuple1(startPose))
-    stopTimer("GRAPH_INFER")
-
-    let boxes = tracker.frameVariableIDs.map { frameVariableIDs -> OrientedBoundingBox in
-      let poseID = frameVariableIDs.head
-      return OrientedBoundingBox(
-        center: prediction[poseID], rows: dataset.labels[startFrom][boxId].location.rows, cols: dataset.labels[startFrom][boxId].location.cols)
-    }
-
-    return boxes
+    func tracker(_ frames: [Tensor<Float>], _ start: OrientedBoundingBox) -> [OrientedBoundingBox] {
+      var tracker = makeNaiveBayesPCATracker(
+        model: ppca,
+        statistics: statistics,
+        frames: frames,
+        targetSize: (start.rows, start.cols),
+        foregroundModel: foregroundModel, backgroundModel: backgroundModel
+      )
+      tracker.optimizer.cgls_precision = 1e-9
+      tracker.optimizer.precision = 1e-6
+      tracker.optimizer.max_iteration = 200
+      let prediction = tracker.infer(knownStart: Tuple1(start.center))
+      return tracker.frameVariableIDs.map { varIds in
+        let poseId = varIds.head
+        return OrientedBoundingBox(center: prediction[poseId], rows: start.rows, cols: start.cols)
+      }
+    }
+
+    // Only do inference on the interesting tracks.
+    var evalDataset = OISTBeeVideo(truncate: truncate)!
+    evalDataset.tracks = [3, 5, 6, 7].map { evalDataset.tracks[$0] }
+    let trackerEvaluationDataset = TrackerEvaluationDataset(evalDataset)
+    let eval = trackerEvaluationDataset.evaluate(
+      tracker, sequenceCount: evalDataset.tracks.count, deltaAnchor: 500, outputFile: outputFile)
+    print(eval.trackerMetrics.accuracy)
+    print(eval.trackerMetrics.robustness)
   }
 
   func run() {
@@ -631,25 +616,8 @@ struct NaivePca: ParsableCommand {
     }
 
     startTimer("PPCA_TRACKING")
-    var bboxes: [OrientedBoundingBox]
-    bboxes = naivePpcaTrack(dataset: dataset, length: trackFrames, startFrom: trackStartFrame)
+    naivePpcaTrack(dataset: dataset)
    stopTimer("PPCA_TRACKING")
-
-    let frameRawId = dataset.frameIds[trackStartFrame + trackFrames]
-    let image = dataset.loadFrame(frameRawId)!
-
-    if verbose {
-      print("Creating output plot")
-    }
-    startTimer("PLOTTING")
-    plot(image, boxes: bboxes.indices.map {
-      ("\($0)", bboxes[$0])
-    }, margin: 10.0, scale: 0.5).show()
-    stopTimer("PLOTTING")
-
-    if verbose {
-      printTimers()
-    }
   }
 }
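The new randomProjections flag above overwrites the learned projection ppca.W_inv with Gaussian noise, turning the encoder into a random-projection baseline for the PPCA features. A self-contained sketch of that baseline in plain Swift (illustrative helper names, not the PPCA API used in the diff):

import Foundation

/// Builds a fixed k x d random matrix with N(0, 1/k) entries; multiplying by it
/// is the random-projection encoder that the flag substitutes for learned PPCA.
func makeRandomProjection(rows k: Int, cols d: Int) -> [[Double]] {
  func standardNormal() -> Double {
    // Box-Muller transform for a standard normal sample.
    let u1 = Double.random(in: Double.ulpOfOne..<1)
    let u2 = Double.random(in: 0..<1)
    return (-2 * log(u1)).squareRoot() * cos(2 * Double.pi * u2)
  }
  return (0..<k).map { _ in
    (0..<d).map { _ in standardNormal() / Double(k).squareRoot() }
  }
}

/// Encodes a flattened patch against the fixed matrix.
func encode(_ patch: [Double], with w: [[Double]]) -> [Double] {
  w.map { row in zip(row, patch).reduce(0) { $0 + $1.0 * $1.1 } }
}

let w = makeRandomProjection(rows: 20, cols: 40 * 70)  // kLatentDimension x patch pixels
let code = encode(Array(repeating: 0.5, count: 40 * 70), with: w)
print(code.count)  // 20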

@@ -666,6 +634,32 @@ struct VisualizeTrack: ParsableCommand {
   }
 }
 
+struct VisualizePrediction: ParsableCommand {
+  @Option
+  var prediction: String
+
+  @Option
+  var subsequenceIndex: Int = 0
+
+  // TODO: I think I should save this in the prediction so that we do not need to specify it!
+  @Option
+  var startFrame: Int
+
+  @Option
+  var output: String
+
+  func run() {
+    let dataset = OISTBeeVideo(deferLoadingFrames: true)!
+    let decoder = JSONDecoder()
+    let data = try! Data(contentsOf: URL(fileURLWithPath: prediction))
+    let sequence = try! decoder.decode(SequenceEvaluationResults.self, from: data)
+
+    let track = OISTBeeTrack(
+      startFrameIndex: startFrame, boxes: sequence.subsequences[subsequenceIndex].prediction)
+    track.render(to: output, video: dataset)
+  }
+}
+
 // It is important to set the global threadpool before doing anything else, so that nothing
 // accidentally uses the default threadpool.
 ComputeThreadPools.global =
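The VisualizePrediction subcommand above reads back the JSON that the evaluation runs write (the outputFile passed to evaluate) and renders the decoded boxes. A pared-down sketch of that round trip with Codable; these struct fields are assumptions, since the repo's SequenceEvaluationResults has more structure:

import Foundation

// Hypothetical, pared-down mirror of the evaluation JSON; the actual
// SequenceEvaluationResults type in this repo has more fields.
struct SubsequenceResult: Codable {
  var prediction: [[Double]]  // assumed: per-frame box parameters
}

struct EvaluationFile: Codable {
  var subsequences: [SubsequenceResult]
}

let url = URL(fileURLWithPath: "rawpixel.json")  // written by the tracker runs above
if let data = try? Data(contentsOf: url),
   let decoded = try? JSONDecoder().decode(EvaluationFile.self, from: data) {
  print("subsequences:", decoded.subsequences.count)
}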
Sources/BeeTracking/OISTBeeVideo+Batches.swift (12 changes: 6 additions & 6 deletions)
@@ -91,12 +91,12 @@ extension OISTBeeVideo {
       Double.random(in: Double(maxSide)..<Double(frame.shape[1] - maxSide), using: &deterministicEntropy),
       Double.random(in: Double(maxSide)..<Double(frame.shape[0] - maxSide), using: &deterministicEntropy))
 
-    // Conservatively reject any point that could possibly overlap with any of the labels.
-    for label in labels {
-      if (label.location.center.t - location).norm < Double(maxSide) {
-        continue
-      }
-    }
+    // // Conservatively reject any point that could possibly overlap with any of the labels.
+    // for label in labels {
+    //   if (label.location.center.t - location).norm < Double(maxSide) {
+    //     continue
+    //   }
+    // }
 
     // The point was not rejected, so return it.
     return location
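One note on the change above: the deleted loop never actually rejected a candidate, because continue only advances the inner for loop, so location was returned either way; commenting it out therefore changes no behavior. A self-contained sketch of a rejection loop that really does resample on overlap (Point, labelCenters, and minDistance are illustrative stand-ins for the types in this file):

import Foundation

struct Point { var x, y: Double }

/// Draws candidates until one is far enough from every label center.
func sampleNonOverlapping(
  in range: ClosedRange<Double>, avoiding labelCenters: [Point], minDistance: Double
) -> Point {
  while true {
    let candidate = Point(x: .random(in: range), y: .random(in: range))
    let overlaps = labelCenters.contains { center in
      hypot(center.x - candidate.x, center.y - candidate.y) < minDistance
    }
    if !overlaps { return candidate }  // reject and redraw, rather than `continue`
  }
}

let location = sampleNonOverlapping(
  in: 35...1000, avoiding: [Point(x: 100, y: 100)], minDistance: 35)
print(location)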