Skip to content

Commit

Permalink
ADD: add working stereo matching
Browse files Browse the repository at this point in the history
  • Loading branch information
T-K-233 committed Jun 21, 2024
1 parent 15ef7b3 commit d7b72c3
Show file tree
Hide file tree
Showing 17 changed files with 466 additions and 168 deletions.
49 changes: 0 additions & 49 deletions example/fast-depth/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,55 +21,6 @@ extern size_t model_input_start[];
extern size_t model_input_end[];



// Number of entries in each of the two lookup tables below.
static const size_t n_mapping = 92;

// ASCII character codes, index-aligned with brightness_map: entry i is the
// glyph printed for brightness level i (0x20 ' ' first, 0x40 '@' last).
static const uint8_t ascii_map[] = {0X20, 0X60, 0X2E, 0X2D, 0X27, 0X3A, 0X5F, 0X2C, 0X5E, 0X3D, 0X3B, 0X3E, 0X3C, 0X2B, 0X21, 0X72, 0X63, 0X2A, 0X2F, 0X7A, 0X3F, 0X73, 0X4C, 0X54, 0X76, 0X29, 0X4A, 0X37, 0X28, 0X7C, 0X46, 0X69, 0X7B, 0X43, 0X7D, 0X66, 0X49, 0X33, 0X31, 0X74, 0X6C, 0X75, 0X5B, 0X6E, 0X65, 0X6F, 0X5A, 0X35, 0X59, 0X78, 0X6A, 0X79, 0X61, 0X5D, 0X32, 0X45, 0X53, 0X77, 0X71, 0X6B, 0X50, 0X36, 0X68, 0X39, 0X64, 0X34, 0X56, 0X70, 0X4F, 0X47, 0X62, 0X55, 0X41, 0X4B, 0X58, 0X48, 0X6D, 0X38, 0X52, 0X44, 0X23, 0X24, 0X42, 0X67, 0X30, 0X4D, 0X4E, 0X57, 0X51, 0X25, 0X26, 0X40};

// Ascending brightness thresholds in [0, 1), one per glyph in ascii_map.
// NOTE(review): presumably the measured ink coverage of each glyph - confirm.
static const float brightness_map[] = {0, 0.0751, 0.0829, 0.0848, 0.1227, 0.1403, 0.1559, 0.185, 0.2183, 0.2417, 0.2571, 0.2852, 0.2902, 0.2919, 0.3099, 0.3192, 0.3232, 0.3294, 0.3384, 0.3609, 0.3619, 0.3667, 0.3737, 0.3747, 0.3838, 0.3921, 0.396, 0.3984, 0.3993, 0.4075, 0.4091, 0.4101, 0.42, 0.423, 0.4247, 0.4274, 0.4293, 0.4328, 0.4382, 0.4385, 0.442, 0.4473, 0.4477, 0.4503, 0.4562, 0.458, 0.461, 0.4638, 0.4667, 0.4686, 0.4693, 0.4703, 0.4833, 0.4881, 0.4944, 0.4953, 0.4992, 0.5509, 0.5567, 0.5569, 0.5591, 0.5602, 0.5602, 0.565, 0.5776, 0.5777, 0.5818, 0.587, 0.5972, 0.5999, 0.6043, 0.6049, 0.6093, 0.6099, 0.6465, 0.6561, 0.6595, 0.6631, 0.6714, 0.6759, 0.6809, 0.6816, 0.6925, 0.7039, 0.7086, 0.7235, 0.7302, 0.7332, 0.7602, 0.7834, 0.8037, 0.9999};

/**
 * Print a single-channel image tensor to stdout as ASCII art.
 *
 * The tensor must have 4 dimensions with shape[0] == 1 and shape[3] == 1;
 * shape[1] is used as the row count and shape[2] as the column count.
 * The data buffer is read as a contiguous row-major array of float.
 *
 * Pixel values are min/max-normalized to [0, 1] and each one is printed as
 * the glyph of the last brightness_map entry that does not exceed it.
 * One text line is emitted per image row.
 *
 * @param tensor image to display; only read, never modified.
 */
void showASCIIImage(Tensor *tensor) {
  assert(tensor->ndim == 4);
  assert(tensor->shape[0] == 1);
  assert(tensor->shape[3] == 1);

  const size_t height = tensor->shape[1];
  const size_t width = tensor->shape[2];
  const size_t n_pixels = height * width;
  const float *pixels = (const float *)tensor->data;

  // Seed the range from the first pixel rather than from fixed sentinels so
  // that data lying entirely outside [-1000, 1000] is still normalized
  // against its true extrema.
  float min = 0.0f;
  float max = 0.0f;
  if (n_pixels > 0) {
    min = pixels[0];
    max = pixels[0];
  }
  for (size_t k = 1; k < n_pixels; k += 1) {
    if (pixels[k] < min) {
      min = pixels[k];
    }
    if (pixels[k] > max) {
      max = pixels[k];
    }
  }
  const float range = max - min;

  for (size_t h = 0; h < height; h += 1) {
    for (size_t w = 0; w < width; w += 1) {
      // Normalize to [0, 1]. A constant image has range == 0; map it to 0
      // instead of evaluating 0/0 (which would yield NaN).
      float pixel_value = 0.0f;
      if (range > 0.0f) {
        pixel_value = (pixels[h * width + w] - min) / range;
      }

      // brightness_map is ascending: keep the last entry <= pixel_value.
      size_t brightness_index = 0;
      for (size_t i = 0; i < n_mapping; i += 1) {
        if (pixel_value < brightness_map[i]) {
          break;
        }
        brightness_index = i;
      }

      putchar(ascii_map[brightness_index]);
    }
    putchar('\n');
  }
}

// static void enable_vector_operations() {
// unsigned long mstatus;
// asm volatile("csrr %0, mstatus" : "=r"(mstatus));
Expand Down
57 changes: 57 additions & 0 deletions example/fast-depth/termimg.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#pragma once

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <assert.h>

#include "nn_tensor.h"


// These tables are defined in a header, so they are given internal linkage
// (static): every translation unit that includes this file gets its own copy
// instead of clashing at link time.

// Number of entries in each of the two lookup tables below.
static const size_t n_mapping = 92;

// ASCII character codes, index-aligned with brightness_map: entry i is the
// glyph printed for brightness level i (0x20 ' ' first, 0x40 '@' last).
static const uint8_t ascii_map[] = {0X20, 0X60, 0X2E, 0X2D, 0X27, 0X3A, 0X5F, 0X2C, 0X5E, 0X3D, 0X3B, 0X3E, 0X3C, 0X2B, 0X21, 0X72, 0X63, 0X2A, 0X2F, 0X7A, 0X3F, 0X73, 0X4C, 0X54, 0X76, 0X29, 0X4A, 0X37, 0X28, 0X7C, 0X46, 0X69, 0X7B, 0X43, 0X7D, 0X66, 0X49, 0X33, 0X31, 0X74, 0X6C, 0X75, 0X5B, 0X6E, 0X65, 0X6F, 0X5A, 0X35, 0X59, 0X78, 0X6A, 0X79, 0X61, 0X5D, 0X32, 0X45, 0X53, 0X77, 0X71, 0X6B, 0X50, 0X36, 0X68, 0X39, 0X64, 0X34, 0X56, 0X70, 0X4F, 0X47, 0X62, 0X55, 0X41, 0X4B, 0X58, 0X48, 0X6D, 0X38, 0X52, 0X44, 0X23, 0X24, 0X42, 0X67, 0X30, 0X4D, 0X4E, 0X57, 0X51, 0X25, 0X26, 0X40};

// Ascending brightness thresholds in [0, 1), one per glyph in ascii_map.
// NOTE(review): presumably the measured ink coverage of each glyph - confirm.
static const float brightness_map[] = {0, 0.0751, 0.0829, 0.0848, 0.1227, 0.1403, 0.1559, 0.185, 0.2183, 0.2417, 0.2571, 0.2852, 0.2902, 0.2919, 0.3099, 0.3192, 0.3232, 0.3294, 0.3384, 0.3609, 0.3619, 0.3667, 0.3737, 0.3747, 0.3838, 0.3921, 0.396, 0.3984, 0.3993, 0.4075, 0.4091, 0.4101, 0.42, 0.423, 0.4247, 0.4274, 0.4293, 0.4328, 0.4382, 0.4385, 0.442, 0.4473, 0.4477, 0.4503, 0.4562, 0.458, 0.461, 0.4638, 0.4667, 0.4686, 0.4693, 0.4703, 0.4833, 0.4881, 0.4944, 0.4953, 0.4992, 0.5509, 0.5567, 0.5569, 0.5591, 0.5602, 0.5602, 0.565, 0.5776, 0.5777, 0.5818, 0.587, 0.5972, 0.5999, 0.6043, 0.6049, 0.6093, 0.6099, 0.6465, 0.6561, 0.6595, 0.6631, 0.6714, 0.6759, 0.6809, 0.6816, 0.6925, 0.7039, 0.7086, 0.7235, 0.7302, 0.7332, 0.7602, 0.7834, 0.8037, 0.9999};

/**
 * Print a single-channel image tensor to stdout as ASCII art.
 *
 * The tensor must have 4 dimensions with shape[0] == 1 and shape[3] == 1;
 * shape[1] is used as the row count and shape[2] as the column count.
 * The data buffer is read as a contiguous row-major array of float.
 *
 * Pixel values are min/max-normalized to [0, 1] and each one is printed as
 * the glyph of the last brightness_map entry that does not exceed it.
 * One text line is emitted per image row.
 *
 * Declared static inline because this definition lives in a header: a
 * function with external linkage here would be defined once per including
 * translation unit and fail to link.
 *
 * @param tensor image to display; only read, never modified.
 */
static inline void showASCIIImage(Tensor *tensor) {
  assert(tensor->ndim == 4);
  assert(tensor->shape[0] == 1);
  assert(tensor->shape[3] == 1);

  const size_t height = tensor->shape[1];
  const size_t width = tensor->shape[2];
  const size_t n_pixels = height * width;
  const float *pixels = (const float *)tensor->data;

  // Seed the range from the first pixel rather than from fixed sentinels so
  // that data lying entirely outside [-1000, 1000] is still normalized
  // against its true extrema.
  float min = 0.0f;
  float max = 0.0f;
  if (n_pixels > 0) {
    min = pixels[0];
    max = pixels[0];
  }
  for (size_t k = 1; k < n_pixels; k += 1) {
    if (pixels[k] < min) {
      min = pixels[k];
    }
    if (pixels[k] > max) {
      max = pixels[k];
    }
  }
  const float range = max - min;

  for (size_t h = 0; h < height; h += 1) {
    for (size_t w = 0; w < width; w += 1) {
      // Normalize to [0, 1]. A constant image has range == 0; map it to 0
      // instead of evaluating 0/0 (which would yield NaN).
      float pixel_value = 0.0f;
      if (range > 0.0f) {
        pixel_value = (pixels[h * width + w] - min) / range;
      }

      // brightness_map is ascending: keep the last entry <= pixel_value.
      size_t brightness_index = 0;
      for (size_t i = 0; i < n_mapping; i += 1) {
        if (pixel_value < brightness_map[i]) {
          break;
        }
        brightness_index = i;
      }

      putchar(ascii_map[brightness_index]);
    }
    putchar('\n');
  }
}
215 changes: 113 additions & 102 deletions example/stereo-block-matching/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,17 @@ extern size_t right_end[];



#define IMG_HEIGHT 256
#define IMG_WIDTH 256



size_t file_size;

typedef struct {
int width;
int height;
unsigned char *data;
uint8_t *data;
} Image;

typedef struct {
Expand All @@ -34,63 +39,23 @@ typedef struct {
uint8_t *data;
} Disp_Image;

// Release an Image header previously obtained from load_image().
// Only the struct itself is freed; image->data is deliberately left alone
// (load_image() stores the caller-supplied pointer there, it does not
// allocate the pixel buffer). Passing NULL is allowed.
void free_image(Image *image) {
  free(image);  // free(NULL) is a no-op, so no explicit guard is needed
}

// Release a Disp_Image together with its disparity buffer.
// Passing NULL is allowed.
//
// NOTE(review): this assumes image->data is heap-allocated and owned by the
// Disp_Image. That matches compute_dispartiy(), which frees the same buffer
// with free() on its error path before handing it to the returned struct.
// Confirm no caller builds a Disp_Image around non-heap storage.
void free_disp_image(Disp_Image *image) {
  if (!image) {
    return;
  }
  free(image->data);  // previously leaked: only the header was released
  free(image);
}

// Wrap an in-memory 256x256 pixel buffer in a newly allocated Image.
//
// file_path: despite its name, this is a pointer to the raw pixel bytes,
//            not a path; it is stored in the Image as-is (no copy is made),
//            so the buffer must outlive the returned Image.
// Returns:   a heap-allocated Image to be released with free_image(), or
//            NULL if file_path is NULL or the allocation fails.
Image *load_image(uint8_t *file_path) {
  if (!file_path) {
    // Nothing was allocated here, so this is an invalid-argument error.
    printf("Error: image data pointer is NULL\n");
    return NULL;
  }

  Image *image = malloc(sizeof *image);
  if (!image) {
    printf("Error: Memory allocation failed\n");
    // Do NOT free file_path: this function never allocated it, and calling
    // free() on memory that did not come from malloc is undefined behavior.
    return NULL;
  }

  image->height = 256;
  image->width = 256;
  // Borrow the caller's buffer; ownership is not transferred.
  image->data = file_path;

  return image;
}


// Return x squared, computed in int so the product cannot wrap in char.
int square(char x) {
  const int widened = x;
  return widened * widened;
}

// Core function computing stereoBM
Disp_Image* compute_dispartiy(Image *left, Image *right, int min_disparity, int max_disparity, int half_block_size) {
Tensor* compute_dispartiy(Tensor *left, Tensor *right, int min_disparity, int max_disparity, size_t half_block_size) {
// allocate data for disparity, use calloc for 0 initialization
int SAD = 0;
int min_SAD = INT32_MAX;
int l_r, l_c, r_r, r_c;
int height = left->height;
int width = left->width;
int height = left->shape[1];
int width = left->shape[2];

int search_range = max_disparity - min_disparity;
int s_w = width - 2*half_block_size - search_range;
int s_h = height - 2*half_block_size;
int s_w = width - 2 * half_block_size - search_range;
int s_h = height - 2 * half_block_size;

int sad_iop = 0;

Expand All @@ -100,87 +65,133 @@ Disp_Image* compute_dispartiy(Image *left, Image *right, int min_disparity, int
return NULL;
}


Tensor *left_block = NN_tensor(2, (const size_t[]){1, 2*half_block_size}, DTYPE_U8, (uint8_t *)left->data);
Tensor *right_block = NN_tensor(2, (const size_t[]){1, 2*half_block_size}, DTYPE_U8, (uint8_t *)right->data);
Tensor *left_block_signed = NN_tensor(2, (const size_t[]){1, 2*half_block_size}, DTYPE_U32, NULL);
Tensor *right_block_signed = NN_tensor(2, (const size_t[]){1, 2*half_block_size}, DTYPE_U32, NULL);
Tensor *diff = NN_tensor(2, (const size_t[]){1, 2*half_block_size}, DTYPE_U8, NULL);
Tensor *diff_wide = NN_tensor(2, (const size_t[]){1, 2*half_block_size}, DTYPE_I32, NULL);
Tensor *out = NN_tensor(1, (const size_t[]){1}, DTYPE_I32, NULL);

// Tensor *left_block = NN_tensor(2, (const size_t[]){1, 1}, DTYPE_U8, (uint8_t *)left->data);
// Tensor *right_block = NN_tensor(2, (const size_t[]){1, 1}, DTYPE_U8, (uint8_t *)right->data);
// Tensor *left_block_signed = NN_tensor(2, (const size_t[]){1, 1}, DTYPE_U32, NULL);
// Tensor *right_block_signed = NN_tensor(2, (const size_t[]){1, 1}, DTYPE_U32, NULL);
// Tensor *diff = NN_tensor(2, (const size_t[]){1, 1}, DTYPE_I32, NULL);
// Tensor *out = NN_tensor(1, (const size_t[]){1}, DTYPE_I32, NULL);

// compute disparity
// outer loop iterating over blocks
for (int i=0+half_block_size; i<height-half_block_size; i++) {
for (int j=0+half_block_size-min_disparity; j<width-half_block_size-max_disparity; j++) {
// middle loop per block
min_SAD = INT32_MAX;
for (int offset=min_disparity; offset<max_disparity; offset++) {
SAD = 0;
// inner loop per pixel: compute SAD
for (l_r = i-half_block_size; l_r < half_block_size+i; l_r++) {
for (l_c = j-half_block_size; l_c < half_block_size+j; l_c++) {
r_r = l_r;
r_c = l_c + offset;
SAD += abs(left->data[l_r*width+l_c] - right->data[r_r*width+r_c]);
sad_iop++;

// for debugging
// if (i == half_block_size && j == half_block_size && offset == 5){
// printf("SAD: %x, l_data: %x, r_data: %x\n", SAD, left->data[l_r*width+l_c], right->data[r_r*width+r_c]);
// }
}
}
// reduction step
if (SAD < min_SAD) {
// for debugging
// if (i == half_block_size) {
// printf("Updated min_SAD: %x, SAD: %x, j: %d, offset: %d\n", min_SAD, SAD, j, offset);
// }
min_SAD = SAD;

disparity[(i-half_block_size)*(s_w)+j-half_block_size] = offset;
}
}
for (int i = half_block_size; i < height-half_block_size; i += 1) {
printf("i: %d / %d\n", i, height-half_block_size);
for (int j = half_block_size - min_disparity; j < width-half_block_size - max_disparity; j += 1) {
// printf("j: %d / %d\n", j, width-half_block_size - max_disparity);
// middle loop per block
min_SAD = INT32_MAX;
for (int offset = min_disparity; offset<max_disparity; offset += 1) {
SAD = 0;


// inner loop per pixel: compute SAD
// for (size_t row = i - half_block_size; row < i + half_block_size; row += 1) {
// for (size_t col = j - half_block_size; col < j + half_block_size; col += 1) {
// SAD += abs((int)(((uint8_t *)left->data)[row * width + col] - ((uint8_t *)right->data)[row * width + col + offset]));
// // printf("%d\n", (((uint8_t *)left->data)[row * width + col] - ((uint8_t *)right->data)[row * width + col + offset]));
// sad_iop += 1;
// }
// }


// tensor version

size_t row = i - half_block_size;
size_t col = j - half_block_size;
for (size_t row = i - half_block_size; row < half_block_size + i; row += 1) {
// for (size_t col = j - half_block_size; col < half_block_size + j; col += 1) {

left_block->data = ((uint8_t *)left->data) + row*width + col;
right_block->data = ((uint8_t *)right->data) + row*width + col + offset;

NN_sub(diff, left_block, right_block);

// // NN_printf(diff);

diff->dtype = DTYPE_I8;

NN_asType(diff_wide, diff);
diff->dtype = DTYPE_U8;

NN_absInplace(diff_wide);

// NN_printf(diff);

NN_sum(out, diff_wide);
SAD += ((int32_t *)out->data)[0];

// }
}
// printf("SAD: %d\n", SAD);
// return NULL;


// reduction step
if (SAD < min_SAD) {
// for debugging
// if (i == half_block_size) {
// printf("Updated min_SAD: %x, SAD: %x, j: %d, offset: %d\n", min_SAD, SAD, j, offset);
// }
min_SAD = SAD;

disparity[(i-half_block_size)*(s_w)+j-half_block_size] = offset;
}
}
// if (j > half_block_size - min_disparity + 2)
// return NULL;
}
}

printf("SAD IOPs: %d\n", sad_iop);
NN_freeTensorData(left_block_signed);
NN_freeTensorData(right_block_signed);
NN_freeTensorData(diff);
NN_freeTensorData(out);
NN_deleteTensor(left_block_signed);
NN_deleteTensor(right_block_signed);
NN_deleteTensor(diff);
NN_deleteTensor(out);
NN_deleteTensor(left_block);
NN_deleteTensor(right_block);

printf("SAD IOPs: %d\n", sad_iop);


Disp_Image *disparity_image = (Disp_Image *)malloc(sizeof(Disp_Image));
if (!disparity_image) {
printf("Error: Memory allocation failed\n");
free(disparity);
return NULL;
}
disparity_image->width = s_w;
disparity_image->height = s_h;
disparity_image->data = disparity;
Tensor *disparity_image = NN_tensor(4, (const size_t[]){1, s_h, s_w, 1}, DTYPE_U8, disparity);
return disparity_image;
}

int main() {

file_size = (size_t)left_end - (size_t)left_start;

Image *left_image = load_image(left_data);

printf("Loaded left image\n");

Image *right_image = load_image(right_data);

printf("Loaded right image\n");
Tensor *left_image = NN_tensor(4, (const size_t[]){1, IMG_HEIGHT, IMG_WIDTH, 1}, DTYPE_U8, left_data);
Tensor *right_image = NN_tensor(4, (const size_t[]){1, IMG_HEIGHT, IMG_WIDTH, 1}, DTYPE_U8, right_data);

Disp_Image *disparity_image = compute_dispartiy(left_image, right_image, 0, 32, 4);
Tensor *disparity_image = compute_dispartiy(left_image, right_image, 0, 32, 4);
// Save the disparity image

// write only the data

printf("printing result\n");
NN_printShape(disparity_image);
printf("\n");

Tensor *img = NN_tensor(4, (const size_t[]){1, disparity_image->height, disparity_image->width, 1}, DTYPE_U8, disparity_image->data);
Tensor *img_small = NN_zeros(4, (const size_t[]){1, disparity_image->height / 8, disparity_image->width / 4, 1}, DTYPE_U8);
Tensor *img_small = NN_zeros(4, (const size_t[]){1, disparity_image->shape[1] / 4, disparity_image->shape[2] / 2, 1}, DTYPE_U8);

NN_interpolate(img_small, img, (float []){0.125, 0.25});


NN_interpolate(img_small, disparity_image, (float []){0.25, 0.5});

showASCIIImage(img_small);


free_image(left_image);
free_image(right_image);
free_disp_image(disparity_image);
return 0;
}
Loading

0 comments on commit d7b72c3

Please sign in to comment.