ADD: add working stereo matching

ucb-bar · Jun 21, 2024 · d7b72c3 · d7b72c3
1 parent 15ef7b3
commit d7b72c3
Show file tree

Hide file tree

Showing 17 changed files with 466 additions and 168 deletions.
diff --git a/example/fast-depth/main.c b/example/fast-depth/main.c
@@ -21,55 +21,6 @@ extern size_t model_input_start[];
 extern size_t model_input_end[];
 
 
-
-size_t n_mapping = 92;
-
-uint8_t ascii_map[] = {0X20, 0X60, 0X2E, 0X2D, 0X27, 0X3A, 0X5F, 0X2C, 0X5E, 0X3D, 0X3B, 0X3E, 0X3C, 0X2B, 0X21, 0X72, 0X63, 0X2A, 0X2F, 0X7A, 0X3F, 0X73, 0X4C, 0X54, 0X76, 0X29, 0X4A, 0X37, 0X28, 0X7C, 0X46, 0X69, 0X7B, 0X43, 0X7D, 0X66, 0X49, 0X33, 0X31, 0X74, 0X6C, 0X75, 0X5B, 0X6E, 0X65, 0X6F, 0X5A, 0X35, 0X59, 0X78, 0X6A, 0X79, 0X61, 0X5D, 0X32, 0X45, 0X53, 0X77, 0X71, 0X6B, 0X50, 0X36, 0X68, 0X39, 0X64, 0X34, 0X56, 0X70, 0X4F, 0X47, 0X62, 0X55, 0X41, 0X4B, 0X58, 0X48, 0X6D, 0X38, 0X52, 0X44, 0X23, 0X24, 0X42, 0X67, 0X30, 0X4D, 0X4E, 0X57, 0X51, 0X25, 0X26, 0X40};
-
-float brightness_map[] = {0, 0.0751, 0.0829, 0.0848, 0.1227, 0.1403, 0.1559, 0.185, 0.2183, 0.2417, 0.2571, 0.2852, 0.2902, 0.2919, 0.3099, 0.3192, 0.3232, 0.3294, 0.3384, 0.3609, 0.3619, 0.3667, 0.3737, 0.3747, 0.3838, 0.3921, 0.396, 0.3984, 0.3993, 0.4075, 0.4091, 0.4101, 0.42, 0.423, 0.4247, 0.4274, 0.4293, 0.4328, 0.4382, 0.4385, 0.442, 0.4473, 0.4477, 0.4503, 0.4562, 0.458, 0.461, 0.4638, 0.4667, 0.4686, 0.4693, 0.4703, 0.4833, 0.4881, 0.4944, 0.4953, 0.4992, 0.5509, 0.5567, 0.5569, 0.5591, 0.5602, 0.5602, 0.565, 0.5776, 0.5777, 0.5818, 0.587, 0.5972, 0.5999, 0.6043, 0.6049, 0.6093, 0.6099, 0.6465, 0.6561, 0.6595, 0.6631, 0.6714, 0.6759, 0.6809, 0.6816, 0.6925, 0.7039, 0.7086, 0.7235, 0.7302, 0.7332, 0.7602, 0.7834, 0.8037, 0.9999};
-
-void showASCIIImage(Tensor *tensor) {
-  assert(tensor->ndim == 4);
-  assert(tensor->shape[0] == 1);
-  assert(tensor->shape[3] == 1);
-  float min = 1000;
-  float max = -1000;
-  for (size_t h = 0; h < tensor->shape[1]; h += 1) {
-    for (size_t w = 0; w < tensor->shape[2]; w += 1) {
-      float pixel_value = ((float *)tensor->data)[h * tensor->shape[2] + w];
-      if (pixel_value < min) {
-        min = pixel_value;
-      }
-      if (pixel_value > max) {
-        max = pixel_value;
-      }
-    }
-  }
-
-  for (size_t h = 0; h < tensor->shape[1]; h += 1) {
-    for (size_t w = 0; w < tensor->shape[2]; w += 1) {
-      float pixel_value = ((float *)tensor->data)[h * tensor->shape[2] + w];
-
-      // normalize the pixel value to the range [0, 1]
-      pixel_value = (pixel_value - min) / (max - min);
-
-      // find the closest brightness value in the brightness_map
-      size_t brightness_index = 0;
-      for (size_t i = 0; i < n_mapping; i += 1) {
-        if (pixel_value < brightness_map[i]) {
-          break;
-        }
-        brightness_index = i;
-      }
-
-      // find the corresponding ASCII character
-      uint8_t ascii = ascii_map[brightness_index];
-      printf("%c", ascii);
-    }
-    printf("\n");
-  }
-}
-
 // static void enable_vector_operations() {
 //     unsigned long mstatus;
 //     asm volatile("csrr %0, mstatus" : "=r"(mstatus));

diff --git a/example/fast-depth/termimg.h b/example/fast-depth/termimg.h
@@ -0,0 +1,57 @@
+#pragma once
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <assert.h>
+
+#include "nn_tensor.h"
+
+
+const size_t n_mapping = 92;
+
+const uint8_t ascii_map[] = {0X20, 0X60, 0X2E, 0X2D, 0X27, 0X3A, 0X5F, 0X2C, 0X5E, 0X3D, 0X3B, 0X3E, 0X3C, 0X2B, 0X21, 0X72, 0X63, 0X2A, 0X2F, 0X7A, 0X3F, 0X73, 0X4C, 0X54, 0X76, 0X29, 0X4A, 0X37, 0X28, 0X7C, 0X46, 0X69, 0X7B, 0X43, 0X7D, 0X66, 0X49, 0X33, 0X31, 0X74, 0X6C, 0X75, 0X5B, 0X6E, 0X65, 0X6F, 0X5A, 0X35, 0X59, 0X78, 0X6A, 0X79, 0X61, 0X5D, 0X32, 0X45, 0X53, 0X77, 0X71, 0X6B, 0X50, 0X36, 0X68, 0X39, 0X64, 0X34, 0X56, 0X70, 0X4F, 0X47, 0X62, 0X55, 0X41, 0X4B, 0X58, 0X48, 0X6D, 0X38, 0X52, 0X44, 0X23, 0X24, 0X42, 0X67, 0X30, 0X4D, 0X4E, 0X57, 0X51, 0X25, 0X26, 0X40};
+
+const float brightness_map[] = {0, 0.0751, 0.0829, 0.0848, 0.1227, 0.1403, 0.1559, 0.185, 0.2183, 0.2417, 0.2571, 0.2852, 0.2902, 0.2919, 0.3099, 0.3192, 0.3232, 0.3294, 0.3384, 0.3609, 0.3619, 0.3667, 0.3737, 0.3747, 0.3838, 0.3921, 0.396, 0.3984, 0.3993, 0.4075, 0.4091, 0.4101, 0.42, 0.423, 0.4247, 0.4274, 0.4293, 0.4328, 0.4382, 0.4385, 0.442, 0.4473, 0.4477, 0.4503, 0.4562, 0.458, 0.461, 0.4638, 0.4667, 0.4686, 0.4693, 0.4703, 0.4833, 0.4881, 0.4944, 0.4953, 0.4992, 0.5509, 0.5567, 0.5569, 0.5591, 0.5602, 0.5602, 0.565, 0.5776, 0.5777, 0.5818, 0.587, 0.5972, 0.5999, 0.6043, 0.6049, 0.6093, 0.6099, 0.6465, 0.6561, 0.6595, 0.6631, 0.6714, 0.6759, 0.6809, 0.6816, 0.6925, 0.7039, 0.7086, 0.7235, 0.7302, 0.7332, 0.7602, 0.7834, 0.8037, 0.9999};
+
+/**
+ * Print a single-channel image tensor to stdout as ASCII art.
+ *
+ * The tensor must have ndim == 4 with shape[0] == 1 and shape[3] == 1
+ * (checked with assert). Elements are read as float; the tensor's dtype
+ * field is not inspected, so the caller has to pass float data.
+ *
+ * Each pixel is rescaled to [0, 1] using the image's own minimum and
+ * maximum, then drawn with the last entry of ascii_map whose brightness_map
+ * threshold does not exceed the rescaled value. One line of text is printed
+ * per image row.
+ *
+ * If every pixel has the same value the range is zero; the image is then
+ * drawn entirely with ascii_map[0] instead of dividing by zero.
+ *
+ * NOTE(review): this is a non-static definition inside a header, so
+ * including this header from more than one translation unit will likely
+ * produce duplicate-symbol link errors -- confirm it is included once only.
+ *
+ * @param tensor image to display; it is only read.
+ */
+void showASCIIImage(Tensor *tensor) {
+  assert(tensor->ndim == 4);
+  assert(tensor->shape[0] == 1);
+  assert(tensor->shape[3] == 1);
+
+  const size_t height = tensor->shape[1];
+  const size_t width = tensor->shape[2];
+  const size_t n_pixels = height * width;
+  const float *pixels = (const float *)tensor->data;
+
+  // Seed the extrema from the data itself so that images whose values lie
+  // outside an arbitrary fixed window are still normalized correctly.
+  float min = 0.0f;
+  float max = 0.0f;
+  if (n_pixels > 0) {
+    min = pixels[0];
+    max = pixels[0];
+  }
+  for (size_t k = 1; k < n_pixels; k += 1) {
+    if (pixels[k] < min) {
+      min = pixels[k];
+    }
+    if (pixels[k] > max) {
+      max = pixels[k];
+    }
+  }
+
+  const float range = max - min;
+
+  for (size_t h = 0; h < height; h += 1) {
+    for (size_t w = 0; w < width; w += 1) {
+      // normalize the pixel value to the range [0, 1]; a flat image maps to 0
+      float pixel_value = 0.0f;
+      if (range > 0.0f) {
+        pixel_value = (pixels[h * width + w] - min) / range;
+      }
+
+      // brightness_map is ascending: keep the last threshold <= pixel_value
+      size_t brightness_index = 0;
+      for (size_t i = 0; i < n_mapping; i += 1) {
+        if (pixel_value < brightness_map[i]) {
+          break;
+        }
+        brightness_index = i;
+      }
+
+      // emit the glyph paired with that threshold
+      uint8_t ascii = ascii_map[brightness_index];
+      printf("%c", ascii);
+    }
+    printf("\n");
+  }
+}
diff --git a/example/stereo-block-matching/main.c b/example/stereo-block-matching/main.c
@@ -20,12 +20,17 @@ extern size_t right_end[];
 
 
 
+#define IMG_HEIGHT    256
+#define IMG_WIDTH     256
+
+
+
 size_t file_size;
 
+// Image dimensions plus a pointer to its byte data.
 typedef struct {
   int width;
   int height;
-  unsigned char *data;
+  uint8_t *data;
 } Image;
 
 typedef struct {
@@ -34,63 +39,23 @@ typedef struct {
   uint8_t *data;
 } Disp_Image;
 
-void free_image(Image *image) {
-  if (image) {
-    // free(image->data);
-    free(image);
-  }
-}
-
-void free_disp_image(Disp_Image *image) {
-  if (image) {
-    // free(image->data);
-    free(image);
-  }
-}
-
-Image *load_image(uint8_t *file_path) {
-  // Allocate memory for the image data
-  unsigned char *data = file_path;
-  if (!data) {
-    printf("Error: Memory allocation failed\n");
-    return NULL;
-  }
-
-  // Read the data from the file
-
-  // Create an Image structure and populate its fields
-  Image *image = (Image *)malloc(sizeof(Image));
-  if (!image) {
-    printf("Error: Memory allocation failed\n");
-    free(data);
-    return NULL;
-  }
-
-  image->height = 256;
-  image->width = 256;
-  // Set the image data pointer
-  image->data = data;
-
-  return image;
-}
 
 
+// Returns x * x; both operands are widened to int before multiplying.
 int square(char x) {
   return (int)x * (int)x;
 }
 
 // Core function computing stereoBM
-Disp_Image* compute_dispartiy(Image *left, Image *right, int min_disparity, int max_disparity, int half_block_size) {
+Tensor* compute_dispartiy(Tensor *left, Tensor *right, int min_disparity, int max_disparity, size_t half_block_size) {
   // allocate data for disparity, use calloc for 0 initialization
   int SAD = 0;
   int min_SAD = INT32_MAX;
-  int l_r, l_c, r_r, r_c;
-  int height = left->height;
-  int width = left->width;
+  int height = left->shape[1];
+  int width = left->shape[2];
 
   int search_range = max_disparity - min_disparity;
-  int s_w = width - 2*half_block_size - search_range;
-  int s_h = height - 2*half_block_size;
+  int s_w = width - 2 * half_block_size - search_range;
+  int s_h = height - 2 * half_block_size;
 
   int sad_iop = 0;
 
@@ -100,87 +65,133 @@ Disp_Image* compute_dispartiy(Image *left, Image *right, int min_disparity, int
     return NULL;
   }
 
+
+  Tensor *left_block = NN_tensor(2, (const size_t[]){1, 2*half_block_size}, DTYPE_U8, (uint8_t *)left->data);
+  Tensor *right_block = NN_tensor(2, (const size_t[]){1, 2*half_block_size}, DTYPE_U8, (uint8_t *)right->data);
+  Tensor *left_block_signed = NN_tensor(2, (const size_t[]){1, 2*half_block_size}, DTYPE_U32, NULL);
+  Tensor *right_block_signed = NN_tensor(2, (const size_t[]){1, 2*half_block_size}, DTYPE_U32, NULL);
+  Tensor *diff = NN_tensor(2, (const size_t[]){1, 2*half_block_size}, DTYPE_U8, NULL);
+  Tensor *diff_wide = NN_tensor(2, (const size_t[]){1, 2*half_block_size}, DTYPE_I32, NULL);
+  Tensor *out = NN_tensor(1, (const size_t[]){1}, DTYPE_I32, NULL);
+
+  // Tensor *left_block = NN_tensor(2, (const size_t[]){1, 1}, DTYPE_U8, (uint8_t *)left->data);
+  // Tensor *right_block = NN_tensor(2, (const size_t[]){1, 1}, DTYPE_U8, (uint8_t *)right->data);
+  // Tensor *left_block_signed = NN_tensor(2, (const size_t[]){1, 1}, DTYPE_U32, NULL);
+  // Tensor *right_block_signed = NN_tensor(2, (const size_t[]){1, 1}, DTYPE_U32, NULL);
+  // Tensor *diff = NN_tensor(2, (const size_t[]){1, 1}, DTYPE_I32, NULL);
+  // Tensor *out = NN_tensor(1, (const size_t[]){1}, DTYPE_I32, NULL);
+
   // compute disparity
   // outer loop iterating over blocks
-  for (int i=0+half_block_size; i<height-half_block_size; i++) {
-      for (int j=0+half_block_size-min_disparity; j<width-half_block_size-max_disparity; j++) {
-          // middle loop per block
-          min_SAD = INT32_MAX;
-          for (int offset=min_disparity; offset<max_disparity; offset++) {
-              SAD = 0;
-              // inner loop per pixel: compute SAD
-              for (l_r = i-half_block_size; l_r < half_block_size+i; l_r++) {
-                  for (l_c = j-half_block_size; l_c < half_block_size+j; l_c++) {
-                      r_r = l_r;
-                      r_c = l_c + offset;
-                      SAD += abs(left->data[l_r*width+l_c] - right->data[r_r*width+r_c]);
-                      sad_iop++;
-
-                      // for debugging
-                      // if (i == half_block_size && j == half_block_size && offset == 5){
-                      //     printf("SAD: %x, l_data: %x, r_data: %x\n", SAD, left->data[l_r*width+l_c], right->data[r_r*width+r_c]);
-                      // }
-                  }
-              }
-              // reduction step
-              if (SAD < min_SAD) {
-                  // for debugging
-                  // if (i == half_block_size) {
-                  //     printf("Updated min_SAD: %x, SAD: %x, j: %d, offset: %d\n", min_SAD, SAD, j, offset);
-                  // }
-                  min_SAD = SAD;
-
-                  disparity[(i-half_block_size)*(s_w)+j-half_block_size] = offset;
-              }
-          }
+  for (int i = half_block_size; i < height-half_block_size; i += 1) {
+    printf("i: %d / %d\n", i, height-half_block_size);
+    for (int j = half_block_size - min_disparity; j < width-half_block_size - max_disparity; j += 1) {
+      // printf("j: %d / %d\n", j, width-half_block_size - max_disparity);
+      // middle loop per block
+      min_SAD = INT32_MAX;
+      for (int offset = min_disparity; offset<max_disparity; offset += 1) {
+        SAD = 0;
+
+
+        // inner loop per pixel: compute SAD
+        // for (size_t row = i - half_block_size; row < i + half_block_size; row += 1) {
+        //   for (size_t col = j - half_block_size; col < j + half_block_size; col += 1) {
+        //     SAD += abs((int)(((uint8_t *)left->data)[row * width + col] - ((uint8_t *)right->data)[row * width + col + offset]));
+        //     // printf("%d\n", (((uint8_t *)left->data)[row * width + col] - ((uint8_t *)right->data)[row * width + col + offset]));
+        //     sad_iop += 1;
+        //   }
+        // }
+
+
+        // tensor version
+
+        size_t row = i - half_block_size;
+        size_t col = j - half_block_size;
+        for (size_t row = i - half_block_size; row < half_block_size + i; row += 1) {
+        //   for (size_t col = j - half_block_size; col < half_block_size + j; col += 1) {
+
+            left_block->data = ((uint8_t *)left->data) + row*width + col;
+            right_block->data = ((uint8_t *)right->data) + row*width + col + offset;
+
+            NN_sub(diff, left_block, right_block);
+
+            // // NN_printf(diff);
+
+            diff->dtype = DTYPE_I8;
+
+            NN_asType(diff_wide, diff);
+            diff->dtype = DTYPE_U8;
+
+            NN_absInplace(diff_wide);
+
+            // NN_printf(diff);
+
+            NN_sum(out, diff_wide);
+            SAD += ((int32_t *)out->data)[0];
+
+        //   }
+        }
+        // printf("SAD: %d\n", SAD);
+        // return NULL;
+
+
+        // reduction step
+        if (SAD < min_SAD) {
+          // for debugging
+          // if (i == half_block_size) {
+          //     printf("Updated min_SAD: %x, SAD: %x, j: %d, offset: %d\n", min_SAD, SAD, j, offset);
+          // }
+          min_SAD = SAD;
+
+          disparity[(i-half_block_size)*(s_w)+j-half_block_size] = offset;
+        }
       }
+      // if (j > half_block_size - min_disparity + 2)
+      // return NULL;
+    }
   }
 
-  printf("SAD IOPs: %d\n", sad_iop);
+  NN_freeTensorData(left_block_signed);
+  NN_freeTensorData(right_block_signed);
+  NN_freeTensorData(diff);
+  NN_freeTensorData(out);
+  NN_deleteTensor(left_block_signed);
+  NN_deleteTensor(right_block_signed);
+  NN_deleteTensor(diff);
+  NN_deleteTensor(out);
+  NN_deleteTensor(left_block);
+  NN_deleteTensor(right_block);
 
+  printf("SAD IOPs: %d\n", sad_iop);
 
 
-  Disp_Image *disparity_image = (Disp_Image *)malloc(sizeof(Disp_Image));
-  if (!disparity_image) {
-      printf("Error: Memory allocation failed\n");
-      free(disparity);
-      return NULL;
-  }
-  disparity_image->width = s_w;
-  disparity_image->height = s_h;
-  disparity_image->data = disparity;
+  Tensor *disparity_image = NN_tensor(4, (const size_t[]){1, s_h, s_w, 1}, DTYPE_U8, disparity);
   return disparity_image;
 }
 
 int main() {
 
   file_size = (size_t)left_end - (size_t)left_start;
 
-  Image *left_image = load_image(left_data);
-
-  printf("Loaded left image\n");
-
-  Image *right_image = load_image(right_data);
-
-  printf("Loaded right image\n");
+  Tensor *left_image = NN_tensor(4, (const size_t[]){1, IMG_HEIGHT, IMG_WIDTH, 1}, DTYPE_U8, left_data);
+  Tensor *right_image = NN_tensor(4, (const size_t[]){1, IMG_HEIGHT, IMG_WIDTH, 1}, DTYPE_U8, right_data);
 
-  Disp_Image *disparity_image = compute_dispartiy(left_image, right_image, 0, 32, 4);
+  Tensor *disparity_image = compute_dispartiy(left_image, right_image, 0, 32, 4);
   // Save the disparity image
 
   // write only the data
 
   printf("printing result\n");
+  NN_printShape(disparity_image);
+  printf("\n");
 
-  Tensor *img = NN_tensor(4, (const size_t[]){1, disparity_image->height, disparity_image->width, 1}, DTYPE_U8, disparity_image->data);
-  Tensor *img_small = NN_zeros(4, (const size_t[]){1, disparity_image->height / 8, disparity_image->width / 4, 1}, DTYPE_U8);
+  Tensor *img_small = NN_zeros(4, (const size_t[]){1, disparity_image->shape[1] / 4, disparity_image->shape[2] / 2, 1}, DTYPE_U8);
 
-  NN_interpolate(img_small, img, (float []){0.125, 0.25});
+
+
+  NN_interpolate(img_small, disparity_image, (float []){0.25, 0.5});
 
   showASCIIImage(img_small);
 
-
-  free_image(left_image);
-  free_image(right_image);
-  free_disp_image(disparity_image);
   return 0;
 }