diff --git a/geo_seg_success_sam_without_pca.ipynb b/geo_seg_success_sam_without_pca.ipynb new file mode 100644 index 0000000..4b4a76a --- /dev/null +++ b/geo_seg_success_sam_without_pca.ipynb @@ -0,0 +1,505 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import open3d as o3d\n", + "\n", + "full_pcd = o3d.io.read_point_cloud('ACMMP_model.ply')" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[1;33m[Open3D WARNING] GLFW Error: Cocoa: Failed to find service port for display\u001b[0;m\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-06-23 11:07:16.330 Python[772:9766839] WARNING: Secure coding is automatically enabled for restorable state! However, not on all supported macOS versions of this application. Opt-in to secure coding explicitly by implementing NSApplicationDelegate.applicationSupportsSecureRestorableState:.\n" + ] + } + ], + "source": [ + "o3d.visualization.draw_geometries([full_pcd])" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Finish: 0/4 x and 0/4 y\n", + "Finish: 0/4 x and 1/4 y\n", + "Finish: 0/4 x and 2/4 y\n", + "Finish: 0/4 x and 3/4 y\n", + "Finish: 0/4 x and 4/4 y\n", + "Finish: 1/4 x and 0/4 y\n", + "Finish: 1/4 x and 1/4 y\n", + "Finish: 1/4 x and 2/4 y\n", + "Finish: 1/4 x and 3/4 y\n", + "Finish: 1/4 x and 4/4 y\n", + "Finish: 2/4 x and 0/4 y\n", + "Finish: 2/4 x and 1/4 y\n", + "Finish: 2/4 x and 2/4 y\n", + "Finish: 2/4 x and 3/4 y\n", + "Finish: 2/4 x and 4/4 y\n", + "Finish: 3/4 x and 0/4 y\n", + "Finish: 3/4 x and 1/4 y\n", + "Finish: 3/4 x and 2/4 y\n", + "Finish: 3/4 x and 3/4 y\n", + "Finish: 3/4 x and 4/4 y\n", + "Finish: 4/4 x and 0/4 y\n", + "Finish: 4/4 x and 1/4 y\n", + "Finish: 4/4 x and 2/4 y\n", + "Finish: 4/4 x and 3/4 y\n", + "Finish: 4/4 x and 4/4 y\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "\n", + "def is_point_in_square(point, x_start, x_end, y_start, y_end):\n", + " return point[0] >= x_start and point[0] < x_end and point[1] >= y_start and point[1] < y_end\n", + "\n", + "\n", + "def divide_pcd_into_square_segments(points, colors, num_segments):\n", + " x_min, x_max = np.min(points[:, 0]), np.max(points[:, 0])\n", + " y_min, y_max = np.min(points[:, 1]), np.max(points[:, 1])\n", + "\n", + " x_step = (x_max - x_min) / num_segments\n", + " y_step = (y_max - y_min) / num_segments\n", + "\n", + " x_grid = np.arange(x_min, x_max, x_step)\n", + " y_grid = np.arange(y_min, y_max, y_step)\n", + " x, y = np.meshgrid(x_grid, y_grid)\n", + "\n", + " segments = []\n", + " for i in range(len(x_grid) - 1):\n", + " for j in range(len(y_grid) - 1):\n", + " # current square\n", + " x_start, x_end = x[i, j], x[i, j + 1]\n", + " y_start, y_end = y[i, j], y[i + 1, j]\n", + "\n", + " segment_points = []\n", + " segment_points_colors = []\n", + " for ind, point in enumerate(points):\n", + " if is_point_in_square(point, x_start, x_end, y_start, y_end):\n", + " segment_points.append(point)\n", + " segment_points_colors.append(colors[ind])\n", + " segment_points_array = np.asarray(segment_points, dtype=np.float32)\n", + " segment_points_colors_array = np.asarray(segment_points_colors, dtype=np.float32)\n", + "\n", + " dict = {\n", + " 'points': segment_points_array,\n", + " 'colors': segment_points_colors_array\n", + " }\n", + " segments.append(dict)\n", + " print(\"Finish: {}/{} x and {}/{} y\".format(i, len(x_grid) - 1 - 1, j, len(y_grid) - 1 - 1))\n", + "\n", + " return segments\n", + "\n", + "segment_dicts = divide_pcd_into_square_segments(np.asarray(full_pcd.points), np.asarray(full_pcd.colors), num_segments=6)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "4679809" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(segment_dicts[7]['points'])" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[1;33m[Open3D WARNING] GLFW Error: Cocoa: Failed to find service port for display\u001b[0;m\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-06-23 11:29:43.752 Python[929:9775455] WARNING: Secure coding is automatically enabled for restorable state! However, not on all supported macOS versions of this application. Opt-in to secure coding explicitly by implementing NSApplicationDelegate.applicationSupportsSecureRestorableState:.\n" + ] + } + ], + "source": [ + "final_pcd_segment = o3d.geometry.PointCloud()\n", + "final_pcd_segment.points = o3d.utility.Vector3dVector(segment_dicts[7]['points'])\n", + "final_pcd_segment.colors = o3d.utility.Vector3dVector(segment_dicts[7]['colors'])\n", + "o3d.visualization.draw_geometries([final_pcd_segment])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# перенос масок sam" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class Camera:\n", + " def __init__(self):\n", + " self.K = [0.0] * 9\n", + " self.R = [0.0] * 9\n", + " self.t = [0.0] * 3\n", + " self.height = 0\n", + " self.width = 0\n", + " self.depth_min = 0.0\n", + " self.depth_max = 0.0" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def read_camera(cam_path):\n", + " camera = Camera()\n", + " with open(cam_path, \"r\") as file:\n", + " line = file.readline().strip()\n", + " for i in range(3):\n", + " (\n", + " camera.R[3 * i + 0],\n", + " camera.R[3 * i + 1],\n", + " camera.R[3 * i + 2],\n", + " camera.t[i],\n", + " ) = map(float, file.readline().split())\n", + "\n", + " tmp = list(map(float, file.readline().split()))\n", + " line = file.readline().strip()\n", + " line = file.readline().strip()\n", + "\n", + " for i in range(3):\n", + " camera.K[3 * i + 0], camera.K[3 * i + 1], camera.K[3 * i + 2] = map(\n", + " float, file.readline().split()\n", + " )\n", + "\n", + " line = file.readline().strip()\n", + " camera.depth_min, interval, depth_num, camera.depth_max = map(\n", + " float, file.readline().split()\n", + " )\n", + "\n", + " return camera" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "PointCloud with 4679809 points." + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import numpy as np\n", + "import open3d as o3d\n", + "import copy\n", + "\n", + "cams_file = \"images_cams/cams/00000000_cam.txt\"\n", + "camera = read_camera(str(cams_file))\n", + "trans = np.asarray(\n", + " [\n", + " [camera.R[0], camera.R[1], camera.R[2], camera.t[0]],\n", + " [camera.R[3], camera.R[4], camera.R[5], camera.t[1]],\n", + " [camera.R[6], camera.R[7], camera.R[8], camera.t[2]],\n", + " [0, 0, 0, 1],\n", + " ]\n", + ")\n", + "pcd_segment_copy = copy.deepcopy(final_pcd_segment)\n", + "pcd_segment_copy.transform(trans)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "3379389\n" + ] + } + ], + "source": [ + "def hidden_removal_points(pcd):\n", + " diameter = np.linalg.norm(\n", + " np.asarray(pcd.get_max_bound()) - np.asarray(pcd.get_min_bound())\n", + " )\n", + " cam = [0, 0, 0]\n", + " radius = diameter * 100000\n", + " _, indices = pcd.hidden_point_removal(cam, radius)\n", + " return indices\n", + "\n", + "indices = hidden_removal_points(pcd_segment_copy)\n", + "print(len(indices))" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[1;33m[Open3D WARNING] GLFW Error: Cocoa: Failed to find service port for display\u001b[0;m\n" + ] + } + ], + "source": [ + "def get_subpcd(pcd, indices):\n", + " subpcd = o3d.geometry.PointCloud()\n", + " subpcd.points = o3d.utility.Vector3dVector(np.asarray(pcd.points)[indices])\n", + " subpcd.colors = o3d.utility.Vector3dVector(np.asarray(pcd.colors)[indices])\n", + " return subpcd\n", + "\n", + "pcd_hidden_removal = get_subpcd(pcd_segment_copy, indices)\n", + "o3d.visualization.draw_geometries([pcd_hidden_removal])" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "\n", + "def get_image_instances():\n", + " masks_path = Path.cwd().joinpath(\n", + " \"geo-seg/vfm-labels/sam/00000000.npz\"\n", + " )\n", + " return np.load(masks_path, allow_pickle=True)[\"masks\"]\n", + "\n", + "masks = get_image_instances()" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "def masks_to_image(masks):\n", + " image_labels = np.zeros(masks[0][\"segmentation\"].shape)\n", + " for i, mask in enumerate(masks):\n", + " image_labels[mask[\"segmentation\"]] = i + 1\n", + " return image_labels" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "image_labels = masks_to_image(masks)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "def get_points_to_pixels(points, img_shape):\n", + " img_width, img_height = img_shape\n", + "\n", + " intrinsic = np.asarray(\n", + " [\n", + " [camera.K[0], camera.K[1], camera.K[2]],\n", + " [camera.K[3], camera.K[4], camera.K[5]],\n", + " [camera.K[6], camera.K[7], camera.K[8]],\n", + " ]\n", + " )\n", + "\n", + " points_proj = intrinsic @ points.T\n", + " points_proj[:2, :] /= points_proj[2, :]\n", + " points_coord = points_proj.T\n", + "\n", + " inds = np.where(\n", + " (points_coord[:, 0] < img_width)\n", + " & (points_coord[:, 0] >= 0)\n", + " & (points_coord[:, 1] < img_height)\n", + " & (points_coord[:, 1] >= 0)\n", + " & (points_coord[:, 2] > 0)\n", + " )[0]\n", + " print(len(inds))\n", + "\n", + " points_ind_to_pixels = {}\n", + " for ind in inds:\n", + " points_ind_to_pixels[ind] = points_coord[ind][:2].astype(int)\n", + "\n", + " return points_ind_to_pixels\n" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2135788\n" + ] + } + ], + "source": [ + "points2instances = np.zeros((len(pcd_segment_copy.points), 1), dtype=int)\n", + "\n", + "points_to_pixels = get_points_to_pixels(\n", + " np.asarray(pcd_hidden_removal.points),\n", + " ((image_labels.shape[1], image_labels.shape[0])),\n", + ")\n", + "\n", + "for point_id, pixel_id in points_to_pixels.items():\n", + " points2instances[indices[point_id], 0] = int(\n", + " image_labels[pixel_id[1], pixel_id[0]]\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "import random\n", + "\n", + "def generate_random_colors(N):\n", + " colors = [[0, 0, 0]]\n", + " for _ in range(N):\n", + " colors.append(\n", + " [random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)]\n", + " )\n", + "\n", + " colors = np.vstack(colors) / 255\n", + " return colors" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [], + "source": [ + "def color_pcd_by_labels(pcd, labels):\n", + " colors = generate_random_colors(len(labels) + 1)\n", + " pcd_colored = copy.deepcopy(pcd)\n", + " pcd_colored.colors = o3d.utility.Vector3dVector(\n", + " np.zeros(np.asarray(pcd.points).shape)\n", + " )\n", + "\n", + " for i in range(len(pcd_colored.points)):\n", + " pcd_colored.colors[i] = colors[labels[i]]\n", + "\n", + " return pcd_colored" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [], + "source": [ + "pcd_colored = color_pcd_by_labels(pcd_segment_copy, points2instances[:, 0])" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[1;33m[Open3D WARNING] GLFW Error: Cocoa: Failed to find service port for display\u001b[0;m\n" + ] + } + ], + "source": [ + "o3d.visualization.draw_geometries([pcd_colored])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.6" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +}