From b912ff8f0101eca49b9e1ba0568886c9dc565118 Mon Sep 17 00:00:00 2001 From: Kangrui Wang Date: Mon, 8 Apr 2024 20:50:08 -0500 Subject: [PATCH] added scene_graphs --- igibson/scene_graphs/__init__.py | 0 .../scene_graphs/behavior_demo_sg_exporter.py | 53 ++++ .../behavior_demo_to_scene_graph.py | 53 ++++ igibson/scene_graphs/enum_obj_states.py | 61 ++++ igibson/scene_graphs/graph_builder.py | 285 ++++++++++++++++++ igibson/scene_graphs/graph_exporter.py | 90 ++++++ igibson/utils/utils.py | 9 + 7 files changed, 551 insertions(+) create mode 100644 igibson/scene_graphs/__init__.py create mode 100644 igibson/scene_graphs/behavior_demo_sg_exporter.py create mode 100644 igibson/scene_graphs/behavior_demo_to_scene_graph.py create mode 100644 igibson/scene_graphs/enum_obj_states.py create mode 100644 igibson/scene_graphs/graph_builder.py create mode 100644 igibson/scene_graphs/graph_exporter.py diff --git a/igibson/scene_graphs/__init__.py b/igibson/scene_graphs/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/igibson/scene_graphs/behavior_demo_sg_exporter.py b/igibson/scene_graphs/behavior_demo_sg_exporter.py new file mode 100644 index 000000000..334fb8e03 --- /dev/null +++ b/igibson/scene_graphs/behavior_demo_sg_exporter.py @@ -0,0 +1,53 @@ +import argparse +import os + +import h5py + +import igibson +from igibson.examples.behavior.behavior_demo_batch import behavior_demo_batch +from igibson.scene_graphs.graph_exporter import SceneGraphExporter + + +def parse_args(): + testdir = os.path.join(igibson.ig_dataset_path, "tests") + manifest = os.path.join(testdir, "test_manifest.txt") + parser = argparse.ArgumentParser(description="Build live scene graphs from BEHAVIOR demos in manifest.") + parser.add_argument( + "--demo_root", type=str, default=testdir, help="Directory containing demos listed in the manifest." + ) + parser.add_argument( + "--log_manifest", type=str, default=manifest, help="Plain text file consisting of list of demos to replay." + ) + parser.add_argument("--out_dir", type=str, default=testdir, help="Directory to store results in.") + parser.add_argument("--num_frames_per_demo", type=int, default=None, help="Number of frames to save per demo.") + return parser.parse_args() + + +def main(): + args = parse_args() + + def get_scene_graph_callbacks(demo_name, out_dir): + path = os.path.join(out_dir, demo_name + "_sg_data.hdf5") + h5py_file = h5py.File(path, "w") + extractors = [SceneGraphExporter(h5py_file, full_obs=True, num_frames_to_save=args.num_frames_per_demo)] + + return ( + [extractor.start for extractor in extractors], + [extractor.step for extractor in extractors], + [lambda a, b: h5py_file.close()], + [], + ) + + behavior_demo_batch( + args.demo_root, + args.log_manifest, + args.out_dir, + get_scene_graph_callbacks, + image_size=(640, 720), + ignore_errors=True, + debug_display=False, + ) + + +if __name__ == "__main__": + main() diff --git a/igibson/scene_graphs/behavior_demo_to_scene_graph.py b/igibson/scene_graphs/behavior_demo_to_scene_graph.py new file mode 100644 index 000000000..4ef4df8c0 --- /dev/null +++ b/igibson/scene_graphs/behavior_demo_to_scene_graph.py @@ -0,0 +1,53 @@ +import argparse +import os + +import igibson +from igibson.examples.behavior.behavior_demo_batch import behavior_demo_batch +from igibson.scene_graphs.graph_builder import SceneGraphBuilderWithVisualization + + +def parse_args(): + testdir = os.path.join(igibson.ig_dataset_path, "tests") + manifest = os.path.join(testdir, "test_manifest.txt") + parser = argparse.ArgumentParser(description="Build live scene graphs from BEHAVIOR demos in manifest.") + parser.add_argument( + "--demo_root", type=str, default=testdir, help="Directory containing demos listed in the manifest." + ) + parser.add_argument( + "--log_manifest", type=str, default=manifest, help="Plain text file consisting of list of demos to replay." + ) + parser.add_argument("--out_dir", type=str, default=testdir, help="Directory to store results in.") + return parser.parse_args() + + +def main(): + args = parse_args() + + def get_scene_graph_callbacks(demo_name, out_dir): + path = os.path.join(out_dir, demo_name) + if not os.path.exists(path): + os.mkdir(path) + graph_builder = SceneGraphBuilderWithVisualization( + show_window=False, out_path=path, only_true=True, realistic_positioning=True + ) + + return ( + [graph_builder.start], + [graph_builder.step], + [], + [], + ) + + behavior_demo_batch( + args.demo_root, + args.log_manifest, + args.out_dir, + get_scene_graph_callbacks, + image_size=(640, 720), + ignore_errors=False, + debug_display=False, + ) + + +if __name__ == "__main__": + main() diff --git a/igibson/scene_graphs/enum_obj_states.py b/igibson/scene_graphs/enum_obj_states.py new file mode 100644 index 000000000..bda82c457 --- /dev/null +++ b/igibson/scene_graphs/enum_obj_states.py @@ -0,0 +1,61 @@ +from enum import Enum + + +class UnaryStatesEnum(Enum): + Burnt = 0 + CleaningTool = 1 + HeatSourceOrSink = 2 + Cooked = 3 + Dusty = 4 + Frozen = 5 + Open = 6 + Sliced = 7 + Slicer = 8 + Soaked = 9 + Stained = 10 + ToggledOn = 11 + WaterSource = 12 + InFOVOfRobot = 13 + InHandOfRobot = 14 + InReachOfRobot = 15 + InSameRoomAsRobot = 16 + + +# Temperture and max temperature are not included + + +class BinaryStatesEnum(Enum): + InRoom = 0 + Inside = 1 + NextTo = 2 + OnFloor = 3 + OnTop = 4 + Touching = 5 + Under = 6 + + +# Not in use, we use binary InRoom for these +class RoomStatesEnum(Enum): + IsInBathroom = 0 + IsInBedroom = 1 + IsInChildsRoom = 2 + IsInCloset = 3 + IsInCorridor = 4 + IsInDiningRoom = 5 + IsInEmptyRoom = 6 + IsInExerciseRoom = 7 + IsInGarage = 8 + IsInHomeOffice = 9 + IsInKitchen = 10 + IsInLivingRoom = 11 + IsInLobby = 12 + IsInPantryRoom = 13 + IsInPlayroom = 14 + IsInStaircase = 15 + IsInStorageRoom = 16 + IsInTelevisionRoom = 17 + IsInUtilityRoom = 18 + IsInBalcony = 19 + IsInLibrary = 20 + IsInAuditorium = 21 + IsInUndefined = 22 diff --git a/igibson/scene_graphs/graph_builder.py b/igibson/scene_graphs/graph_builder.py new file mode 100644 index 000000000..682636a0c --- /dev/null +++ b/igibson/scene_graphs/graph_builder.py @@ -0,0 +1,285 @@ +import itertools +import os + +import cv2 +import networkx as nx +import numpy as np +import pybullet as p +from matplotlib import pyplot as plt + +from igibson import object_states +from igibson.activity.activity_base import iGBEHAVIORActivityInstance +from igibson.external.pybullet_tools.utils import aabb_union, get_aabb, get_aabb_center, get_aabb_extent +from igibson.object_states.factory import get_state_name +from igibson.object_states.object_state_base import AbsoluteObjectState, BooleanState, RelativeObjectState +from igibson.utils.utils import z_rotation_from_quat + +DRAW_EVERY = 1 + + +def get_robot(activity: iGBEHAVIORActivityInstance): + assert len(activity.simulator.robots) == 1, "Exactly one robot should be available." + return activity.simulator.robots[0] + + +def get_robot_bbox(robot): + # The robot doesn't have a nicely annotated bounding box so we just append one for now. + aabb = aabb_union([get_aabb(part.get_body_id()) for part in robot.parts.values()]) + center = get_aabb_center(aabb) + extent = get_aabb_extent(aabb) + return (center, [0, 0, 0, 1]), extent + + +def get_robot_to_world_transform(robot): + # TODO: Maybe keep the graph in the base reference frame and only switch to eye for visualization. + robot_to_world = robot.parts["body"].get_position_orientation() + + # Get rid of any rotation outside xy plane + robot_to_world = robot_to_world[0], z_rotation_from_quat(robot_to_world[1]) + + return robot_to_world + + +def get_unary_states(obj, only_true=False): + states = {} + for state_type, state_inst in obj.states.items(): + if not issubclass(state_type, BooleanState) or not issubclass(state_type, AbsoluteObjectState): + continue + + value = state_inst.get_value() + if only_true and not value: + continue + + states[get_state_name(state_type)] = value + + return states + + +def get_all_binary_states(objs, only_true=False): + states = [] + for obj1 in objs: + for obj2 in objs: + if obj1 == obj2: + continue + + for state_type, state_inst in obj1.states.items(): + if not issubclass(state_type, BooleanState) or not issubclass(state_type, RelativeObjectState): + continue + + value = state_inst.get_value(obj2) + if only_true and not value: + continue + + states.append((obj1, obj2, get_state_name(state_type), {"value": value})) + + return states + + +class SceneGraphBuilder(object): + def __init__(self, egocentric=False, full_obs=False, only_true=False, merge_parallel_edges=False): + """ + @param egocentric: Whether the objects should have poses in the world frame or robot frame. + @param full_obs: Whether all objects should be updated or only those in FOV of the robot. + @param only_true: Whether edges should be created only for relative states that have a value True, or for all + relative states (with the appropriate value attached as an attribute). + @param merge_parallel_edges: Whether parallel edges (e.g. different states of the same pair of objects) should + exist (making the graph a MultiDiGraph) or should be merged into a single edge instead. + """ + self.G = None + self.egocentric = egocentric + self.full_obs = full_obs + self.only_true = only_true + self.merge_parallel_edges = merge_parallel_edges + self.last_desired_frame_to_world = None + + def _get_desired_frame(self, robot): + desired_frame_to_world = ([0, 0, 0], [0, 0, 0, 1]) + world_to_desired_frame = ([0, 0, 0], [0, 0, 0, 1]) + if self.egocentric: + desired_frame_to_world = get_robot_to_world_transform(robot) + world_to_desired_frame = p.invertTransform(*desired_frame_to_world) + + return desired_frame_to_world, world_to_desired_frame + + def start(self, activity, log_reader): + assert self.G is None, "Cannot start graph builder multiple times." + + robot = get_robot(activity) + self.G = nx.DiGraph() if self.merge_parallel_edges else nx.MultiDiGraph() + + desired_frame_to_world, world_to_desired_frame = self._get_desired_frame(robot) + robot_pose = p.multiplyTransforms(*world_to_desired_frame, *get_robot_to_world_transform(robot)) + robot_bbox_pose, robot_bbox_extent = get_robot_bbox(robot) + robot_bbox_pose = p.multiplyTransforms(*world_to_desired_frame, *robot_bbox_pose) + self.G.add_node( + robot.parts["body"], pose=robot_pose, bbox_pose=robot_bbox_pose, bbox_extent=robot_bbox_extent, states={} + ) + self.last_desired_frame_to_world = desired_frame_to_world + + def step(self, activity, log_reader): + assert self.G is not None, "Cannot step graph builder before starting it." + + # Prepare the necessary transformations. + robot = get_robot(activity) + desired_frame_to_world, world_to_desired_frame = self._get_desired_frame(robot) + + # Update the position of everything that's already in the scene by using our relative position to last frame. + old_desired_to_new_desired = p.multiplyTransforms(*world_to_desired_frame, *self.last_desired_frame_to_world) + for obj in self.G.nodes: + self.G.nodes[obj]["pose"] = p.multiplyTransforms(*old_desired_to_new_desired, *self.G.nodes[obj]["pose"]) + self.G.nodes[obj]["bbox_pose"] = p.multiplyTransforms( + *old_desired_to_new_desired, *self.G.nodes[obj]["bbox_pose"] + ) + + # Update the robot's pose. We don't want to accumulate errors because of the repeated transforms. + self.G.nodes[robot.parts["body"]]["pose"] = p.multiplyTransforms( + *world_to_desired_frame, *get_robot_to_world_transform(robot) + ) + robot_bbox_pose, robot_bbox_extent = get_robot_bbox(robot) + robot_bbox_pose = p.multiplyTransforms(*world_to_desired_frame, *robot_bbox_pose) + self.G.nodes[robot.parts["body"]]["bbox_pose"] = robot_bbox_pose + self.G.nodes[robot.parts["body"]]["bbox_extent"] = robot_bbox_extent + + # Go through the objects in FOV of the robot. + objs_to_add = set(activity.simulator.scene.get_objects()) | set(activity.object_scope.values()) + if not self.full_obs: + # If we're not in full observability mode, only pick the objects in FOV of robot. + bids_in_fov = robot.parts["body"].states[object_states.ObjectsInFOVOfRobot].get_value() + objs_in_fov = set( + activity.simulator.scene.objects_by_id[bid] + for bid in bids_in_fov + if bid in activity.simulator.scene.objects_by_id + ) + objs_to_add &= objs_in_fov + + # Filter out any agent parts. + objs_to_add = {obj for obj in objs_to_add if obj.category != "agent"} + + for obj in objs_to_add: + # Add the object if not already in the graph + if obj not in self.G.nodes: + self.G.add_node(obj) + + # Get the relative position of the object & update it (reducing accumulated errors) + self.G.nodes[obj]["pose"] = p.multiplyTransforms(*world_to_desired_frame, *obj.get_position_orientation()) + + # Get the bounding box. + bbox_center, bbox_orn, bbox_extent, _ = obj.get_base_aligned_bounding_box(visual=True,fallback_to_aabb=True) + self.G.nodes[obj]["bbox_pose"] = p.multiplyTransforms(*world_to_desired_frame, bbox_center, bbox_orn) + self.G.nodes[obj]["bbox_extent"] = bbox_extent + + # Update the states of the object + self.G.nodes[obj]["states"] = get_unary_states(obj, only_true=self.only_true) + + # Update the binary states for seen objects. + self.G.remove_edges_from(list(itertools.product(objs_to_add, objs_to_add))) + edges = get_all_binary_states(objs_to_add, only_true=self.only_true) + if self.merge_parallel_edges: + new_edges = {} + for edge in edges: + edge_pair = (edge[0], edge[1]) + if edge_pair not in new_edges: + new_edges[edge_pair] = [] + + new_edges[edge_pair].append((edge[2], edge[3]["value"])) + + edges = [(k[0], k[1], {"states": v}) for k, v in new_edges.items()] + + self.G.add_edges_from(edges) + + # Save the robot's transform in this frame. + self.last_desired_frame_to_world = desired_frame_to_world + + +class SceneGraphBuilderWithVisualization(SceneGraphBuilder): + def __init__(self, show_window=True, out_path=None, realistic_positioning=False, *args, **kwargs): + """ + @param show_window: Whether a cv2 GUI window containing the visualization should be shown. + @param out_path: Directory to output visualization frames to. If None, no frames will be saved. + @param realistic_positioning: Whether nodes should be positioned based on their position in the scene (if True) + or placed using a graphviz layout (neato) that makes it easier to read edges & find clusters. + @param args: Any positional arguments to forward to the SceneGraphBuilder constructor. + @param kwargs: Any keyword arguments to forward to the SceneGraphBuilder constructor. Note that the + merge_parallel_edges argument is forced to True here because the graph drawing mechanism cannot show + parallel edges. + """ + super(SceneGraphBuilderWithVisualization, self).__init__(*args, **kwargs, merge_parallel_edges=True) + assert show_window or out_path, "One of show_window or out_path should be set." + self.show_window = show_window + self.out_path = out_path + self.out_writer = None + self.realistic_positioning = realistic_positioning + + def draw_graph(self): + nodes = list(self.G.nodes) + node_labels = {obj: obj.category for obj in nodes} + colors = [ + "yellow" + if obj.category == "agent" + else ("green" if obj.states[object_states.InFOVOfRobot].get_value() else "red") + for obj in nodes + ] + positions = ( + {obj: (-pose[0][1], pose[0][0]) for obj, pose in self.G.nodes.data("pose")} + if self.realistic_positioning + else nx.nx_pydot.pydot_layout(self.G, prog="neato") + ) + nx.drawing.draw_networkx( + self.G, + pos=positions, + labels=node_labels, + nodelist=nodes, + node_color=colors, + font_size=4, + arrowsize=5, + node_size=150, + ) + + edge_labels = { + edge: ", ".join( + state + "=" + str(value) if not self.only_true else state # Don't print value in only_true mode. + for state, value in self.G.edges[edge]["states"] + ) + for edge in self.G.edges + } + nx.drawing.draw_networkx_edge_labels(self.G, pos=positions, edge_labels=edge_labels, font_size=4) + + def step(self, activity, *args): + super(SceneGraphBuilderWithVisualization, self).step(activity, *args) + + if activity.simulator.frame_count % DRAW_EVERY != 0: + return + + # Prepare pyplot figure that's sized to match the robot video. + robot_view = (get_robot(activity).render_camera_image()[0][..., :3] * 255).astype(np.uint8) + imgheight, imgwidth, _ = robot_view.shape + + figheight = 4.8 + figdpi = imgheight / figheight + figwidth = imgwidth / figdpi + + # Draw the graph onto the figure. + fig = plt.figure(figsize=(figwidth, figheight), dpi=figdpi) + self.draw_graph() + fig.canvas.draw() + + # Convert the canvas to image + graph_view = np.fromstring(fig.canvas.tostring_rgb(), dtype=np.uint8, sep="") + graph_view = graph_view.reshape(fig.canvas.get_width_height()[::-1] + (3,)) + assert graph_view.shape == robot_view.shape + plt.close(fig) + + # Combine the two images side-by-side + img = np.hstack((robot_view, graph_view)) + + # Convert to BGR for cv2-based viewing. + img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR) + + if self.out_path: + frame_path = os.path.join(self.out_path, "%05d.png" % activity.simulator.frame_count) + cv2.imwrite(frame_path, img) + + if self.show_window: + # display image with opencv or any operation you like + cv2.imshow("SceneGraph", img) + cv2.waitKey(1) diff --git a/igibson/scene_graphs/graph_exporter.py b/igibson/scene_graphs/graph_exporter.py new file mode 100644 index 000000000..7314b2e70 --- /dev/null +++ b/igibson/scene_graphs/graph_exporter.py @@ -0,0 +1,90 @@ +import json + +import numpy as np + +from igibson.scene_graphs.enum_obj_states import BinaryStatesEnum, UnaryStatesEnum +from igibson.scene_graphs.graph_builder import SceneGraphBuilder + + +class SceneGraphExporter(SceneGraphBuilder): + def __init__(self, h5py_file, num_frames_to_save=None, full_obs=False, **kwargs): + """ + @param h5py_file: file to store exported scene graph + @param num_frames_to_save: total number of equidistant frames to save. None if you want to set all frames. + """ + if num_frames_to_save is None: + assert full_obs, "You can only use full observability mode if you're subsampling frames." + + super(SceneGraphExporter, self).__init__(only_true=True, full_obs=full_obs, **kwargs) + self.h5py_file = h5py_file + + self.num_frames_to_save = num_frames_to_save + self.frame_idxes_to_save = None + + def start(self, activity, log_reader): + super(SceneGraphExporter, self).start(activity, log_reader) + + scene = activity.simulator.scene + objs = set(scene.get_objects()) | set(activity.object_scope.values()) + self.num_obj = len(objs) + self.dim = 34 # 34: dimension of all relevant information + # create a dictionary that maps objs to ids + self.obj_to_id = {obj: i for i, obj in enumerate(objs)} + + self.h5py_file.attrs["id_to_category"] = json.dumps( + {self.obj_to_id[obj]: obj.category for obj in self.obj_to_id} + ) + self.h5py_file.attrs["id_to_name"] = json.dumps({self.obj_to_id[obj]: obj.name for obj in self.obj_to_id}) + + if self.num_frames_to_save is not None: + key_presses = log_reader.hf["agent_actions"]["vr_robot"][:, [19, 27]] + assert len(key_presses) > 0, "No key press found: too few frames." + any_key_press = np.max(key_presses[200:], axis=1) + first_frame = np.argmax(any_key_press) + 200 + assert np.any(key_presses[first_frame] == 1), "No key press found: robot never activated." + self.frame_idxes_to_save = set( + np.linspace( + first_frame, log_reader.total_frame_num - 1, self.num_frames_to_save, endpoint=True, dtype=int + ) + ) + + def step(self, activity, log_reader): + frame_count = activity.simulator.frame_count + if self.num_frames_to_save is not None and frame_count not in self.frame_idxes_to_save: + return + + super(SceneGraphExporter, self).step(activity, log_reader) + print("Frame: %s" % frame_count) + + nodes_t = np.zeros((self.num_obj, self.dim), dtype=np.float32) + for obj in self.G.nodes: + states = self.G.nodes[obj]["states"] + unary_states = np.full(len(UnaryStatesEnum), -1, dtype=np.int8) + for state_name in states: + unary_states[UnaryStatesEnum[state_name].value] = states[state_name] + + nodes_t[self.obj_to_id[obj]] = ( + [item for tupl in self.G.nodes[obj]["pose"] for item in tupl] + + [item for tupl in self.G.nodes[obj]["bbox_pose"] for item in tupl] + + [item for item in self.G.nodes[obj]["bbox_extent"]] + + list(unary_states) + ) + + edges_t = [] + for edge in self.G.edges: + from_obj_id, to_obj_id = self.obj_to_id[edge[0]], self.obj_to_id[edge[1]] + edges_t.append([BinaryStatesEnum[edge[2]].value, from_obj_id, to_obj_id]) + + fc = str(frame_count) + self.h5py_file.create_dataset("/nodes/" + fc, data=nodes_t, compression="gzip") + self.h5py_file.create_dataset("/edges/" + fc, data=edges_t, compression="gzip") + + # profiler.stop() + + # html = profiler.output_html() + # html_path = "profile.html" + # with open(html_path, "w") as f: + # f.write(html) + + # import pdb + # pdb.set_trace() diff --git a/igibson/utils/utils.py b/igibson/utils/utils.py index 8a8e9dfe9..18457e937 100644 --- a/igibson/utils/utils.py +++ b/igibson/utils/utils.py @@ -221,3 +221,12 @@ def restoreState(*args, **kwargs): body_id, *p.getBasePositionAndOrientation(body_id), physicsClientId=kwargs.get("physicsClientId", 0) ) return p.restoreState(*args, **kwargs) + +def z_angle_from_quat(quat): + """Get the angle around the Z axis produced by the quaternion.""" + rotated_X_axis = R.from_quat(quat).apply([1, 0, 0]) + return np.arctan2(rotated_X_axis[1], rotated_X_axis[0]) + +def z_rotation_from_quat(quat): + """Get the quaternion for the rotation around the Z axis produced by the quaternion.""" + return R.from_euler("z", z_angle_from_quat(quat)).as_quat() \ No newline at end of file