# train.py


import torch
from models.SDFGrid import SDFGrid
from lietorch import SE3
from utils.scannet_data_tools.depth2point3D import init_pointcloud2
from render.render_helper import *
from models.rays import *
from wisp.core import Rays
from itertools import product
import time
# import wisp.ops.spc as wisp_spc_ops
from utils.scannet_data_tools.kitti360Viewer3DRaw import Kitti360Viewer3DRaw
class Train():
        def __init__(self, device, config, data_list, vec_es, vec_cam, cubes_num):
                """Open the Velodyne reader for the configured sequence, then set up training.

                The sequence id is the single character at index ``-6`` of the third-from-last
                ``/``-separated component of ``config['path']['dataset_dir']``, parsed as an int.
                NOTE(review): presumably that component looks like ``..._drive_000N_sync`` so the
                character is the last digit of the drive number -- confirm against the dataset layout.

                Every other argument is forwarded unchanged to ``init_train``.
                """
                path_parts = config['path']['dataset_dir'].split("/")
                sequence_char = path_parts[-3][-6]
                self.velo = Kitti360Viewer3DRaw(mode='velodyne', seq=int(sequence_char))

                # init_train reads self.velo, so the reader must exist before it runs.
                self.init_train(device, config, data_list, vec_es, vec_cam, cubes_num)

                # Timing buffers; in the visible code they are only referenced by
                # commented-out profiling statements.
                self.pre_time = []
                self.hit_time = []
                self.back_time = []

        def init_train(self, device, config, data_list, vec_es, vec_cam, cubes_num):
                """Create the SDF grid, seed it from LiDAR points and load the training config.

                Args:
                        device: forwarded to ``SDFGrid`` and ``init_pointcloud2``.
                        config: nested configuration mapping; ``config['path']`` is read here and
                                the whole mapping is passed on to ``set_train_cfg``.
                        data_list: frame indices; only the first and last entries are used, to
                                build the frame range handed to ``init_pointcloud2``.
                        vec_es: mapping from ``str(frame_index)`` to a pose that ``SE3`` accepts.
                        vec_cam: forwarded to ``SDFGrid``.
                        cubes_num: ``0`` derives the grid extents from the point cloud; any other
                                value restores a previously saved grid and only re-centres it.

                The pose list is built once per branch, for exactly the frame range that branch
                uses. (A list built up-front from ``data_list`` was always overwritten before
                being read, so it has been removed.)
                """
                self.nef = SDFGrid(device, config, vec_es, vec_cam)

                if cubes_num == 0:
                        # Half-open range: the last entry of data_list is not included.
                        frames = range(data_list[0], data_list[-1])
                        poses = [SE3(vec_es[f"{idx}"]).matrix() for idx in frames]
                        # With is_first=True the helper returns the grid extents together with
                        # the accumulated world-space points.
                        (self.nef.world_dims,
                         self.nef.volume_origin,
                         self.nef.voxel_dims,
                         world_points) = init_pointcloud2(
                                config, self.velo, frames, poses,
                                self.nef.voxel_sizes, device, is_first=True)
                        # scale is half of the largest world extent; origin is volume_origin
                        # shifted by half the extents.
                        self.nef.scale = self.nef.world_dims.max() / 2
                        self.nef.origin = self.nef.volume_origin + self.nef.world_dims / 2
                        pointcloud = (world_points - self.nef.origin) / self.nef.scale  # grid coordinates

                        if config['path']['dataset_type'] == 'maicity':
                                self.nef.init_from_pointcloud(pointcloud, vox_down_m=0.05 / self.nef.scale, dilate=1)
                        else:
                                self.nef.init_from_pointcloud(pointcloud, vox_down_m=0.05 / self.nef.scale)
                else:
                        # NOTE(review): always restores grid 0; an earlier commented-out variant
                        # used f"grid/optimed_grid_{cubes_num-1}.pth" -- confirm which is intended.
                        grid_path = os.path.join(config['path']['proj_dir'], "grid/optimed_grid_0.pth")
                        # Five extra frames on each side of the requested span.
                        # TODO: an earlier variant used data_list itself here.
                        frames = range(data_list[0] - 5, data_list[-1] + 5)
                        self.nef.read_last_grid(grid_path)
                        poses = [SE3(vec_es[f"{idx}"]).matrix() for idx in frames]
                        # With is_first=False the helper returns the points and their bounds.
                        world_points, bounds = init_pointcloud2(
                                config, self.velo, frames, poses,
                                self.nef.voxel_sizes, device, is_first=False)
                        # Keep world_dims as it is and shift the volume origin by half of the
                        # slack between world_dims and (bounds[:, 1] - bounds[:, 0]).
                        # NOTE(review): presumably bounds holds per-axis (min, max) -- confirm.
                        self.nef.volume_origin = bounds[:, 0] - (self.nef.world_dims - bounds[:, 1] + bounds[:, 0]) / 2
                        self.nef.origin = self.nef.volume_origin + self.nef.world_dims / 2
                        pointcloud = (world_points - self.nef.origin) / self.nef.scale
                        self.nef.init_from_pointcloud(pointcloud, vox_down_m=0.05 / self.nef.scale)

                # Drop the references to the point tensors before the rest of the setup.
                del world_points, pointcloud

                self.set_train_cfg(config)

        def set_train_cfg(self, config):
                """Cache learning rates, loss weights and loss-term switches from ``config``.

                Reads ``config['train']``, ``config['loss_term']`` and
                ``config['path']['dataset_type']`` and populates ``self.train_info``,
                ``self.lr``, ``self.pose_epoch``, ``self.lamudas``, ``self.optim_flag``, the
                ``*_flag`` attributes and ``self.ray_long`` (the row width used when ray
                batches are reshaped).

                Fix: the non-KITTI branch used ``~flag`` to negate the switches. On a Python
                ``bool`` that is a bitwise invert (``~True == -2``, ``~False == -1``) and is
                always truthy, so ``ray_long`` was 7 whatever the flags were. Logical negation
                is used now: 11 with normals enabled, 8 with only semantics enabled, 7 otherwise.
                NOTE(review): datasets that relied on the old constant 7 while having those
                flags switched on will now be reshaped differently -- confirm the ray layout.
                """
                train_cfg = config['train']
                self.train_info = train_cfg

                # Learning rates; a disabled optimisation target gets a rate of 0.
                self.lr = {
                        'occ_lr': train_cfg['occ_lr'] if train_cfg['depth_optim'] else 0,
                        'pose_lr': train_cfg['pose_lr'] if train_cfg['pose_optim'] else 0,
                        's_lr': train_cfg['s_lr'],
                        'rgb_lr': train_cfg['rgb_lr'],
                        'mlp_lr': train_cfg['mlp_lr'],
                }
                self.pose_epoch = train_cfg['pose_epoch']

                # Loss weights; the depth weights are zeroed when depth optimisation is off.
                self.lamudas = {
                        'depth_lamuda': train_cfg['depth_lamuda'] if train_cfg['depth_optim'] else 0,
                        'depth_lamuda_decay': train_cfg['depth_lamuda_decay'] if train_cfg['depth_optim'] else 0,
                        'sdf_near_lamuda': train_cfg['sdf_near_lamuda'],
                        'sdf_far_lamuda': train_cfg['sdf_far_lamuda'],
                        'normal_lamuda': train_cfg['normal_lamuda'],
                        'rgb_lamuda': train_cfg['rgb_lamuda'],
                        'semantic_lamuda': train_cfg['semantic_lamuda'],
                        'feature_lamuda': train_cfg['feature_lamuda'],
                }

                # Loss-term switches.
                loss_terms = config["loss_term"]
                self.optim_flag = loss_terms
                self.normal_flag = loss_terms["normal"]
                self.semantic_flag = loss_terms["semantic2d"] or loss_terms["semantic3d"]
                self.rgb_flag = loss_terms["rgb"]
                self.feature_flag = loss_terms["feature"]

                # Number of values stored per ray.
                if config['path']['dataset_type'] == 'kitti-360':
                        # Base width of 8, plus 3 for normals, 3 for rgb and 64 for features.
                        self.ray_long = 8 + self.normal_flag * 3 + self.rgb_flag * 3 + self.feature_flag * 64
                elif self.normal_flag:
                        self.ray_long = 11
                elif self.semantic_flag:
                        self.ray_long = 8
                else:
                        self.ray_long = 7
        
        def set_frame_optimer(self, path_base, data_list, vec_loam, vec_gt, vec_es_odom, key_idx, traj_gt, traj_loam):
                """Attach a frame list to the grid, initialise its poses and (re)build its optimisers.

                ``data_list`` is stored on the grid and, together with the remaining arguments,
                forwarded to ``self.nef.init_pose``. The optimisers are then created from the
                cached learning rates, and the depth, eikonal and smoothing weights are reloaded
                from the training config.
                NOTE(review): ``depth_lamuda`` is reloaded without looking at ``depth_optim``,
                which replaces the 0 that ``set_train_cfg`` stores when depth optimisation is
                off -- confirm this is intended.

                Prints the resulting weight and learning-rate dictionaries.
                """
                grid = self.nef
                grid.data_list = data_list
                grid.init_pose(path_base, data_list, vec_loam, vec_gt, vec_es_odom, key_idx, traj_gt, traj_loam)

                rates = self.lr
                grid.set_optimer(rates['occ_lr'], rates['pose_lr'], rates['s_lr'], rates['rgb_lr'], rates['mlp_lr'])
                self.optim_pose = self.train_info['pose_optim']

                for weight_name in ('depth_lamuda', 'eik_lamuda', 'smoothing_lamuda'):
                        self.lamudas[weight_name] = self.train_info[weight_name]

                print(self.lamudas)
                print(self.lr)
        
        def update_optimer(self):
                """Rebuild the grid's optimisers from ``self.lr`` and refresh the dependent state.

                Pose optimisation is considered enabled when the cached pose learning rate is
                positive. The depth, eikonal and smoothing weights are reloaded from the
                training config.
                """
                rate_order = ('occ_lr', 'pose_lr', 's_lr', 'rgb_lr', 'mlp_lr')
                self.nef.set_optimer(*(self.lr[rate_name] for rate_name in rate_order))
                self.optim_pose = self.lr['pose_lr'] > 0

                for weight_name in ('depth_lamuda', 'eik_lamuda', 'smoothing_lamuda'):
                        self.lamudas[weight_name] = self.train_info[weight_name]

        def init_loss(self):
                """Reset every ``self.losses_<term>`` history to its own fresh empty list."""
                loss_terms = (
                        'occ', 'depth', 'tv', 'eik', 'near', 'far',
                        'normal1', 'normal2', 'rgb', 'sem', 'feat', 'pose',
                )
                for term in loss_terms:
                        setattr(self, f"losses_{term}", [])

        def record_loss(self, loss_occ, depthloss, smoothing_error, eik_error, sdf_error_near, sdf_error_far, sem_error):
                """Append one value per tracked term to the matching ``self.losses_<term>`` list.

                The normal, rgb, feature and pose histories are not touched here.
                """
                samples = (
                        ('occ', loss_occ),
                        ('depth', depthloss),
                        ('tv', smoothing_error),
                        ('eik', eik_error),
                        ('near', sdf_error_near),
                        ('far', sdf_error_far),
                        ('sem', sem_error),
                )
                for term, value in samples:
                        getattr(self, f"losses_{term}").append(value)

        def mean_loss(self, e, show=False):
                losses_occ_mean = np.stack(self.losses_occ).mean()
                depthloss_mean = np.stack(self.losses_depth).mean()
                tv_mean = np.stack(self.losses_tv).mean()
                eik_mean = np.stack(self.losses_eik).mean()
                near_mean = np.stack(self.losses_near).mean()
                far_mean = np.stack(self.losses_far).mean()
                sem_mean = np.stack(self.losses_sem).mean()
                log_depth = f"Train Report : epoch={e}, occ_loss={losses_occ_mean}, occ_lr={self.nef.optim_occ.state_dict()['param_groups'][0]['lr']}, depth={depthloss_mean}, tv={tv_mean},eik={eik_mean},near={near_mean},far={far_mean}, sem={sem_mean}" #,n1={normal1_mean},n2={normal2_mean}
                print(log_depth)
                # logs.append(log_depth+"\n")
                if show:
                        self.loss = losses_occ_mean
                        self.depth = depthloss_mean
                        self.smooth = tv_mean
                        self.eik = eik_mean
                        self.near = near_mean
                        self.far = far_mean
                        # self.rgb = rgb_mean
                        self.sem = sem_mean
                        # self.feat = feat_mean
                        # self.pose = pose_mean
        
        def tensorboard_show(self, writer, its):
                """Log the stored mean losses to ``writer`` as scalars at step ``its``.

                Reads ``self.loss``, ``self.depth``, ``self.smooth``, ``self.eik``,
                ``self.near``, ``self.far`` and ``self.sem``, which ``mean_loss`` stores when
                called with ``show=True``.
                """
                scalars = (
                        ("Loss", "loss"),
                        ("depth", "depth"),
                        ("smooth", "smooth"),
                        ("eik", "eik"),
                        ("near", "near"),
                        ("far", "far"),
                        ("sem", "sem"),
                )
                for tag, attribute in scalars:
                        writer.add_scalar(tag, getattr(self, attribute), its)

        def train(self, train_data, train_list_key_frame, e, tqdm_t, cos_anneal_ratio=0):
                """Run one optimisation step, record its losses and update the progress bar.

                Args:
                        train_data: ray batch forwarded to ``get_rays_loss``.
                        train_list_key_frame: forwarded to ``get_rays_loss`` as its ``data_list``.
                        e: current epoch, forwarded as ``epoch_n``.
                        tqdm_t: progress object whose ``set_postfix`` receives the per-term values.
                        cos_anneal_ratio: forwarded unchanged to ``get_rays_loss``.
                """
                total, terms = self.get_rays_loss(
                        train_data,
                        train_list_key_frame,
                        epoch_n=e,
                        cos_anneal_ratio=cos_anneal_ratio,
                )

                self.record_loss(
                        total,
                        terms['depth'],
                        terms['smooth'],
                        terms['eikonal'],
                        terms['near'],
                        terms['far'],
                        terms['semantic'],
                )

                postfix = {
                        'sdf': terms['near'],
                        'depth': terms['depth'],
                        'tv': terms['smooth'],
                        'eik': terms['eikonal'],
                        's': self.nef.s.data.item(),
                        'sem': terms['semantic'],
                }
                tqdm_t.set_postfix(**postfix)

        #----------------------------------Loss------------------------------- 
        def get_rays_loss(self, rayses, data_list, epoch_n=0, cos_anneal_ratio=0):
                """Run one optimisation step on a batch of rays and return the scalar losses.

                Args:
                        rayses: ray batch handed to ``self.nef.get_rays_World``.
                        data_list: unused here; kept for interface compatibility.
                        epoch_n: current epoch, forwarded to ``self.nef.get_hit``.
                        cos_anneal_ratio: forwarded to ``self.nef.get_hit``.

                Returns:
                        ``(Loss, loss)``: the weighted total and the per-term dictionary returned
                        by ``get_hit``, with every tensor converted to a Python number.

                Rays are flattened to rows of ``self.ray_long`` values. As used below, columns
                0:3 are the origin, 3:6 the direction, the second-to-last the semantic label and
                the last the depth. Rows labelled 23 (sky) are dropped.

                Fix: the sky mask and the filtered labels used a bare ``squeeze()``, which
                collapses them to 0-d tensors when exactly one ray is present or survives the
                filter. ``squeeze(-1)`` removes only the trailing label axis, so both always
                keep their batch dimension.
                """
                rays_w = self.nef.get_rays_World(rayses)
                rays_g = get_rays_sfm(rays_w, self.nef.scale, self.nef.origin)

                self.nef.iter_n += 1
                self.nef.optim_occ.zero_grad()

                rays_g_all = torch.reshape(rays_g, (-1, self.ray_long))
                semantic_gt = rays_g_all[:, -2:-1].clone().detach().int()
                not_sky_mask = (semantic_gt != 23).squeeze(-1)  # sky=23
                rays_g_all = rays_g_all[not_sky_mask, :]
                rays_go = rays_g_all[:, :3]
                rays_gd = rays_g_all[:, 3:6]
                semantic_gt = semantic_gt[not_sky_mask, :].squeeze(-1)
                ground_truth = {'depth': rays_g_all[:, -1]}
                ground_truth['semantic'] = semantic_gt

                z_near, z_far = instection(rays_go, rays_gd, ground_truth['depth'])

                rays_g = Rays(origins=rays_go, dirs=rays_gd, dist_min=z_near, dist_max=z_far)
                loss = self.nef.get_hit(
                        rays_g,
                        ground_truth,
                        self.nef.iter_n,
                        epoch_n,
                        cos_anneal_ratio,
                )

                # Weighted sum of the active terms. The eikonal term is reported by get_hit
                # but is not part of the optimised total.
                Loss = 0
                Loss += self.lamudas['depth_lamuda'] * loss['depth']
                Loss += self.lamudas['smoothing_lamuda'] * loss['smooth']
                Loss += self.lamudas['sdf_near_lamuda'] * loss['near'] + self.lamudas['sdf_far_lamuda'] * loss['far']
                if self.semantic_flag or self.nef.optim_mode == 3:
                        Loss += self.lamudas['semantic_lamuda'] * loss['semantic']

                Loss.backward(retain_graph=True)

                self.nef.optim_occ.step()
                if self.optim_pose:
                        if self.nef.epoch >= self.pose_epoch:
                                # Poses are only updated once the configured epoch is reached.
                                self.nef.pose_optimizer.step()
                                self.nef.pose_optimizer.zero_grad()
                                self.nef.pose_update()
                                with torch.no_grad():
                                        self.nef.plot_traj()
                        else:
                                # Before that, discard the pose gradients accumulated by backward().
                                self.nef.pose_optimizer.zero_grad()

                # Convert everything to plain Python numbers for logging.
                Loss = Loss.cpu().item() if torch.is_tensor(Loss) else Loss
                for key in loss.keys():
                        loss[key] = loss[key].cpu().item() if torch.is_tensor(loss[key]) else loss[key]

                return Loss, loss

        #-------------------------------------------------------------
        def get_depth_lidar(self, rays_d, c2w, depth_gt):
                """Trace rays through the grid and return predicted depth with both point sets.

                Returns ``(depth, points, points_gt)`` where ``points`` uses the traced depth
                and ``points_gt`` uses ``depth_gt``; both are ``origin + direction * depth``.

                NOTE(review): ``self.tracer`` is not assigned anywhere in the visible code, so
                this presumably raises ``AttributeError`` as written -- confirm before use.
                NOTE(review): ``get_rays`` is called with two arguments here but with three in
                ``get_sdf_reander`` -- confirm which signature is current.
                """
                origins, directions = get_rays(rays_d, c2w)
                grid_origins = get_rays_sfm(origins[None, :, :], self.nef.scale, self.nef.origin)
                with torch.no_grad():
                        depth = self.tracer.get_hit(
                                self.nef, grid_origins[0, :, :], directions, depth_gt, None, self.nef.iter_n, flag=0)
                        points = origins + directions * depth
                        points_gt = origins + directions * depth_gt[:, None]
                return depth, points, points_gt
        
        def get_sdf_reander(self, rays_o, rays_d, c2w):
                """Sphere-trace the grid along the given rays without tracking gradients.

                The rays are passed through ``get_rays`` with ``c2w``, the origins are mapped
                with ``get_rays_sfm`` using the grid's scale and origin, and the result of
                ``self.nef.sphere_tracing`` is returned as
                ``(surface_point, normal, depth_SF, rgb)``.
                """
                world_origins, world_dirs = get_rays(rays_o, rays_d, c2w)
                grid_origins = get_rays_sfm(world_origins[None, :, :], self.nef.scale, self.nef.origin)
                with torch.no_grad():
                        traced = self.nef.sphere_tracing(grid_origins[0, :, :], world_dirs)
                        surface_point, normal, depth_SF, rgb = traced
                return surface_point, normal, depth_SF, rgb


def clean_cache():
        """Call ``torch.cuda.empty_cache()`` five times in a row."""
        for _ in range(5):
                torch.cuda.empty_cache()

@torch.no_grad()
def instection(rays_o, rays_d, depth_gt):
        """Return per-ray ``(z_near, z_far)`` sampling bounds derived from the measured depth.

        ``z_near`` is 1.0 for every ray, created on ``rays_o``'s device with the default
        dtype. ``z_far`` is ``depth_gt + 2`` where the depth is non-negative and
        ``-depth_gt + 80`` where it is negative. ``depth_gt`` is not modified and
        ``rays_d`` is not used.
        """
        ray_count = depth_gt.size(0)
        z_near = torch.ones(ray_count, device=rays_o.device)
        z_far = torch.where(depth_gt < 0, -depth_gt + 80, depth_gt + 2)
        return z_near, z_far


def expand_points(points, voxel_size):
    """
    Naive sparse dilation: add a copy of every point shifted one voxel down in z.

    The result is the union of ``points`` and ``points + (0, 0, -1) * voxel_size``,
    de-duplicated and sorted by ``np.unique(..., axis=0)``.

    The previous offset expression produced these same two offsets four times each,
    so eight copies of the cloud were concatenated before de-duplication. Only the
    two distinct offsets are used now; the returned array is unchanged.
    """
    # Distinct offsets in voxel units: one step along -z, and the point itself.
    offsets = ((0, 0, -1), (0, 0, 0))
    shifted = [points + np.array(offset) * voxel_size for offset in offsets]
    return np.unique(np.concatenate(shifted, axis=0), axis=0)