diff --git a/costar_models/python/costar_models/callbacks.py b/costar_models/python/costar_models/callbacks.py index f7f5ccddb..aa7b1e092 100644 --- a/costar_models/python/costar_models/callbacks.py +++ b/costar_models/python/costar_models/callbacks.py @@ -312,6 +312,85 @@ def on_epoch_end(self, epoch, logs={}): fig.savefig(name, bbox_inches="tight") plt.close(fig) +class PredictorShowImageOnlyMultiStep(keras.callbacks.Callback): + ''' + Save an image showing what some number of frames and associated predictions + will look like at the end of an epoch. + ''' + + def __init__(self, predictor, features, targets, + model_directory=DEFAULT_MODEL_DIRECTORY, + num_hypotheses=4, + verbose=False, + features_name=None, + noise_dim=64, + use_noise=False, + name="model", + use_prev_option=True, + min_idx=0, max_idx=66, step=11): + ''' + Set up a data set we can use to output validation images. + + Parameters: + ----------- + predictor: model used to generate predictions + targets: training target info, in compressed form + num_hypotheses: how many outputs to expect + verbose: print out extra information + ''' + + if features_name is None: + self.features_name = "def" + else: + self.features_name = features_name + self.verbose = verbose + self.predictor = predictor + self.idxs = range(min_idx, max_idx, step) + self.num = len(self.idxs) + self.features = [f[self.idxs] for f in features] + self.targets = [np.squeeze(t[self.idxs]) for t in targets] + self.epoch = 0 + self.num_hypotheses = num_hypotheses + self.directory = os.path.join(model_directory,'debug') + self.noise_dim = noise_dim + self.use_noise = use_noise + if not os.path.exists(self.directory): + os.makedirs(self.directory) + + def on_epoch_end(self, epoch, logs={}): + # take the model and print it out + self.epoch += 1 + data = self.predictor.predict(self.features) + plt.ioff() + if self.verbose: + print("============================") + for j in range(self.num): + name = os.path.join(self.directory, + 
"%s_predictor_epoch%03d_result%d.png"%(self.features_name, + self.epoch, j)) + fig = plt.figure()#figsize=(3+int(1.5*self.num_hypotheses),2)) + + plt.subplot(2,2+self.num_hypotheses,1) + plt.title('Input Image') + plt.imshow(self.features[0][j]) + for k in range(2): + # This counts off rows + rand_offset = (k*(2+self.num_hypotheses)) + plt.subplot(2,2+self.num_hypotheses,2+self.num_hypotheses+rand_offset) + plt.title('Observed Goal') + plt.imshow(np.squeeze(self.targets[k][j])) + for i in range(self.num_hypotheses): + plt.subplot(2,2+self.num_hypotheses,i+2+rand_offset) + plt.imshow(np.squeeze(data[k][j][i])) + plt.title('Hypothesis %d'%(i+1)) + + if self.verbose: + print(name) + fig.savefig(name, bbox_inches="tight") + plt.close(fig) + + + class PredictorShowImageOnly(keras.callbacks.Callback): ''' Save an image showing what some number of frames and associated predictions diff --git a/costar_models/python/costar_models/conditional_image.py b/costar_models/python/costar_models/conditional_image.py index 1006a3ec9..c00e12579 100644 --- a/costar_models/python/costar_models/conditional_image.py +++ b/costar_models/python/costar_models/conditional_image.py @@ -104,12 +104,6 @@ def _makePredictor(self, features): #value_out = value_model([h,label_in]) #next_option_out = next_model([h,label_in]) - # create input for controlling noise output if that's what we decide - # that we want to do - if self.use_noise: - z = Input((self.num_hypotheses, self.noise_dim)) - ins += [z] - next_option_in = Input((1,), name="next_option_in") next_option_in2 = Input((1,), name="next_option_in2") ins += [next_option_in, next_option_in2] @@ -159,7 +153,8 @@ def _makePredictor(self, features): train_predictor.compile( loss=[lfn, lfn, "binary_crossentropy", val_loss, lfn2, lfn2, "categorical_crossentropy"], - loss_weights=[1., 1., 0.1, 0.1, 1., 0.2, 1e-4], + #loss_weights=[1., 1., 0.1, 0.1, 1., 0.2, 1e-3], + loss_weights=[1., 1., 0., 0., 0., 0., 1e-3], optimizer=self.getOptimizer()) else: 
train_predictor = Model(ins + [label_in], diff --git a/costar_models/python/costar_models/conditional_image_gan.py b/costar_models/python/costar_models/conditional_image_gan.py index 7135aa9bc..e39a9847d 100644 --- a/costar_models/python/costar_models/conditional_image_gan.py +++ b/costar_models/python/costar_models/conditional_image_gan.py @@ -137,8 +137,6 @@ def _makePredictor(self, features): loss=["mae"]*2 + ["binary_crossentropy"], loss_weights=[100., 100., 1.], optimizer=self.getOptimizer()) - model.summary() - self.discriminator.summary() self.model = model return predictor, model, model, ins, h @@ -200,10 +198,9 @@ def _makeImageDiscriminator(self, img_shape): #x = Concatenate()([x1, x2]) x = x2 x = AddConv2D(x, 128, [4,4], 2, dr, "same", lrelu=True) - #x = AddConv2D(x, 128, [4,4], 1, dr, "same", lrelu=True) x= AddConv2D(x, 256, [4,4], 2, dr, "same", lrelu=True) - #x = AddConv2D(x, 256, [4,4], 1, dr, "same", lrelu=True) - x = AddConv2D(x, 1, [4,4], 1, 0., "same", activation="sigmoid") + x = AddConv2D(x, 1, [1,1], 1, 0., "same", activation="sigmoid", + bn=False) #x = MaxPooling2D(pool_size=(8,8))(x) x = AveragePooling2D(pool_size=(8,8))(x) diff --git a/costar_models/python/costar_models/conditional_image_gan_jigsaws.py b/costar_models/python/costar_models/conditional_image_gan_jigsaws.py index 523f9065c..01e77b102 100644 --- a/costar_models/python/costar_models/conditional_image_gan_jigsaws.py +++ b/costar_models/python/costar_models/conditional_image_gan_jigsaws.py @@ -94,7 +94,6 @@ def _makeModel(self, image, *args, **kwargs): loss_weights=[100., 100., 1.], optimizer=self.getOptimizer()) model.summary() - self.discriminator.summary() self.model = model self.predictor = generator @@ -137,16 +136,20 @@ def _makeImageDiscriminator(self, img_shape): xg1 = AddConv2D(img_goal, 64, [4,4], 1, dr, "same", lrelu=True, bn=False) xg2 = AddConv2D(img_goal2, 64, [4,4], 1, dr, "same", lrelu=True, bn=False) - x1 = Add()([x0, xobs, xg1]) - x2 = Add()([x0, xg1, xg2]) + #x1 
= Add()([x0, xobs, xg1]) + #x2 = Add()([x0, xg1, xg2]) + x1 = Add()([xobs, xg1]) + x2 = Add()([xg1, xg2]) + #x1 = Concatenate(axis=-1)([img, img_goal]) + #x2 = Concatenate(axis=-1)([img_goal, img_goal2]) # ------------------------------------------------------------- y = OneHot(self.num_options)(option) y = AddDense(y, 64, "lrelu", dr) - x1 = TileOnto(x1, y, 64, img_size, add=True) + #x1 = TileOnto(x1, y, 64, img_size, add=True) x1 = AddConv2D(x1, 64, [4,4], 2, dr, "same", lrelu=True, bn=False) - x1 = AddConv2D(x1, 128, [4,4], 2, dr, "same", lrelu=True) - #x = AddConv2D(x, 256, [4,4], 2, dr, "same", lrelu=True) + x1 = AddConv2D(x1, 128, [4,4], 2, dr, "same", lrelu=True, bn=True) + #x1 = AddConv2D(x1, 256, [4,4], 2, dr, "same", lrelu=True, bn=True) #x1 = AddConv2D(x1, 1, [4,4], 1, 0., "same", activation="sigmoid") # ------------------------------------------------------------- @@ -154,14 +157,18 @@ def _makeImageDiscriminator(self, img_shape): y = AddDense(y, 64, "lrelu", dr) x2 = TileOnto(x2, y, 64, img_size, add=True) x2 = AddConv2D(x2, 64, [4,4], 2, dr, "same", lrelu=True, bn=False) - x2 = AddConv2D(x2, 128, [4,4], 2, dr, "same", lrelu=True) - #x = AddConv2D(x, 256, [4,4], 2, dr, "same", lrelu=True) - x = Concatenate(axis=-1)([x1, x2]) + # Final block + x = x2 + x2 = AddConv2D(x2, 128, [4,4], 2, dr, "same", lrelu=True, bn=True) + x2 = AddConv2D(x2, 256, [4,4], 2, dr, "same", lrelu=True, bn=True) + #x = Concatenate(axis=-1)([x1, x2]) #x = Add()([x1, x2]) - x = AddConv2D(x, 1, [4,4], 1, 0., "same", activation="sigmoid") - #x = AveragePooling2D(pool_size=(12,16))(x) - x = AveragePooling2D(pool_size=(24,32))(x) + x = AddConv2D(x2, 1, [1,1], 1, 0., "same", activation="sigmoid", bn=False) + + # Combine + x = AveragePooling2D(pool_size=(12,16))(x) + #x = AveragePooling2D(pool_size=(24,32))(x) x = Flatten()(x) discrim = Model(ins, x, name="image_discriminator") self.lr *= 2. 
diff --git a/costar_models/python/costar_models/conditional_image_jigsaws.py b/costar_models/python/costar_models/conditional_image_jigsaws.py index dcb03e3fa..29f1933ef 100644 --- a/costar_models/python/costar_models/conditional_image_jigsaws.py +++ b/costar_models/python/costar_models/conditional_image_jigsaws.py @@ -15,10 +15,7 @@ from keras.optimizers import Adam from matplotlib import pyplot as plt -from .abstract import * -from .callbacks import * from .robot_multi_models import * -from .split import * from .mhp_loss import * from .loss import * from .sampler2 import * @@ -33,10 +30,14 @@ def __init__(self, *args, **kwargs): super(ConditionalImageJigsaws, self).__init__(*args, **kwargs) self.num_options = SuturingNumOptions() + self.PredictorCb = PredictorShowImageOnlyMultiStep def _makeModel(self, image, *args, **kwargs): img_shape = image.shape[1:] + img_size = 1. + for dim in img_shape: + img_size *= dim img0_in = Input(img_shape, name="predictor_img0_in") img_in = Input(img_shape, name="predictor_img_in") @@ -52,7 +53,7 @@ def _makeModel(self, image, *args, **kwargs): # ===================================================================== # Load weights and stuff - LoadEncoderWeights(self, encoder, decoder) + LoadEncoderWeights(self, encoder, decoder, gan=True) image_discriminator = LoadGoalClassifierWeights(self, make_classifier_fn=MakeJigsawsImageClassifier, img_shape=img_shape) @@ -82,17 +83,30 @@ def _makeModel(self, image, *args, **kwargs): option_in2 = Input((1,), name="option_in2") ins += [option_in, option_in2] + # -------------------------------------------------------------------- + # Create multiple hypothesis loss + lfn = MhpLossWithShape( + num_hypotheses=self.num_hypotheses, + outputs=[img_size], + weights=[1.0], + loss=[self.loss], + avg_weight=0.05, + ) + + # -------------------------------------------------------------------- # Image model + h_dim = (12, 16) + multi_decoder = MakeJigsawsMultiDecoder(self, decoder, + self.num_hypotheses, 
h_dim) y = Flatten()(OneHot(self.num_options)(option_in)) y2 = Flatten()(OneHot(self.num_options)(option_in2)) - x = h - tform = MakeJigsawsTransform(self, h_dim=(12,16)) - x = tform([h0, h, y]) + x = MakeJigsawsExpand(self, h, h_dim) + tform = MakeJigsawsTransform(self, h_dim) + x = tform([h0, x, y]) x2 = tform([h0, x, y2]) - image_out, image_out2 = decoder([x]), decoder([x2]) - disc_out2 = image_discriminator(image_out2) + image_out, image_out2 = multi_decoder([x]), multi_decoder([x2]) + #disc_out2 = image_discriminator(image_out2) - lfn = self.loss lfn2 = "logcosh" # ===================================================================== @@ -100,18 +114,19 @@ def _makeModel(self, image, *args, **kwargs): predictor = Model(ins + [prev_option_in], [image_out, image_out2, next_option_out]) predictor.compile( - loss=[lfn, lfn, "binary_crossentropy"], + loss=[self.loss, self.loss, "binary_crossentropy"], loss_weights=[1., 1., 0.1], optimizer=self.getOptimizer()) model = Model(ins + [prev_option_in], - [image_out, image_out2, next_option_out, disc_out2]) + [image_out, image_out2, next_option_out])#, disc_out2]) model.compile( - loss=[lfn, lfn, "binary_crossentropy", "categorical_crossentropy"], - loss_weights=[1., 1., 0.1, 1e-4], + loss=[lfn, lfn, "binary_crossentropy"],# "categorical_crossentropy"], + loss_weights=[1., 1., 0.1],#, 1e-3], optimizer=self.getOptimizer()) self.predictor = predictor self.model = model + self.model.summary() def _getData(self, image, label, goal_image, goal_label, prev_label, *args, **kwargs): @@ -128,5 +143,7 @@ def _getData(self, image, label, goal_image, goal_label, label_1h = np.squeeze(ToOneHot2D(label, self.num_options)) label2_1h = np.squeeze(ToOneHot2D(label2, self.num_options)) - return [image0, image, label, goal_label, prev_label], [goal_image, goal_image2, label_1h, label2_1h] + return ([image0, image, label, goal_label, prev_label], + [np.expand_dims(goal_image, axis=1), + np.expand_dims(goal_image2, axis=1), label_1h])#, 
label2_1h] diff --git a/costar_models/python/costar_models/conditional_sampler2.py b/costar_models/python/costar_models/conditional_sampler2.py index 239f9afd3..719f1db0b 100644 --- a/costar_models/python/costar_models/conditional_sampler2.py +++ b/costar_models/python/costar_models/conditional_sampler2.py @@ -67,35 +67,25 @@ def _makePredictor(self, features): label_in = Input((1,)) ins = [img_in, arm_in, gripper_in, label_in] - encoder = self._makeImageEncoder(img_shape) - try: - encoder.load_weights(self._makeName( - "pretrain_image_encoder_model", - "image_encoder.h5f")) - encoder.trainable = self.retrain - except Exception as e: - raise e - if self.skip_connections: - decoder = self._makeImageDecoder(self.hidden_shape,self.skip_shape) + encoder = self._makeImageEncoder2(img_shape) + decoder = self._makeImageDecoder2(self.hidden_shape) else: + encoder = self._makeImageEncoder(img_shape) decoder = self._makeImageDecoder(self.hidden_shape) - try: - decoder.load_weights(self._makeName( - "pretrain_image_encoder_model", - "image_decoder.h5f")) - decoder.trainable = self.retrain - except Exception as e: - raise e + LoadEncoderWeights(self, encoder, decoder) + image_discriminator = LoadGoalClassifierWeights(self, + make_classifier_fn=MakeImageClassifier, + img_shape=img_shape) + + # ===================================================================== + # Load the arm and gripper representation rep_channels = self.encoder_channels sencoder = self._makeStateEncoder(arm_size, gripper_size, False) sdecoder = self._makeStateDecoder(arm_size, gripper_size, rep_channels) - # ===================================================================== - # Load the arm and gripper representation - # ===================================================================== # combine these models together with state information and label # information @@ -104,12 +94,12 @@ def _makePredictor(self, features): hidden_decoder = self._makeFromHidden(rep_channels) try: - 
hidden_encoder.load_weights(self._makeName( - "pretrain_sampler_model", - "hidden_encoder.h5f")) - hidden_decoder.load_weights(self._makeName( - "pretrain_sampler_model", - "hidden_decoder.h5f")) + hidden_encoder.load_weights(self.makeName( + "pretrain_sampler", + "hidden_encoder")) + hidden_decoder.load_weights(self.makeName( + "pretrain_sampler", + "hidden_decoder")) hidden_encoder.trainable = self.retrain hidden_decoder.trainable = self.retrain except Exception as e: @@ -162,7 +152,7 @@ def _makePredictor(self, features): return predictor, predictor, actor, ins, h def _getData(self, *args, **kwargs): - features, targets = self._getAllData(*args, **kwargs) + features, targets = GetAllMultiData(self.num_options, *args, **kwargs) [I, q, g, oin, q_target, g_target,] = features tt, o1, v, qa, ga, I_target = targets if self.use_noise: diff --git a/costar_models/python/costar_models/datasets/h5f_generator.py b/costar_models/python/costar_models/datasets/h5f_generator.py index 504de347e..5efe5dc44 100644 --- a/costar_models/python/costar_models/datasets/h5f_generator.py +++ b/costar_models/python/costar_models/datasets/h5f_generator.py @@ -15,15 +15,8 @@ class H5fGeneratorDataset(NpzGeneratorDataset): takes the load function so all we need to do is implement things so they'll load a particular class. 
''' - def __init__(self, name, split=0.1, ): - ''' - Set name of directory to load files from - - ''' - self.name = name - self.split = split - self.train = [] - self.test = [] + def __init__(self, *args, **kwargs): + super(H5fGeneratorDataset, self).__init__(*args, **kwargs) def _load(self, filename): ''' diff --git a/costar_models/python/costar_models/datasets/npy_generator.py b/costar_models/python/costar_models/datasets/npy_generator.py index 53b9d950f..e16b58040 100644 --- a/costar_models/python/costar_models/datasets/npy_generator.py +++ b/costar_models/python/costar_models/datasets/npy_generator.py @@ -8,7 +8,7 @@ class NpzGeneratorDataset(object): Get the list of objects from a folder full of NP arrays. ''' - def __init__(self, name, split=0.1, ): + def __init__(self, name, split=0.1, preload=False): ''' Set name of directory to load files from @@ -16,11 +16,14 @@ def __init__(self, name, split=0.1, ): ----------- name: the directory split: portion of the data files reserved for testing/validation + preload: load all files into memory when starting up ''' self.name = name self.split = split self.train = [] self.test = [] + self.preload = preload + self.preload_cache = {} def write(self, *args, **kwargs): raise NotImplementedError('this dataset does not save things') @@ -37,25 +40,25 @@ def load(self, success_only=False): i = 0 acceptable_files = [] for f in files: - if not f[0] == '.': - #print("%d:"%(i+1), f) - if success_only: - name = f.split('.') - if name[1] == 'failure': + if f[0] == '.': + continue + + if success_only and f.split('.')[1] == 'failure': + continue + + if i < 2: + fsample = self._load(os.path.join(self.name, f)) + for key, value in fsample.items(): + if key not in sample: + sample[key] = value + if value.shape[0] == 0: continue - if i < 2: - fsample = self._load(os.path.join(self.name,f)) - for key, value in fsample.items(): - if key not in sample: - sample[key] = value - if value.shape[0] == 0: - continue - sample[key] = 
np.concatenate([sample[key],value],axis=0) - i += 1 - acceptable_files.append(f) + sample[key] = np.concatenate([sample[key],value],axis=0) + i += 1 + acceptable_files.append(f) idx = np.array(range(len(acceptable_files))) - length = max(1,int(self.split*len(acceptable_files))) + length = max(1, int(self.split*len(acceptable_files))) print("---------------------------------------------") print("Loaded data.") print("# Total examples:", len(acceptable_files)) @@ -70,6 +73,13 @@ def load(self, success_only=False): filename + ' in training!') np.random.shuffle(self.test) np.random.shuffle(self.train) + + if self.preload: + print("Preloading all files...") + for f in self.test + self.train: + nm = os.path.join(self.name, f) + self.preload_cache[nm] = self._load(nm) + return sample def sampleTrainFilename(self): @@ -94,19 +104,34 @@ def loadTest(self, i): raise RuntimeError('index %d greater than number of files'%i) filename = self.test[i] success = 'success' in filename - return self._load(os.path.join(self.name,filename)), success + nm = os.path.join(self.name, filename) + if nm in self.preload_cache: + return self.preload_cache[nm], success + else: + return self._load(nm), success def sampleTrain(self): filename = self.sampleTrainFilename() - try: - sample = self._load(filename) - except Exception as e: - raise RuntimeError("Could not load file " + filename + ": " + str(e)) + if filename in self.preload_cache: + sample = self.preload_cache[filename] + else: + try: + sample = self._load(filename) + except Exception as e: + raise RuntimeError("Could not load file " + filename + ": " + + str(e)) return sample, filename def sampleTest(self): filename = self.sampleTestFilename() - sample = self._load(filename) + if filename in self.preload_cache: + sample = self.preload_cache[filename] + else: + try: + sample = self._load(filename) + except Exception as e: + raise RuntimeError("Could not load file " + filename + ": " + + str(e)) return sample, filename def _load(self, 
filename): diff --git a/costar_models/python/costar_models/dvrk.py b/costar_models/python/costar_models/dvrk.py index 1c73e8467..7d8ce43c1 100644 --- a/costar_models/python/costar_models/dvrk.py +++ b/costar_models/python/costar_models/dvrk.py @@ -59,6 +59,37 @@ def MakeJigsawsImageClassifier(model, img_shape): model.classifier = image_encoder return image_encoder +def MakeJigsawsExpand(model, x, h_dim=(12,16)): + ''' + Take a model and project it out to whatever size + ''' + return AddConv2D(x, 64, [1,1], 1, 0.) + +def MakeJigsawsMultiDecoder(model, decoder, num_images=4, h_dim=(12,16)): + ''' + Make multiple images + ''' + h = Input((h_dim[0], h_dim[1], 64),name="h_in") + + # Add some dropout so we don't end up overfitting our examples + x = Dropout(model.dropout_rate)(h) + + xs = [] + for i in range(num_images): + xi = AddConv2D(x, model.encoder_channels, [5, 5], stride=1, + dropout_rate=0.) + xi = decoder(xi) + img_x = Lambda( + lambda y: K.expand_dims(y, 1), + name="img_hypothesis_%d"%i)(xi) + xs.append(img_x) + img_out = Concatenate(axis=1)(xs) + + mm = Model(h, img_out, name="multi") + mm.compile(loss="mae", optimizer=model.getOptimizer()) + + return mm + def MakeJigsawsTransform(model, h_dim=(12,16)): ''' This is the version made for the newer code, it is set up to use both @@ -76,57 +107,57 @@ def MakeJigsawsTransform(model, h_dim=(12,16)): This will also set the "transform_model" field of "model". ''' - h = Input((h_dim[0], h_dim[1], model.encoder_channels),name="h_in") + h = Input((h_dim[0], h_dim[1], 64),name="h_in") h0 = Input((h_dim[0],h_dim[1], model.encoder_channels),name="h0_in") option = Input((model.num_options,),name="t_opt_in") - x = AddConv2D(h, 64, [1,1], 1, 0.) + x = h # This is already encoded x0 = AddConv2D(h0, 64, [1,1], 1, 0.) # Combine the hidden state observations x = Concatenate()([x, x0]) - x = AddConv2D(x, 64, [5,5], 1, model.dropout_rate) + x = AddConv2D(x, 64, [5,5], 1, 0.) 
+ skip0 = x # store this for skip connection + x = AddConv2D(x, 64, [5,5], 2, 0.) skip = x # Add dense information y = AddDense(option, 64, "relu", 0., constraint=None, output=False) - x = TileOnto(x, y, 64, h_dim) + x = TileOnto(x, y, 64, (h_dim[0]/2, h_dim[1]/2), add=True) x = AddConv2D(x, 64, [5,5], 1, 0.) - #x = AddConv2D(x, 128, [5,5], 2, 0.) # --- start ssm block - use_ssm = True - if use_ssm: - def _ssm(x): - return spatial_softmax(x) - x = Lambda(_ssm,name="encoder_spatial_softmax")(x) - x = AddDense(x, 256, "relu", 0., - constraint=None, output=False,) - x = AddDense(x, h_dim[0] * h_dim[1] * 32/4, "relu", 0., constraint=None, output=False) - x = Reshape([h_dim[0]/2, h_dim[1]/2, 32])(x) - else: - x = AddConv2D(x, 128, [5,5], 1, 0.) - x = AddConv2DTranspose(x, 64, [5,5], 2, - model.dropout_rate) - # --- end ssm block + def _ssm(x): + return spatial_softmax(x) + x = Lambda(_ssm,name="encoder_spatial_softmax")(x) + x = AddDense(x, 128, "relu", 0., + constraint=None, output=False,) + x = AddDense(x, h_dim[0] * h_dim[1] * 64/16, "relu", model.dropout_rate, constraint=None, output=False) + x = Reshape([h_dim[0]/4, h_dim[1]/4, 64])(x) + x = AddConv2DTranspose(x, 64, [5,5], 2, 0.) - if model.skip_connections or True: - x = Concatenate()([x, skip]) - - for i in range(1): - #x = TileOnto(x, y, model.num_options, (8,8)) - x = AddConv2D(x, 64, - [7,7], - stride=1, - dropout_rate=model.dropout_rate) + # --- end ssm block + x = Concatenate()([x, skip]) + x = Dropout(model.dropout_rate)(x) + x = AddConv2DTranspose(x, 64, + [5,5], + stride=2, + dropout_rate=model.dropout_rate) + + x = Concatenate()([x, skip0]) + x = AddConv2D(x, 64, + [5,5], + stride=1, + dropout_rate=model.dropout_rate) # -------------------------------------------------------------------- # Put resulting image into the output shape - x = AddConv2D(x, model.encoder_channels, [1, 1], stride=1, - dropout_rate=0.) + #x = AddConv2D(x, model.encoder_channels, [1, 1], stride=1, + # dropout_rate=0.) 
model.transform_model = Model([h0,h,option], x, name="tform") model.transform_model.compile(loss="mae", optimizer=model.getOptimizer()) + #model.transform_model.summary() return model.transform_model diff --git a/costar_models/python/costar_models/mhp_loss.py b/costar_models/python/costar_models/mhp_loss.py index df98dce58..88067234d 100644 --- a/costar_models/python/costar_models/mhp_loss.py +++ b/costar_models/python/costar_models/mhp_loss.py @@ -157,9 +157,7 @@ def __call__(self, target, pred): xsum = tf.zeros([1, 1]) xmin = tf.ones([1, 1])*1e10 - for i in range(self.num_hypotheses): - target_outputs = _getOutputs(target, self.outputs, 0) pred_outputs = _getOutputs(pred, self.outputs, i) @@ -199,12 +197,15 @@ def _getOutputs(state, outputs, i): ouputs: dimensionality of each output to retrieve in order ''' idx = 0 - separated_outputs = [] - for output_dim in outputs: - # Print statement for debugging: shows ranges for each output, which - # should match the order of provided data. - #print("from ", idx, "to", idx+output_dim) - out = state[:,i,idx:idx+output_dim] - separated_outputs.append(out) - idx += output_dim + if len(outputs) > 1: + separated_outputs = [] + for output_dim in outputs: + # Print statement for debugging: shows ranges for each output, which + # should match the order of provided data. + #print("from ", idx, "to", idx+output_dim) + out = state[:,i,idx:idx+output_dim] + separated_outputs.append(out) + idx += output_dim + else: + separated_outputs = [state[:,i]] return separated_outputs diff --git a/costar_models/python/costar_models/parse.py b/costar_models/python/costar_models/parse.py index d61193352..284cabf06 100644 --- a/costar_models/python/costar_models/parse.py +++ b/costar_models/python/costar_models/parse.py @@ -173,6 +173,9 @@ def GetModelParser(): help="portion of the gpu to allocate for this job", type=float, default=1.) 
+ parser.add_argument("--preload", + help="preload all files into RAM", default=False, + action='store_true') return parser diff --git a/costar_models/python/costar_models/pretrain_image_gan.py b/costar_models/python/costar_models/pretrain_image_gan.py index f8024c188..11a8f5719 100644 --- a/costar_models/python/costar_models/pretrain_image_gan.py +++ b/costar_models/python/costar_models/pretrain_image_gan.py @@ -102,10 +102,12 @@ def _makeImageDiscriminator(self, img_shape): x = AddConv2D(img, 64, [4,4], 1, dr, "same", lrelu=True, bn=False) x0 = AddConv2D(img0, 64, [4,4], 1, dr, "same", lrelu=True, bn=False) x = Add()([x, x0]) - x = AddConv2D(x, 64, [4,4], 2, dr, "same", lrelu=True, bn=True) + #x = Concatenate(axis=-1)([img0, img]) + x = AddConv2D(x, 64, [4,4], 2, dr, "same", lrelu=True, bn=False) x = AddConv2D(x, 128, [4,4], 2, dr, "same", lrelu=True, bn=True) x = AddConv2D(x, 256, [4,4], 2, dr, "same", lrelu=True, bn=True) - x = AddConv2D(x, 1, [4,4], 1, 0., "same", activation="sigmoid") + x = AddConv2D(x, 1, [1,1], 1, 0., "same", activation="sigmoid", + bn=False) x = AveragePooling2D(pool_size=(8,8))(x) x = Flatten()(x) diff --git a/costar_models/python/costar_models/pretrain_image_jigsaws_gan.py b/costar_models/python/costar_models/pretrain_image_jigsaws_gan.py index 08ab3b400..9168b7cc1 100644 --- a/costar_models/python/costar_models/pretrain_image_jigsaws_gan.py +++ b/costar_models/python/costar_models/pretrain_image_jigsaws_gan.py @@ -28,12 +28,12 @@ def _makeModel(self, image, *args, **kwargs): if self.train_predictor is None: raise RuntimeError('did not make trainable model') - def __init__(self, taskdef, *args, **kwargs): + def __init__(self, *args, **kwargs): ''' As in the other models, we call super() to parse arguments from the command line and set things like our optimizer and learning rate. 
''' - super(PretrainImageJigsawsGan, self).__init__(taskdef, *args, **kwargs) + super(PretrainImageJigsawsGan, self).__init__(*args, **kwargs) self.PredictorCb = ImageCb # This is literally the only change from the husky version @@ -80,7 +80,7 @@ def _makePredictor(self, images): self.model = Model([img_in], [gen_out, o1]) self.model.compile( loss=["mae"] + ["binary_crossentropy"], - loss_weights=[100., 1.], + loss_weights=[10., 1.], optimizer=self.getOptimizer()) self.generator = Model([img_in], [gen_out]) @@ -109,10 +109,11 @@ def _makeImageDiscriminator(self, img_shape): x = AddConv2D(img, 64, [4,4], 1, dr, "same", lrelu=True, bn=False) x0 = AddConv2D(img0, 64, [4,4], 1, dr, "same", lrelu=True, bn=False) x = Add()([x, x0]) + #x = Concatenate(axis=-1)([img0, img]) x = AddConv2D(x, 64, [4,4], 2, dr, "same", lrelu=True, bn=False) - x = AddConv2D(x, 128, [4,4], 2, dr, "same", lrelu=True) - #x = AddConv2D(x, 256, [4,4], 2, dr, "same", lrelu=True) - x = AddConv2D(x, 1, [4,4], 1, 0., "same", activation="sigmoid") + x = AddConv2D(x, 128, [4,4], 2, dr, "same", lrelu=True, bn=True) + #x = AddConv2D(x, 256, [4,4], 2, dr, "same", lrelu=True, bn=True) + x = AddConv2D(x, 1, [1,1], 1, 0., "same", activation="sigmoid", bn=False) #x = AveragePooling2D(pool_size=(12,16))(x) x = AveragePooling2D(pool_size=(24,32))(x) diff --git a/costar_models/scripts/ctp_model_tool b/costar_models/scripts/ctp_model_tool index f4848697b..6d35fc2b2 100755 --- a/costar_models/scripts/ctp_model_tool +++ b/costar_models/scripts/ctp_model_tool @@ -27,10 +27,10 @@ def main(args): root += '.' 
root += tok if data_type == "npz": - dataset = NpzGeneratorDataset(root) + dataset = NpzGeneratorDataset(root, preload=args['preload']) data = dataset.load(success_only = args['success_only']) elif data_type == "h5f": - dataset = H5fGeneratorDataset(root) + dataset = H5fGeneratorDataset(root, preload=args['preload']) data = dataset.load(success_only = args['success_only']) else: raise NotImplementedError('data type not implemented: %s'%data_type) diff --git a/docs/task_learning_experiments.md b/docs/task_learning_experiments.md index d58312536..7afff67a8 100644 --- a/docs/task_learning_experiments.md +++ b/docs/task_learning_experiments.md @@ -85,6 +85,22 @@ rosrun costar_models ctp_model_tool --model pretrain_image --data_file suturing_ rosrun costar_models ctp_model_tool --model pretrain_image_gan --data_file suturing_data.h5f --lr 0.001 --dropout_rate 0.2 --features jigsaws --batch_size 32 ``` +#### Wasserstein GAN + +We also implemented Wasserstein GAN training. + +``` +# Run with wasserstein GAN loss +rosrun costar_models ctp_model_tool --model pretrain_image_gan \ + --features jigsaws --batch_size 64 --data_file suturing_data2.h5f \ + --lr 0.00005 --optimizer rmsprop --steps_per_epoch 100 \ + --dropout_rate 0.1 --load_model --preload --wasserstein +``` + +Some options here: + - `--preload` will try to store the whole data set in memory for faster processing + - `--wasserstein` switches GAN training to the Wasserstein loss + ## Training On MARCC MARCC is our cluster for machine learning, equipped with a large set of Tesla K80 GPUs. We assume that when training on a cluster like MARCC, you will not want a full ROS workspace, so instead we assume you will install to some path $COSTAR_PLAN and just run scripts. diff --git a/slurm/ctp.sh b/slurm/ctp.sh index 5920024db..0433ad914 100755 --- a/slurm/ctp.sh +++ b/slurm/ctp.sh @@ -14,9 +14,9 @@ echo "Running $@ on $SLURMD_NODENAME ..." 
module load tensorflow/cuda-8.0/r1.3 export DATASET="ctp_dec" -export train_discriminator=true -export train_image_encoder=true -export train_multi_encoder=true +export train_discriminator=false +export train_image_encoder=false +export train_multi_encoder=false export train_predictor=false export learning_rate=$1 export dropout=$2 @@ -93,18 +93,22 @@ then --batch_size 64 fi -$HOME/costar_plan/costar_models/scripts/ctp_model_tool \ - --features multi \ - -e 100 \ - --model conditional_image \ - --data_file $HOME/work/$DATASET.h5f \ - --lr $learning_rate \ - --dropout_rate $dropout \ - --model_directory $MODELDIR/ \ - --optimizer $optimizer \ - --steps_per_epoch 500 \ - --loss $loss \ - --batch_size 64 + +if $train_conditional_image +then + $HOME/costar_plan/costar_models/scripts/ctp_model_tool \ + --features multi \ + -e 150 \ + --model conditional_image \ + --data_file $HOME/work/$DATASET.h5f \ + --lr $learning_rate \ + --dropout_rate $dropout \ + --model_directory $MODELDIR/ \ + --optimizer $optimizer \ + --steps_per_epoch 500 \ + --loss $loss \ + --batch_size 64 +fi $HOME/costar_plan/costar_models/scripts/ctp_model_tool \ --features multi \ diff --git a/slurm/ctp_husky.sh b/slurm/ctp_husky.sh index b47462805..2aff7a8e8 100755 --- a/slurm/ctp_husky.sh +++ b/slurm/ctp_husky.sh @@ -15,7 +15,7 @@ module load tensorflow/cuda-8.0/r1.3 export DATASET="husky_data" export train_discriminator=true -export train_image_encoder=false +export train_image_encoder=true export train_multi_encoder=false export train_predictor=false export train_gans=true @@ -34,7 +34,7 @@ then --features multi \ -e 100 \ --model discriminator \ - --data_file $HOME/work/$DATASET.h5f \ + --data_file $HOME/work/$DATASET.npz \ --features husky \ --lr $learning_rate \ --dropout_rate $dropout \ @@ -49,7 +49,7 @@ then --features multi \ -e 100 \ --model goal_discriminator \ - --data_file $HOME/work/$DATASET.h5f \ + --data_file $HOME/work/$DATASET.npz \ --lr $learning_rate \ --features husky \ 
--dropout_rate $dropout \ diff --git a/slurm/ctp_suturing.sh b/slurm/ctp_suturing.sh index 0ba78881b..a417a3d10 100755 --- a/slurm/ctp_suturing.sh +++ b/slurm/ctp_suturing.sh @@ -1,5 +1,5 @@ #!/bin/bash -l -#SBATCH --job-name=ctpHusky +#SBATCH --job-name=jigsaws #SBATCH --time=0-48:0:0 #SBATCH --partition=gpu #SBATCH --gres=gpu:1 @@ -23,6 +23,40 @@ export noise_dim=$4 export loss=$5 export MODELDIR="$HOME/.costar/suturing_$learning_rate$optimizer$dropout$noise_dim$loss" +if $train_discriminator +then + echo "Training discriminator 1" + $HOME/costar_plan/costar_models/scripts/ctp_model_tool \ + --features multi \ + -e 100 \ + --model discriminator \ + --data_file $HOME/work/$DATASET.h5f \ + --features jigsaws \ + --lr $learning_rate \ + --dropout_rate $dropout \ + --model_directory $MODELDIR/ \ + --optimizer $optimizer \ + --steps_per_epoch 500 \ + --noise_dim $noise_dim \ + --loss $loss \ + --batch_size 64 + echo "Training discriminator 2" + $HOME/costar_plan/costar_models/scripts/ctp_model_tool \ + --features multi \ + -e 100 \ + --model goal_discriminator \ + --data_file $HOME/work/$DATASET.h5f \ + --lr $learning_rate \ + --features jigsaws \ + --dropout_rate $dropout \ + --model_directory $MODELDIR/ \ + --optimizer $optimizer \ + --steps_per_epoch 500 \ + --noise_dim $noise_dim \ + --loss $loss \ + --batch_size 64 +fi + if $train_image_encoder @@ -32,7 +66,7 @@ then --features multi \ -e 100 \ --model pretrain_image_encoder \ - --data_file $HOME/work/$DATASET.npz \ + --data_file $HOME/work/$DATASET.h5f \ --lr $learning_rate \ --dropout_rate $dropout \ --features jigsaws \ @@ -49,10 +83,11 @@ $HOME/costar_plan/costar_models/scripts/ctp_model_tool \ --features multi \ -e 100 \ --model conditional_image \ - --data_file $HOME/work/$DATASET.npz \ + --data_file $HOME/work/$DATASET.h5f \ --lr $learning_rate \ --dropout_rate $dropout \ --model_directory $MODELDIR/ \ + --features jigsaws \ --optimizer $optimizer \ --use_noise true \ --steps_per_epoch 500 \