diff --git a/costar_models/python/costar_models/callbacks.py b/costar_models/python/costar_models/callbacks.py
index f7f5ccddb..aa7b1e092 100644
--- a/costar_models/python/costar_models/callbacks.py
+++ b/costar_models/python/costar_models/callbacks.py
@@ -312,6 +312,85 @@ def on_epoch_end(self, epoch, logs={}):
             fig.savefig(name, bbox_inches="tight")
             plt.close(fig)
 
+class PredictorShowImageOnlyMultiStep(keras.callbacks.Callback):
+    '''
+    Callback that saves, at the end of each epoch, one figure per selected
+    frame with the input image, the predicted hypotheses and observed goals.
+    '''
+
+    def __init__(self, predictor, features, targets,
+            model_directory=DEFAULT_MODEL_DIRECTORY,
+            num_hypotheses=4,
+            verbose=False,
+            features_name=None,
+            noise_dim=64,
+            use_noise=False,
+            name="model",
+            use_prev_option=True,
+            min_idx=0, max_idx=66, step=11):
+        '''
+        Pick the examples to visualize and create the output directory.
+
+        Parameters:
+        -----------
+        predictor: model whose predict() output is plotted
+        features, targets: sequences; each item is indexed by the chosen rows
+        model_directory: figures go into its "debug" subdirectory
+        num_hypotheses: number of hypothesis images plotted per row
+        verbose: print a banner and the name of every file written
+        features_name: file name prefix; "def" is used when None
+        noise_dim, use_noise: stored as given; name, use_prev_option: unused
+        min_idx, max_idx, step: range() arguments selecting the rows
+        '''
+
+        self.predictor = predictor
+        self.verbose = verbose
+        self.num_hypotheses = num_hypotheses
+        self.noise_dim = noise_dim
+        self.use_noise = use_noise
+        self.epoch = 0
+        self.features_name = "def" if features_name is None else features_name
+        self.idxs = range(min_idx, max_idx, step)
+        self.num = len(self.idxs)
+        self.features = [f[self.idxs] for f in features]
+        self.targets = [np.squeeze(t[self.idxs]) for t in targets]
+        self.directory = os.path.join(model_directory,'debug')
+        if not os.path.exists(self.directory):
+            os.makedirs(self.directory)
+
+    def on_epoch_end(self, epoch, logs={}):
+        # The callback keeps its own epoch counter for the file names
+        self.epoch += 1
+        predictions = self.predictor.predict(self.features)
+        plt.ioff()
+        if self.verbose:
+            print("============================")
+        cols = 2 + self.num_hypotheses
+        for ex in range(self.num):
+            name = os.path.join(self.directory,
+                    "%s_predictor_epoch%03d_result%d.png"%(self.features_name,
+                        self.epoch, ex))
+            fig = plt.figure()
+            plt.subplot(2, cols, 1)
+            plt.title('Input Image')
+            plt.imshow(self.features[0][ex])
+            for row in range(2):
+                first = row * cols
+                # The observed goal goes in the last cell of this row
+                plt.subplot(2, cols, cols + first)
+                plt.title('Observed Goal')
+                plt.imshow(np.squeeze(self.targets[row][ex]))
+                for hyp in range(self.num_hypotheses):
+                    plt.subplot(2, cols, hyp + 2 + first)
+                    plt.imshow(np.squeeze(predictions[row][ex][hyp]))
+                    plt.title('Hypothesis %d'%(hyp+1))
+            if self.verbose:
+                print(name)
+            fig.savefig(name, bbox_inches="tight")
+            plt.close(fig)
+
+
+
 class PredictorShowImageOnly(keras.callbacks.Callback):
     '''
     Save an image showing what some number of frames and associated predictions
diff --git a/costar_models/python/costar_models/conditional_image.py b/costar_models/python/costar_models/conditional_image.py
index 1006a3ec9..c00e12579 100644
--- a/costar_models/python/costar_models/conditional_image.py
+++ b/costar_models/python/costar_models/conditional_image.py
@@ -104,12 +104,6 @@ def _makePredictor(self, features):
         #value_out = value_model([h,label_in])
         #next_option_out = next_model([h,label_in])
 
-        # create input for controlling noise output if that's what we decide
-        # that we want to do
-        if self.use_noise:
-            z = Input((self.num_hypotheses, self.noise_dim))
-            ins += [z]
-
         next_option_in = Input((1,), name="next_option_in")
         next_option_in2 = Input((1,), name="next_option_in2")
         ins += [next_option_in, next_option_in2]
@@ -159,7 +153,8 @@ def _makePredictor(self, features):
             train_predictor.compile(
                     loss=[lfn, lfn, "binary_crossentropy", val_loss,
                         lfn2, lfn2, "categorical_crossentropy"],
-                    loss_weights=[1., 1., 0.1, 0.1, 1., 0.2, 1e-4],
+                    #loss_weights=[1., 1., 0.1, 0.1, 1., 0.2, 1e-3],
+                    loss_weights=[1., 1., 0., 0., 0., 0., 1e-3],
                     optimizer=self.getOptimizer())
         else:
             train_predictor = Model(ins + [label_in],
diff --git a/costar_models/python/costar_models/conditional_image_gan.py b/costar_models/python/costar_models/conditional_image_gan.py
index 7135aa9bc..e39a9847d 100644
--- a/costar_models/python/costar_models/conditional_image_gan.py
+++ b/costar_models/python/costar_models/conditional_image_gan.py
@@ -137,8 +137,6 @@ def _makePredictor(self, features):
                 loss=["mae"]*2 + ["binary_crossentropy"],
                 loss_weights=[100., 100., 1.],
                 optimizer=self.getOptimizer())
-        model.summary()
-        self.discriminator.summary()
         self.model = model
 
         return predictor, model, model, ins, h
@@ -200,10 +198,9 @@ def _makeImageDiscriminator(self, img_shape):
         #x = Concatenate()([x1, x2])
         x = x2
         x = AddConv2D(x, 128, [4,4], 2, dr, "same", lrelu=True)
-        #x = AddConv2D(x, 128, [4,4], 1, dr, "same", lrelu=True)
         x= AddConv2D(x, 256, [4,4], 2, dr, "same", lrelu=True)
-        #x = AddConv2D(x, 256, [4,4], 1, dr, "same", lrelu=True)
-        x = AddConv2D(x, 1, [4,4], 1, 0., "same", activation="sigmoid")
+        x = AddConv2D(x, 1, [1,1], 1, 0., "same", activation="sigmoid",
+                bn=False)
 
         #x = MaxPooling2D(pool_size=(8,8))(x)
         x = AveragePooling2D(pool_size=(8,8))(x)
diff --git a/costar_models/python/costar_models/conditional_image_gan_jigsaws.py b/costar_models/python/costar_models/conditional_image_gan_jigsaws.py
index 523f9065c..01e77b102 100644
--- a/costar_models/python/costar_models/conditional_image_gan_jigsaws.py
+++ b/costar_models/python/costar_models/conditional_image_gan_jigsaws.py
@@ -94,7 +94,6 @@ def _makeModel(self, image, *args, **kwargs):
                 loss_weights=[100., 100., 1.],
                 optimizer=self.getOptimizer())
         model.summary()
-        self.discriminator.summary()
         self.model = model
 
         self.predictor = generator
@@ -137,16 +136,20 @@ def _makeImageDiscriminator(self, img_shape):
         xg1 = AddConv2D(img_goal, 64, [4,4], 1, dr, "same", lrelu=True, bn=False)
         xg2 = AddConv2D(img_goal2, 64, [4,4], 1, dr, "same", lrelu=True, bn=False)
 
-        x1 = Add()([x0, xobs, xg1])
-        x2 = Add()([x0, xg1, xg2])
+        #x1 = Add()([x0, xobs, xg1])
+        #x2 = Add()([x0, xg1, xg2])
+        x1 = Add()([xobs, xg1])
+        x2 = Add()([xg1, xg2])
+        #x1 = Concatenate(axis=-1)([img, img_goal])
+        #x2 = Concatenate(axis=-1)([img_goal, img_goal2])
         
         # -------------------------------------------------------------
         y = OneHot(self.num_options)(option)
         y = AddDense(y, 64, "lrelu", dr)
-        x1 = TileOnto(x1, y, 64, img_size, add=True)
+        #x1 = TileOnto(x1, y, 64, img_size, add=True)
         x1 = AddConv2D(x1, 64, [4,4], 2, dr, "same", lrelu=True, bn=False)
-        x1 = AddConv2D(x1, 128, [4,4], 2, dr, "same", lrelu=True)
-        #x = AddConv2D(x, 256, [4,4], 2, dr, "same", lrelu=True)
+        x1 = AddConv2D(x1, 128, [4,4], 2, dr, "same", lrelu=True, bn=True)
+        #x1 = AddConv2D(x1, 256, [4,4], 2, dr, "same", lrelu=True, bn=True)
         #x1 = AddConv2D(x1, 1, [4,4], 1, 0., "same", activation="sigmoid")
 
         # -------------------------------------------------------------
@@ -154,14 +157,18 @@ def _makeImageDiscriminator(self, img_shape):
         y = AddDense(y, 64, "lrelu", dr)
         x2 = TileOnto(x2, y, 64, img_size, add=True)
         x2 = AddConv2D(x2, 64, [4,4], 2, dr, "same", lrelu=True, bn=False)
-        x2 = AddConv2D(x2, 128, [4,4], 2, dr, "same", lrelu=True)
-        #x = AddConv2D(x, 256, [4,4], 2, dr, "same", lrelu=True)
 
-        x = Concatenate(axis=-1)([x1, x2])
+        # Final block
+        x = x2
+        x2 = AddConv2D(x2, 128, [4,4], 2, dr, "same", lrelu=True, bn=True)
+        x2 = AddConv2D(x2, 256, [4,4], 2, dr, "same", lrelu=True, bn=True)
+        #x = Concatenate(axis=-1)([x1, x2])
         #x = Add()([x1, x2])
-        x = AddConv2D(x, 1, [4,4], 1, 0., "same", activation="sigmoid")
-        #x = AveragePooling2D(pool_size=(12,16))(x)
-        x = AveragePooling2D(pool_size=(24,32))(x)
+        x = AddConv2D(x2, 1, [1,1], 1, 0., "same", activation="sigmoid", bn=False)
+
+        # Combine
+        x = AveragePooling2D(pool_size=(12,16))(x)
+        #x = AveragePooling2D(pool_size=(24,32))(x)
         x = Flatten()(x)
         discrim = Model(ins, x, name="image_discriminator")
         self.lr *= 2.
diff --git a/costar_models/python/costar_models/conditional_image_jigsaws.py b/costar_models/python/costar_models/conditional_image_jigsaws.py
index dcb03e3fa..29f1933ef 100644
--- a/costar_models/python/costar_models/conditional_image_jigsaws.py
+++ b/costar_models/python/costar_models/conditional_image_jigsaws.py
@@ -15,10 +15,7 @@
 from keras.optimizers import Adam
 from matplotlib import pyplot as plt
 
-from .abstract import *
-from .callbacks import *
 from .robot_multi_models import *
-from .split import *
 from .mhp_loss import *
 from .loss import *
 from .sampler2 import *
@@ -33,10 +30,14 @@ def __init__(self, *args, **kwargs):
         super(ConditionalImageJigsaws, self).__init__(*args, **kwargs)
 
         self.num_options = SuturingNumOptions()
+        self.PredictorCb = PredictorShowImageOnlyMultiStep
 
     def _makeModel(self, image, *args, **kwargs):
 
         img_shape = image.shape[1:]
+        img_size = 1.
+        for dim in img_shape:
+            img_size *= dim
 
         img0_in = Input(img_shape, name="predictor_img0_in")
         img_in = Input(img_shape, name="predictor_img_in")
@@ -52,7 +53,7 @@ def _makeModel(self, image, *args, **kwargs):
 
         # =====================================================================
         # Load weights and stuff
-        LoadEncoderWeights(self, encoder, decoder)
+        LoadEncoderWeights(self, encoder, decoder, gan=True)
         image_discriminator = LoadGoalClassifierWeights(self,
                 make_classifier_fn=MakeJigsawsImageClassifier,
                 img_shape=img_shape)
@@ -82,17 +83,30 @@ def _makeModel(self, image, *args, **kwargs):
         option_in2 = Input((1,), name="option_in2")
         ins += [option_in, option_in2]
 
+        # --------------------------------------------------------------------
+        # Create multiple hypothesis loss
+        lfn = MhpLossWithShape(
+                num_hypotheses=self.num_hypotheses,
+                outputs=[img_size],
+                weights=[1.0],
+                loss=[self.loss],
+                avg_weight=0.05,
+                )
+
+        # --------------------------------------------------------------------
         # Image model
+        h_dim = (12, 16)
+        multi_decoder = MakeJigsawsMultiDecoder(self, decoder,
+                self.num_hypotheses, h_dim)
         y = Flatten()(OneHot(self.num_options)(option_in))
         y2 = Flatten()(OneHot(self.num_options)(option_in2))
-        x = h
-        tform = MakeJigsawsTransform(self, h_dim=(12,16))
-        x = tform([h0, h, y])
+        x = MakeJigsawsExpand(self, h, h_dim)
+        tform = MakeJigsawsTransform(self, h_dim)
+        x = tform([h0, x, y])
         x2 = tform([h0, x, y2])
-        image_out, image_out2 = decoder([x]), decoder([x2])
-        disc_out2 = image_discriminator(image_out2)
+        image_out, image_out2 = multi_decoder([x]), multi_decoder([x2])
+        #disc_out2 = image_discriminator(image_out2)
 
-        lfn = self.loss
         lfn2 = "logcosh"
 
         # =====================================================================
@@ -100,18 +114,19 @@ def _makeModel(self, image, *args, **kwargs):
         predictor = Model(ins + [prev_option_in],
                 [image_out, image_out2, next_option_out])
         predictor.compile(
-                loss=[lfn, lfn, "binary_crossentropy"],
+                loss=[self.loss, self.loss, "binary_crossentropy"],
                 loss_weights=[1., 1., 0.1],
                 optimizer=self.getOptimizer())
         model = Model(ins + [prev_option_in],
-                [image_out, image_out2, next_option_out, disc_out2])
+                [image_out, image_out2, next_option_out])#, disc_out2])
         model.compile(
-                loss=[lfn, lfn, "binary_crossentropy", "categorical_crossentropy"],
-                loss_weights=[1., 1., 0.1, 1e-4],
+                loss=[lfn, lfn, "binary_crossentropy"],# "categorical_crossentropy"],
+                loss_weights=[1., 1., 0.1],#, 1e-3],
                 optimizer=self.getOptimizer())
 
         self.predictor = predictor
         self.model = model
+        self.model.summary()
 
     def _getData(self, image, label, goal_image, goal_label,
             prev_label, *args, **kwargs):
@@ -128,5 +143,7 @@ def _getData(self, image, label, goal_image, goal_label,
 
         label_1h = np.squeeze(ToOneHot2D(label, self.num_options))
         label2_1h = np.squeeze(ToOneHot2D(label2, self.num_options))
-        return [image0, image, label, goal_label, prev_label], [goal_image, goal_image2, label_1h, label2_1h]
+        return ([image0, image, label, goal_label, prev_label],
+                [np.expand_dims(goal_image, axis=1),
+                 np.expand_dims(goal_image2, axis=1), label_1h])#, label2_1h]
 
diff --git a/costar_models/python/costar_models/conditional_sampler2.py b/costar_models/python/costar_models/conditional_sampler2.py
index 239f9afd3..719f1db0b 100644
--- a/costar_models/python/costar_models/conditional_sampler2.py
+++ b/costar_models/python/costar_models/conditional_sampler2.py
@@ -67,35 +67,25 @@ def _makePredictor(self, features):
         label_in = Input((1,))
         ins = [img_in, arm_in, gripper_in, label_in]
 
-        encoder = self._makeImageEncoder(img_shape)
-        try:
-            encoder.load_weights(self._makeName(
-                "pretrain_image_encoder_model",
-                "image_encoder.h5f"))
-            encoder.trainable = self.retrain
-        except Exception as e:
-            raise e
-
         if self.skip_connections:
-            decoder = self._makeImageDecoder(self.hidden_shape,self.skip_shape)
+            encoder = self._makeImageEncoder2(img_shape)
+            decoder = self._makeImageDecoder2(self.hidden_shape)
         else:
+            encoder = self._makeImageEncoder(img_shape)
             decoder = self._makeImageDecoder(self.hidden_shape)
-        try:
-            decoder.load_weights(self._makeName(
-                "pretrain_image_encoder_model",
-                "image_decoder.h5f"))
-            decoder.trainable = self.retrain
-        except Exception as e:
-            raise e
 
+        LoadEncoderWeights(self, encoder, decoder)
+        image_discriminator = LoadGoalClassifierWeights(self,
+                make_classifier_fn=MakeImageClassifier,
+                img_shape=img_shape)
+
+        # =====================================================================
+        # Load the arm and gripper representation
         rep_channels = self.encoder_channels
         sencoder = self._makeStateEncoder(arm_size, gripper_size, False)
         sdecoder = self._makeStateDecoder(arm_size, gripper_size,
                 rep_channels)
 
-        # =====================================================================
-        # Load the arm and gripper representation
-
         # =====================================================================
         # combine these models together with state information and label
         # information
@@ -104,12 +94,12 @@ def _makePredictor(self, features):
         hidden_decoder = self._makeFromHidden(rep_channels)
 
         try:
-            hidden_encoder.load_weights(self._makeName(
-                "pretrain_sampler_model",
-                "hidden_encoder.h5f"))
-            hidden_decoder.load_weights(self._makeName(
-                "pretrain_sampler_model",
-                "hidden_decoder.h5f"))
+            hidden_encoder.load_weights(self.makeName(
+                "pretrain_sampler",
+                "hidden_encoder"))
+            hidden_decoder.load_weights(self.makeName(
+                "pretrain_sampler",
+                "hidden_decoder"))
             hidden_encoder.trainable = self.retrain
             hidden_decoder.trainable = self.retrain
         except Exception as e:
@@ -162,7 +152,7 @@ def _makePredictor(self, features):
         return predictor, predictor, actor, ins, h
 
     def _getData(self, *args, **kwargs):
-        features, targets = self._getAllData(*args, **kwargs)
+        features, targets = GetAllMultiData(self.num_options, *args, **kwargs)
         [I, q, g, oin, q_target, g_target,] = features
         tt, o1, v, qa, ga, I_target = targets
         if self.use_noise:
diff --git a/costar_models/python/costar_models/datasets/h5f_generator.py b/costar_models/python/costar_models/datasets/h5f_generator.py
index 504de347e..5efe5dc44 100644
--- a/costar_models/python/costar_models/datasets/h5f_generator.py
+++ b/costar_models/python/costar_models/datasets/h5f_generator.py
@@ -15,15 +15,8 @@ class H5fGeneratorDataset(NpzGeneratorDataset):
     takes the load function so all we need to do is implement things so they'll
     load a particular class.
     '''
-    def __init__(self, name, split=0.1, ):
-        '''
-        Set name of directory to load files from
-
-        '''
-        self.name = name 
-        self.split = split
-        self.train = []
-        self.test = []
+    def __init__(self, *args, **kwargs):
+        super(H5fGeneratorDataset, self).__init__(*args, **kwargs)
 
     def _load(self, filename):
         '''
diff --git a/costar_models/python/costar_models/datasets/npy_generator.py b/costar_models/python/costar_models/datasets/npy_generator.py
index 53b9d950f..e16b58040 100644
--- a/costar_models/python/costar_models/datasets/npy_generator.py
+++ b/costar_models/python/costar_models/datasets/npy_generator.py
@@ -8,7 +8,7 @@ class NpzGeneratorDataset(object):
     Get the list of objects from a folder full of NP arrays. 
     '''
 
-    def __init__(self, name, split=0.1, ):
+    def __init__(self, name, split=0.1, preload=False):
         '''
         Set name of directory to load files from
 
@@ -16,11 +16,14 @@ def __init__(self, name, split=0.1, ):
         -----------
         name: the directory
         split: portion of the data files reserved for testing/validation
+        preload: load all files into memory when starting up
         '''
         self.name = name 
         self.split = split
         self.train = []
         self.test = []
+        self.preload = preload
+        self.preload_cache = {}
 
     def write(self, *args, **kwargs):
         raise NotImplementedError('this dataset does not save things')
@@ -37,25 +40,25 @@ def load(self, success_only=False):
         i = 0
         acceptable_files = []
         for f in files:
-            if not f[0] == '.':
-                #print("%d:"%(i+1), f)
-                if success_only:
-                    name = f.split('.')
-                    if name[1] == 'failure':
+            if f[0] == '.':
+                continue
+
+            if success_only and f.split('.')[1] == 'failure':
+                continue
+
+            if i < 2:
+                fsample = self._load(os.path.join(self.name, f))
+                for key, value in fsample.items():
+                    if key not in sample:
+                        sample[key] = value
+                    if value.shape[0] == 0:
                         continue
-                if i < 2:
-                    fsample = self._load(os.path.join(self.name,f))
-                    for key, value in fsample.items():
-                        if key not in sample:
-                            sample[key] = value
-                        if value.shape[0] == 0:
-                            continue
-                        sample[key] = np.concatenate([sample[key],value],axis=0)
-                i += 1
-                acceptable_files.append(f)
+                    sample[key] = np.concatenate([sample[key],value],axis=0)
+            i += 1
+            acceptable_files.append(f)
 
         idx = np.array(range(len(acceptable_files)))
-        length = max(1,int(self.split*len(acceptable_files)))
+        length = max(1, int(self.split*len(acceptable_files)))
         print("---------------------------------------------")
         print("Loaded data.")
         print("# Total examples:", len(acceptable_files))
@@ -70,6 +73,13 @@ def load(self, success_only=False):
                                    filename + ' in training!')
         np.random.shuffle(self.test)
         np.random.shuffle(self.train)
+
+        if self.preload:
+            print("Preloading all files...")
+            for f in self.test + self.train:
+                nm = os.path.join(self.name, f)
+                self.preload_cache[nm] = self._load(nm)
+
         return sample
 
     def sampleTrainFilename(self):
@@ -94,19 +104,34 @@ def loadTest(self, i):
             raise RuntimeError('index %d greater than number of files'%i)
         filename = self.test[i]
         success = 'success' in filename
-        return self._load(os.path.join(self.name,filename)), success
+        nm = os.path.join(self.name, filename)
+        if nm in self.preload_cache:
+            return self.preload_cache[nm], success
+        else:
+            return self._load(nm), success
 
     def sampleTrain(self):
         filename = self.sampleTrainFilename()
-        try:
-            sample = self._load(filename)
-        except Exception as e:
-            raise RuntimeError("Could not load file " + filename + ": " + str(e))
+        if filename in self.preload_cache:
+            sample = self.preload_cache[filename]
+        else:
+            try:
+                sample = self._load(filename)
+            except Exception as e:
+                raise RuntimeError("Could not load file " + filename + ": "
+                        + str(e))
         return sample, filename
 
     def sampleTest(self):
         filename = self.sampleTestFilename()
-        sample = self._load(filename)
+        if filename in self.preload_cache:
+            sample = self.preload_cache[filename]
+        else:
+            try:
+                sample = self._load(filename)
+            except Exception as e:
+                raise RuntimeError("Could not load file " + filename + ": "
+                        + str(e))
         return sample, filename
 
     def _load(self, filename):
diff --git a/costar_models/python/costar_models/dvrk.py b/costar_models/python/costar_models/dvrk.py
index 1c73e8467..7d8ce43c1 100644
--- a/costar_models/python/costar_models/dvrk.py
+++ b/costar_models/python/costar_models/dvrk.py
@@ -59,6 +59,37 @@ def MakeJigsawsImageClassifier(model, img_shape):
     model.classifier = image_encoder
     return image_encoder
 
+def MakeJigsawsExpand(model, x, h_dim=(12,16)):
+    '''
+    Pass x through AddConv2D(x, 64, [1,1], 1, 0.); model and h_dim are unused.
+    '''
+    return AddConv2D(x, 64, [1,1], 1, 0.)
+
+def MakeJigsawsMultiDecoder(model, decoder, num_images=4, h_dim=(12,16)):
+    '''
+    Build a model that decodes one hidden input into num_images images, each
+    from its own conv branch, concatenated along a new axis 1.
+    '''
+    hidden_in = Input((h_dim[0], h_dim[1], 64),name="h_in")
+
+    # Shared dropout ahead of the branches, to reduce overfitting
+    shared = Dropout(model.dropout_rate)(hidden_in)
+
+    hypotheses = []
+    for idx in range(num_images):
+        branch = AddConv2D(shared, model.encoder_channels, [5, 5], stride=1,
+                dropout_rate=0.)
+        image = decoder(branch)
+        # Insert axis 1 so the images can be joined by Concatenate below
+        expand = Lambda(lambda y: K.expand_dims(y, 1),
+                name="img_hypothesis_%d"%idx)
+        hypotheses.append(expand(image))
+    stacked = Concatenate(axis=1)(hypotheses)
+
+    multi = Model(hidden_in, stacked, name="multi")
+    multi.compile(loss="mae", optimizer=model.getOptimizer())
+    return multi
+
 def MakeJigsawsTransform(model, h_dim=(12,16)):
     '''
     This is the version made for the newer code, it is set up to use both
@@ -76,57 +107,57 @@ def MakeJigsawsTransform(model, h_dim=(12,16)):
 
     This will also set the "transform_model" field of "model".
     '''
-    h = Input((h_dim[0], h_dim[1], model.encoder_channels),name="h_in")
+    h = Input((h_dim[0], h_dim[1], 64),name="h_in")
     h0 = Input((h_dim[0],h_dim[1], model.encoder_channels),name="h0_in")
     option = Input((model.num_options,),name="t_opt_in")
-    x = AddConv2D(h, 64, [1,1], 1, 0.)
+    x = h # This is already encoded
     x0 = AddConv2D(h0, 64, [1,1], 1, 0.)
 
     # Combine the hidden state observations
     x = Concatenate()([x, x0])
-    x = AddConv2D(x, 64, [5,5], 1, model.dropout_rate)
+    x = AddConv2D(x, 64, [5,5], 1, 0.)
+    skip0 = x
 
     # store this for skip connection
+    x = AddConv2D(x, 64, [5,5], 2, 0.)
     skip = x
 
     # Add dense information
     y = AddDense(option, 64, "relu", 0., constraint=None, output=False)
-    x = TileOnto(x, y, 64, h_dim)
+    x = TileOnto(x, y, 64, (h_dim[0]/2, h_dim[1]/2), add=True)
     x = AddConv2D(x, 64, [5,5], 1, 0.)
-    #x = AddConv2D(x, 128, [5,5], 2, 0.)
 
     # --- start ssm block
-    use_ssm = True
-    if use_ssm:
-        def _ssm(x):
-            return spatial_softmax(x)
-        x = Lambda(_ssm,name="encoder_spatial_softmax")(x)
-        x = AddDense(x, 256, "relu", 0.,
-                constraint=None, output=False,)
-        x = AddDense(x, h_dim[0] * h_dim[1] * 32/4, "relu", 0., constraint=None, output=False)
-        x = Reshape([h_dim[0]/2, h_dim[1]/2, 32])(x)
-    else:
-        x = AddConv2D(x, 128, [5,5], 1, 0.)
-    x = AddConv2DTranspose(x, 64, [5,5], 2,
-            model.dropout_rate)
-    # --- end ssm block
+    def _ssm(x):
+        return spatial_softmax(x)
+    x = Lambda(_ssm,name="encoder_spatial_softmax")(x)
+    x = AddDense(x, 128, "relu", 0.,
+            constraint=None, output=False,)
+    x = AddDense(x, h_dim[0] * h_dim[1] * 64/16, "relu", model.dropout_rate, constraint=None, output=False)
+    x = Reshape([h_dim[0]/4, h_dim[1]/4, 64])(x)
+    x = AddConv2DTranspose(x, 64, [5,5], 2, 0.)
 
-    if model.skip_connections or True:
-        x = Concatenate()([x, skip])
-
-    for i in range(1):
-        #x = TileOnto(x, y, model.num_options, (8,8))
-        x = AddConv2D(x, 64,
-                [7,7],
-                stride=1,
-                dropout_rate=model.dropout_rate)
+    # --- end ssm block
+    x = Concatenate()([x, skip])
+    x = Dropout(model.dropout_rate)(x)
+    x = AddConv2DTranspose(x, 64,
+            [5,5],
+            stride=2,
+            dropout_rate=model.dropout_rate)
+
+    x = Concatenate()([x, skip0])
+    x = AddConv2D(x, 64,
+            [5,5],
+            stride=1,
+            dropout_rate=model.dropout_rate)
 
     # --------------------------------------------------------------------
     # Put resulting image into the output shape
-    x = AddConv2D(x, model.encoder_channels, [1, 1], stride=1,
-            dropout_rate=0.)
+    #x = AddConv2D(x, model.encoder_channels, [1, 1], stride=1,
+    #        dropout_rate=0.)
     model.transform_model = Model([h0,h,option], x, name="tform")
     model.transform_model.compile(loss="mae", optimizer=model.getOptimizer())
+    #model.transform_model.summary()
     return model.transform_model
 
 
diff --git a/costar_models/python/costar_models/mhp_loss.py b/costar_models/python/costar_models/mhp_loss.py
index df98dce58..88067234d 100644
--- a/costar_models/python/costar_models/mhp_loss.py
+++ b/costar_models/python/costar_models/mhp_loss.py
@@ -157,9 +157,7 @@ def __call__(self, target, pred):
         xsum = tf.zeros([1, 1])
         xmin = tf.ones([1, 1])*1e10
 
-
         for i in range(self.num_hypotheses):
-
             target_outputs = _getOutputs(target, self.outputs, 0)
             pred_outputs = _getOutputs(pred, self.outputs, i)
             
@@ -199,12 +197,15 @@ def _getOutputs(state, outputs, i):
     ouputs: dimensionality of each output to retrieve in order
     '''
     idx = 0
-    separated_outputs = []
-    for output_dim in outputs:
-        # Print statement for debugging: shows ranges for each output, which
-        # should match the order of provided data.
-        #print("from ", idx, "to", idx+output_dim)
-        out = state[:,i,idx:idx+output_dim]
-        separated_outputs.append(out)
-        idx += output_dim
+    if len(outputs) > 1:
+      separated_outputs = []
+      for output_dim in outputs:
+          # Print statement for debugging: shows ranges for each output, which
+          # should match the order of provided data.
+          #print("from ", idx, "to", idx+output_dim)
+          out = state[:,i,idx:idx+output_dim]
+          separated_outputs.append(out)
+          idx += output_dim
+    else:
+      separated_outputs = [state[:,i]]
     return separated_outputs
diff --git a/costar_models/python/costar_models/parse.py b/costar_models/python/costar_models/parse.py
index d61193352..284cabf06 100644
--- a/costar_models/python/costar_models/parse.py
+++ b/costar_models/python/costar_models/parse.py
@@ -173,6 +173,9 @@ def GetModelParser():
                         help="portion of the gpu to allocate for this job",
                         type=float,
                         default=1.)
+    parser.add_argument("--preload",
+                        help="preload all files into RAM", default=False,
+                        action='store_true')
 
     return parser
 
diff --git a/costar_models/python/costar_models/pretrain_image_gan.py b/costar_models/python/costar_models/pretrain_image_gan.py
index f8024c188..11a8f5719 100644
--- a/costar_models/python/costar_models/pretrain_image_gan.py
+++ b/costar_models/python/costar_models/pretrain_image_gan.py
@@ -102,10 +102,12 @@ def _makeImageDiscriminator(self, img_shape):
         x = AddConv2D(img, 64, [4,4], 1, dr, "same", lrelu=True, bn=False)
         x0 = AddConv2D(img0, 64, [4,4], 1, dr, "same", lrelu=True, bn=False)
         x = Add()([x, x0])
-        x = AddConv2D(x, 64, [4,4], 2, dr, "same", lrelu=True, bn=True)
+        #x = Concatenate(axis=-1)([img0, img])
+        x = AddConv2D(x, 64, [4,4], 2, dr, "same", lrelu=True, bn=False)
         x = AddConv2D(x, 128, [4,4], 2, dr, "same", lrelu=True, bn=True)
         x = AddConv2D(x, 256, [4,4], 2, dr, "same", lrelu=True, bn=True)
-        x = AddConv2D(x, 1, [4,4], 1, 0., "same", activation="sigmoid")
+        x = AddConv2D(x, 1, [1,1], 1, 0., "same", activation="sigmoid",
+                bn=False)
         x = AveragePooling2D(pool_size=(8,8))(x)
 
         x = Flatten()(x)
diff --git a/costar_models/python/costar_models/pretrain_image_jigsaws_gan.py b/costar_models/python/costar_models/pretrain_image_jigsaws_gan.py
index 08ab3b400..9168b7cc1 100644
--- a/costar_models/python/costar_models/pretrain_image_jigsaws_gan.py
+++ b/costar_models/python/costar_models/pretrain_image_jigsaws_gan.py
@@ -28,12 +28,12 @@ def _makeModel(self, image, *args, **kwargs):
         if self.train_predictor is None:
             raise RuntimeError('did not make trainable model')
 
-    def __init__(self, taskdef, *args, **kwargs):
+    def __init__(self, *args, **kwargs):
         '''
         As in the other models, we call super() to parse arguments from the
         command line and set things like our optimizer and learning rate.
         '''
-        super(PretrainImageJigsawsGan, self).__init__(taskdef, *args, **kwargs)
+        super(PretrainImageJigsawsGan, self).__init__(*args, **kwargs)
         self.PredictorCb = ImageCb
 
         # This is literally the only change from the husky version
@@ -80,7 +80,7 @@ def _makePredictor(self, images):
         self.model = Model([img_in], [gen_out, o1])
         self.model.compile(
                 loss=["mae"] + ["binary_crossentropy"],
-                loss_weights=[100., 1.],
+                loss_weights=[10., 1.],
                 optimizer=self.getOptimizer())
 
         self.generator = Model([img_in], [gen_out])
@@ -109,10 +109,11 @@ def _makeImageDiscriminator(self, img_shape):
         x = AddConv2D(img, 64, [4,4], 1, dr, "same", lrelu=True, bn=False)
         x0 = AddConv2D(img0, 64, [4,4], 1, dr, "same", lrelu=True, bn=False)
         x = Add()([x, x0])
+        #x = Concatenate(axis=-1)([img0, img])
         x = AddConv2D(x, 64, [4,4], 2, dr, "same", lrelu=True, bn=False)
-        x = AddConv2D(x, 128, [4,4], 2, dr, "same", lrelu=True)
-        #x = AddConv2D(x, 256, [4,4], 2, dr, "same", lrelu=True)
-        x = AddConv2D(x, 1, [4,4], 1, 0., "same", activation="sigmoid")
+        x = AddConv2D(x, 128, [4,4], 2, dr, "same", lrelu=True, bn=True)
+        #x = AddConv2D(x, 256, [4,4], 2, dr, "same", lrelu=True, bn=True)
+        x = AddConv2D(x, 1, [1,1], 1, 0., "same", activation="sigmoid", bn=False)
         #x = AveragePooling2D(pool_size=(12,16))(x)
         x = AveragePooling2D(pool_size=(24,32))(x)
 
diff --git a/costar_models/scripts/ctp_model_tool b/costar_models/scripts/ctp_model_tool
index f4848697b..6d35fc2b2 100755
--- a/costar_models/scripts/ctp_model_tool
+++ b/costar_models/scripts/ctp_model_tool
@@ -27,10 +27,10 @@ def main(args):
             root += '.'
         root += tok
     if data_type == "npz":
-        dataset = NpzGeneratorDataset(root)
+        dataset = NpzGeneratorDataset(root, preload=args['preload'])
         data = dataset.load(success_only = args['success_only'])
     elif data_type == "h5f":
-        dataset = H5fGeneratorDataset(root)
+        dataset = H5fGeneratorDataset(root, preload=args['preload'])
         data = dataset.load(success_only = args['success_only'])
     else:
         raise NotImplementedError('data type not implemented: %s'%data_type)
diff --git a/docs/task_learning_experiments.md b/docs/task_learning_experiments.md
index d58312536..7afff67a8 100644
--- a/docs/task_learning_experiments.md
+++ b/docs/task_learning_experiments.md
@@ -85,6 +85,22 @@ rosrun costar_models ctp_model_tool --model pretrain_image --data_file suturing_
 rosrun costar_models ctp_model_tool --model pretrain_image_gan --data_file suturing_data.h5f --lr 0.001 --dropout_rate 0.2 --features jigsaws --batch_size 32
 ```
 
+#### Wasserstein GAN
+
+We also implemented Wasserstein GAN training.
+
+```
+# Run with wasserstein GAN loss
+rosrun costar_models ctp_model_tool --model pretrain_image_gan \
+  --features jigsaws --batch_size 64 --data_file suturing_data2.h5f \
+  --lr 0.00005 --optimizer rmsprop --steps_per_epoch 100 \
+  --dropout_rate 0.1 --load_model --preload  --wasserstein
+```
+
+Some options here:
+  - `--preload` will try to store the whole data set in memory for faster processing
+  - `--wasserstein` will tell the GAN to train with the Wasserstein loss instead of its default loss
+
 ## Training On MARCC
 
 MARCC is our cluster for machine learning, equipped with a large set of Tesla K80 GPUs. We assume that when training on a cluster like MARCC, you will not want a full ROS workspace, so instead we assume you will install to some path $COSTAR_PLAN and just run scripts.
diff --git a/slurm/ctp.sh b/slurm/ctp.sh
index 5920024db..0433ad914 100755
--- a/slurm/ctp.sh
+++ b/slurm/ctp.sh
@@ -14,9 +14,9 @@ echo "Running $@ on $SLURMD_NODENAME ..."
 module load tensorflow/cuda-8.0/r1.3 
 
 export DATASET="ctp_dec"
-export train_discriminator=true
-export train_image_encoder=true
-export train_multi_encoder=true
+export train_discriminator=false
+export train_image_encoder=false
+export train_multi_encoder=false
 export train_predictor=false
 export learning_rate=$1
 export dropout=$2
@@ -93,18 +93,22 @@ then
     --batch_size 64
 fi
 
-$HOME/costar_plan/costar_models/scripts/ctp_model_tool \
-  --features multi \
-  -e 100 \
-  --model conditional_image \
-  --data_file $HOME/work/$DATASET.h5f \
-  --lr $learning_rate \
-  --dropout_rate $dropout \
-  --model_directory $MODELDIR/ \
-  --optimizer $optimizer \
-  --steps_per_epoch 500 \
-  --loss $loss \
-  --batch_size 64
+
+if $train_conditional_image
+then
+  $HOME/costar_plan/costar_models/scripts/ctp_model_tool \
+    --features multi \
+    -e 150 \
+    --model conditional_image \
+    --data_file $HOME/work/$DATASET.h5f \
+    --lr $learning_rate \
+    --dropout_rate $dropout \
+    --model_directory $MODELDIR/ \
+    --optimizer $optimizer \
+    --steps_per_epoch 500 \
+    --loss $loss \
+    --batch_size 64
+fi
 
 $HOME/costar_plan/costar_models/scripts/ctp_model_tool \
   --features multi \
diff --git a/slurm/ctp_husky.sh b/slurm/ctp_husky.sh
index b47462805..2aff7a8e8 100755
--- a/slurm/ctp_husky.sh
+++ b/slurm/ctp_husky.sh
@@ -15,7 +15,7 @@ module load tensorflow/cuda-8.0/r1.3
 
 export DATASET="husky_data"
 export train_discriminator=true
-export train_image_encoder=false
+export train_image_encoder=true
 export train_multi_encoder=false
 export train_predictor=false
 export train_gans=true
@@ -34,7 +34,7 @@ then
     --features multi \
     -e 100 \
     --model discriminator \
-    --data_file $HOME/work/$DATASET.h5f \
+    --data_file $HOME/work/$DATASET.npz \
     --features husky \
     --lr $learning_rate \
     --dropout_rate $dropout \
@@ -49,7 +49,7 @@ then
     --features multi \
     -e 100 \
     --model goal_discriminator \
-    --data_file $HOME/work/$DATASET.h5f \
+    --data_file $HOME/work/$DATASET.npz \
     --lr $learning_rate \
     --features husky \
     --dropout_rate $dropout \
diff --git a/slurm/ctp_suturing.sh b/slurm/ctp_suturing.sh
index 0ba78881b..a417a3d10 100755
--- a/slurm/ctp_suturing.sh
+++ b/slurm/ctp_suturing.sh
@@ -1,5 +1,5 @@
 #!/bin/bash -l
-#SBATCH --job-name=ctpHusky
+#SBATCH --job-name=jigsaws
 #SBATCH --time=0-48:0:0
 #SBATCH --partition=gpu
 #SBATCH --gres=gpu:1
@@ -23,6 +23,40 @@ export noise_dim=$4
 export loss=$5
 export MODELDIR="$HOME/.costar/suturing_$learning_rate$optimizer$dropout$noise_dim$loss"
 
+if $train_discriminator
+then
+  echo "Training discriminator 1"
+  $HOME/costar_plan/costar_models/scripts/ctp_model_tool \
+    --features multi \
+    -e 100 \
+    --model discriminator \
+    --data_file $HOME/work/$DATASET.h5f \
+    --features jigsaws \
+    --lr $learning_rate \
+    --dropout_rate $dropout \
+    --model_directory $MODELDIR/ \
+    --optimizer $optimizer \
+    --steps_per_epoch 500 \
+    --noise_dim $noise_dim \
+    --loss $loss \
+    --batch_size 64
+  echo "Training discriminator 2"
+  $HOME/costar_plan/costar_models/scripts/ctp_model_tool \
+    --features multi \
+    -e 100 \
+    --model goal_discriminator \
+    --data_file $HOME/work/$DATASET.h5f \
+    --lr $learning_rate \
+    --features jigsaws \
+    --dropout_rate $dropout \
+    --model_directory $MODELDIR/ \
+    --optimizer $optimizer \
+    --steps_per_epoch 500 \
+    --noise_dim $noise_dim \
+    --loss $loss \
+    --batch_size 64
+fi
+
 
 
 if $train_image_encoder
@@ -32,7 +66,7 @@ then
     --features multi \
     -e 100 \
     --model pretrain_image_encoder \
-    --data_file $HOME/work/$DATASET.npz \
+    --data_file $HOME/work/$DATASET.h5f \
     --lr $learning_rate \
     --dropout_rate $dropout \
     --features jigsaws \
@@ -49,10 +83,11 @@ $HOME/costar_plan/costar_models/scripts/ctp_model_tool \
   --features multi \
   -e 100 \
   --model conditional_image \
-  --data_file $HOME/work/$DATASET.npz \
+  --data_file $HOME/work/$DATASET.h5f \
   --lr $learning_rate \
   --dropout_rate $dropout \
   --model_directory $MODELDIR/ \
+  --features jigsaws \
   --optimizer $optimizer \
   --use_noise true \
   --steps_per_epoch 500 \