Update README, LICENSE, refactor code

abhiskk · Apr 3, 2017 · 1f8de02 · 1f8de02
1 parent 536a799
commit 1f8de02
Show file tree

Hide file tree

Showing 15 changed files with 85 additions and 735 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,7 +1,3 @@
-MSCOCO/
-
-model/
-
 .idea/
 
 # Byte-compiled / optimized / DLL files

diff --git a/LICENSE b/LICENSE
diff --git a/README.md b/README.md
@@ -1,5 +1,5 @@
 # fast-neural-style :city_sunrise: :rocket:
-This repository contains a PyTorch implementation of an algorithm for artistic style transfer. The algorithm can be used to mix the content of an image with the style of another image. For example, here is a photograph of a door arch rendered in the style of a stained glass painting.
+This repository contains a pytorch implementation of an algorithm for artistic style transfer. The algorithm can be used to mix the content of an image with the style of another image. For example, here is a photograph of a door arch rendered in the style of a stained glass painting.
 
 The model uses the method described in [Perceptual Losses for Real-Time Style Transfer and Super-Resolution](https://arxiv.org/abs/1603.08155) along with [Instance Normalization](https://arxiv.org/pdf/1607.08022.pdf).
 
@@ -10,7 +10,7 @@ The model uses the method described in [Perceptual Losses for Real-Time Style Tr
 </p>
 
 ## Requirements
-The program is written in Python, and uses [PyTorch](http://pytorch.org/), [Scipy](https://www.scipy.org). A GPU is not necessary, but can provide a significant speed up specially for training a new model. Regular sized images can be styled on a laptop, desktop using saved models.
+The program is written in Python, and uses [pytorch](http://pytorch.org/), [Scipy](https://www.scipy.org). A GPU is not necessary, but can provide a significant speedup, especially when training a new model. Regular-sized images can be styled on a laptop or desktop using saved models.
 
 ## Usage
 Stylize image
@@ -24,27 +24,34 @@ python neural_style/neural_style.py eval --content-image </path/to/content/image
 
 Train model
 ```bash
-python neural_style/neural_style.py train --dataset </path/to/train-dataset> --vgg </path/to/vgg/folder> --save-model-dir </path/to/save-models/folder> --epochs 2 --cuda 1
+python neural_style/neural_style.py train --dataset </path/to/train-dataset> --style-image </path/to/style/image> --vgg-model-dir </path/to/vgg/folder> --save-model-dir </path/to/save-models/folder> --epochs 2 --cuda 1
 ```
 
 There are several command line arguments; the important ones are listed below:
-* `--dataset`: path to training dataset, I used COCO 2014 Training images dataset [80K/13GB] [(download)](http://mscoco.org/dataset/#download).
-* `--vgg`: path to folder where the vgg model will be downloaded.
+* `--dataset`: path to the training dataset; the path should point to a folder containing another folder with all the training images. I used COCO 2014 Training images dataset [80K/13GB] [(download)](http://mscoco.org/dataset/#download).
+* `--style-image`: path to style-image.
+* `--vgg-model-dir`: path to folder where the vgg model will be downloaded.
 * `--save-model-dir`: path to folder where trained model will be saved.
-* `--epochs`: train for these many iterations. The default is 2.
 * `--cuda`: set it to 1 for running on GPU, 0 for CPU.
 
 Refer to ``neural_style/neural_style.py`` for other command line arguments.
 
 ## Models
 
+The ``saved-models`` folder contains different style-models that you can use for styling your images. Below are the results of applying the style-models to the door arch image.
+
 <div align='center'>
-  <img src='images/content-images/amber.jpg' height="174px">
+  <img src='images/content-images/amber.jpg' height="174px">		
 </div>
 
 <div align='center'>
   <img src='images/style-images/mosaic.jpg' height="174px">
   <img src='images/output-images/amber-mosaic.jpg' height="174px">
   <img src='images/output-images/amber-candy.jpg' height="174px">
   <img src='images/style-images/candy.jpg' height="174px">
+  <br>
+  <img src='images/style-images/starry-night-cropped.jpg' height="174px">
+  <img src='images/output-images/amber-starry-night.jpg' height="174px">
+  <img src='images/output-images/amber-udnie.jpg' height="174px">
+  <img src='images/style-images/udnie.jpg' height="174px">
 </div>
diff --git a/images/content-images/chicago.jpg b/images/content-images/chicago.jpg
diff --git a/images/output-images/amber-starry-night.jpg b/images/output-images/amber-starry-night.jpg
diff --git a/images/output-images/amber-udnie.jpg b/images/output-images/amber-udnie.jpg
diff --git a/images/output-images/chicago-candy.jpg b/images/output-images/chicago-candy.jpg
diff --git a/images/output-images/chicago-mosaic.jpg b/images/output-images/chicago-mosaic.jpg
diff --git a/images/style-images/picasso-selfport1907.jpg b/images/style-images/picasso-selfport1907.jpg
diff --git a/images/style-images/starry-night-cropped.jpg b/images/style-images/starry-night-cropped.jpg
diff --git a/images/style-images/starry_night.jpg → images/style-images/starry-night.jpg b/images/style-images/starry_night.jpg → images/style-images/starry-night.jpg
diff --git a/neural_style/neural_style.py b/neural_style/neural_style.py
@@ -21,32 +21,15 @@ def train(args):
     torch.manual_seed(args.seed)
 
     if args.cuda and not torch.cuda.is_available():
-        print("WARNING: torch.cuda not available, using CPU.")
-        args.cuda = 0
+        print("ERROR: cuda is not available, try running on CPU")
+        sys.exit(1)
 
     if args.cuda:
         torch.cuda.manual_seed(args.seed)
         kwargs = {'num_workers': 0, 'pin_memory': False}
     else:
         kwargs = {}
 
-    print("=====================")
-    print("CURRENT TIME:", time.ctime())
-    print("PYTHON VERSION:", sys.version)
-    print("PYTORCH VERSION:", torch.__version__)
-    print("BATCH SIZE:", args.batch_size)
-    print("EPOCHS:", args.epochs)
-    print("RANDOM SEED:", args.seed)
-    print("CUDA:", args.cuda)
-    print("LEARNING RATE:", args.lr)
-    print("STYLE IMAGE:", args.style_image)
-    print("CONTENT WEIGHT:", args.content_weight)
-    print("STYLE WEIGHT:", args.style_weight)
-    print("DATASET:", args.dataset)
-    print("SAVE-MODEL DIRECTORY:", args.save_model_dir)
-    print("STYLE SIZE:", args.style_size)
-    print("=====================\n")
-
     transform = transforms.Compose([transforms.Scale(args.image_size),
                                     transforms.CenterCrop(args.image_size),
                                     transforms.ToTensor(),
@@ -133,7 +116,7 @@ def train(args):
     save_model_path = os.path.join(args.save_model_dir, save_model_filename)
     torch.save(transformer, save_model_path)
 
-    print("\nDone :)")
+    print("\nDone, trained model saved at", save_model_path)
 
 
 def check_paths(args):
@@ -160,27 +143,45 @@ def main():
     main_arg_parser = argparse.ArgumentParser(description="parser for fast-neural-style")
     subparsers = main_arg_parser.add_subparsers(title="subcommands", dest="subcommand")
 
-    train_arg_parser = subparsers.add_parser("train")
-    train_arg_parser.add_argument("--batch-size", type=int, default=4)
-    train_arg_parser.add_argument("--epochs", type=int, default=2)
-    train_arg_parser.add_argument("--vgg-model-dir", type=str, required=True)
-    train_arg_parser.add_argument("--seed", type=int, default=42)
-    train_arg_parser.add_argument("--cuda", type=int, required=True)
-    train_arg_parser.add_argument("--dataset", type=str, required=True)
-    train_arg_parser.add_argument("--image-size", type=int, default=256)
-    train_arg_parser.add_argument("--style-size", type=int, default=None)
-    train_arg_parser.add_argument("--lr", type=float, default=1e-3)
-    train_arg_parser.add_argument("--style-image", type=str, default="images/style-images/mosaic.jpg")
-    train_arg_parser.add_argument("--content-weight", type=float, default=1.0)
-    train_arg_parser.add_argument("--style-weight", type=float, default=5.0)
-    train_arg_parser.add_argument("--log-interval", type=int, default=500)
-    train_arg_parser.add_argument("--save-model-dir", type=str, required=True)
-
-    eval_arg_parser = subparsers.add_parser("eval")
-    eval_arg_parser.add_argument("--content-image", type=str, required=True)
-    eval_arg_parser.add_argument("--content-scale", type=float, default=None)
-    eval_arg_parser.add_argument("--output-image", type=str, required=True)
-    eval_arg_parser.add_argument("--model", type=str, required=True)
+    train_arg_parser = subparsers.add_parser("train",
+                                             help="parser for training arguments")
+    train_arg_parser.add_argument("--epochs", type=int, default=2,
+                                  help="number of training epochs, default is 2")
+    train_arg_parser.add_argument("--batch-size", type=int, default=4,
+                                  help="batch size for training, default is 4")
+    train_arg_parser.add_argument("--dataset", type=str, required=True,
+                                  help="path to training dataset, the path should point to a folder "
+                                       "containing another folder with all the training images")
+    train_arg_parser.add_argument("--style-image", type=str, default="images/style-images/mosaic.jpg",
+                                  help="path to style-image")
+    train_arg_parser.add_argument("--vgg-model-dir", type=str, required=True,
+                                  help="directory for vgg, if model is not present in the directory it is downloaded")
+    train_arg_parser.add_argument("--save-model-dir", type=str, required=True,
+                                  help="path to folder where trained model will be saved.")
+    train_arg_parser.add_argument("--image-size", type=int, default=256,
+                                  help="size of training images, default is 256 X 256")
+    train_arg_parser.add_argument("--style-size", type=int, default=None,
+                                  help="size of style-image, default is the original size of style image")
+    train_arg_parser.add_argument("--cuda", type=int, required=True, help="set it to 1 for running on GPU, 0 for CPU")
+    train_arg_parser.add_argument("--seed", type=int, default=42, help="random seed for training")
+    train_arg_parser.add_argument("--content-weight", type=float, default=1.0,
+                                  help="weight for content-loss, default is 1.0")
+    train_arg_parser.add_argument("--style-weight", type=float, default=5.0,
+                                  help="weight for style-loss, default is 5.0")
+    train_arg_parser.add_argument("--lr", type=float, default=1e-3,
+                                  help="learning rate, default is 0.001")
+    train_arg_parser.add_argument("--log-interval", type=int, default=500,
+                                  help="number of images after which the training loss is logged, default is 500")
+
+    eval_arg_parser = subparsers.add_parser("eval", help="parser for evaluation/stylizing arguments")
+    eval_arg_parser.add_argument("--content-image", type=str, required=True,
+                                 help="path to content image you want to stylize")
+    eval_arg_parser.add_argument("--content-scale", type=float, default=None,
+                                 help="factor for scaling down the content image")
+    eval_arg_parser.add_argument("--output-image", type=str, required=True,
+                                 help="path for saving the output image")
+    eval_arg_parser.add_argument("--model", type=str, required=True,
+                                 help="saved model to be used for stylizing the image")
 
     args = main_arg_parser.parse_args()
 

diff --git a/neural_style/utils.py b/neural_style/utils.py
@@ -9,7 +9,6 @@
 from vgg16 import Vgg16
 
 
-# result: RGB CxHxW [0,255] torch.FloatTensor
 def tensor_load_rgbimage(filename, size=None, scale=None):
     img = Image.open(filename)
     if size is not None:
@@ -21,14 +20,6 @@ def tensor_load_rgbimage(filename, size=None, scale=None):
     return img
 
 
-def gram_matrix(y):
-    (b, ch, h, w) = y.size()
-    features = y.view(b, ch, w * h)
-    features_t = features.transpose(1, 2)
-    gram = features.bmm(features_t) / (ch * h * w)
-    return gram
-
-
 def tensor_save_rgbimage(tensor, filename):
     img = tensor.clone().cpu().clamp(0, 255).numpy()
     img = img.transpose(1, 2, 0).astype('uint8')
@@ -42,6 +33,14 @@ def tensor_save_bgrimage(tensor, filename):
     tensor_save_rgbimage(tensor, filename)
 
 
+def gram_matrix(y):
+    (b, ch, h, w) = y.size()
+    features = y.view(b, ch, w * h)
+    features_t = features.transpose(1, 2)
+    gram = features.bmm(features_t) / (ch * h * w)
+    return gram
+
+
 def subtract_imagenet_mean_batch(batch):
     """Subtract ImageNet mean pixel-wise from a BGR image."""
     tensortype = type(batch.data)

diff --git a/saved-models/starry-night.model b/saved-models/starry-night.model
diff --git a/saved-models/udnie.model b/saved-models/udnie.model
-Original file line number
+Diff line change
@@ -1,7 +1,3 @@
-    MSCOCO/
-    model/
     .idea/
     # Byte-compiled / optimized / DLL files
@@ Expand Down @@