kakaobrain · chenxwh · Dec 21, 2021 · Aug 9, 2022
diff --git a/README.md b/README.md
@@ -6,6 +6,7 @@ image-text pairs for non-commercial purposes.
 ![a painting of a bird in the style of asian painting](assets/bird_asian_painting_style.gif)
 ![a photo of san francisco's golden gate bridge in black and white tone](assets/golden_gate_black_and_white_tone.gif)
 
+[![Replicate](https://replicate.com/saehoonkim/mindall-e/badge)](https://replicate.com/saehoonkim/mindall-e)
 
 ## Environment Setup
 - Basic setup

diff --git a/cog.yaml b/cog.yaml
@@ -0,0 +1,22 @@
+build:
+  cuda: "10.2"
+  gpu: true  # predict.py's setup() places both models on 'cuda:0'
+  python_version: "3.8"
+  system_packages:
+    - "libgl1-mesa-glx"
+    - "libglib2.0-0"
+  python_packages:
+    - "torch==1.10.0"
+    - "torchvision==0.11.1"
+    - "tokenizers==0.10.2"
+    - "pyflakes==2.2.0"
+    - "tqdm==4.46.0"
+    - "pytorch-lightning==1.5"
+    - "einops==0.3.2"
+    - "omegaconf==2.1.1"
+    - "matplotlib==3.5.1"
+    - "ipython==7.30.1"
+  run:
+    - pip install git+https://github.com/openai/CLIP.git  # NOTE(review): no commit/tag is pinned, unlike the packages above
+
+predict: "predict.py:Predictor"  # <file>:<class> that predict.py defines as the predictor
diff --git a/predict.py b/predict.py
@@ -0,0 +1,101 @@
+"""
+Cog predictor for minDALL-E.
+
+Download the weights to 'pretrained' first:
+wget https://arena.kakaocdn.net/brainrepo/models/minDALL-E/57b008f02ceaa02b779c8b7463143315/1.3B.tar.gz
+tar -xvf 1.3B.tar.gz
+"""
+from typing import List
+import tempfile
+import numpy as np
+from pathlib import Path
+from PIL import Image, ImageOps
+from cog import BasePredictor, Path, Input, BaseModel  # NOTE: cog's Path shadows pathlib.Path above
+import clip
+
+from dalle.models import Dalle
+from dalle.utils.utils import set_seed, clip_score
+
+
+# Output schema: a single generated image, exposed as a file path.
+class ModelOutput(BaseModel):
+    image: Path
+
+
+class Predictor(BasePredictor):
+    """Cog predictor that samples minDALL-E candidates and re-ranks them with CLIP."""
+
+    def setup(self):
+        """Load the minDALL-E checkpoint and the CLIP model onto the GPU.
+
+        Expects the 1.3B weights to be unpacked under ``pretrained/1.3B``.
+        """
+        self.device = 'cuda:0'
+        self.model = Dalle.from_pretrained("pretrained/1.3B")
+        self.model.to(device=self.device)
+        # clip.load() is given the target device, so no extra .to() call is needed.
+        self.model_clip, self.preprocess_clip = clip.load("ViT-B/32", device=self.device)
+
+    def predict(
+        self,
+        prompt: str = Input(
+            description="Prompt for generating image.",
+        ),
+        num_samples: int = Input(
+            default=4,
+            ge=1,
+            le=9,
+            description="Number of generated images.",
+        ),
+        seed: int = Input(
+            default=0,
+            description="Set seed. 0 for random seed.",
+        ),
+    ) -> List[ModelOutput]:
+        """Generate images for ``prompt`` and return the best ones as PNG files.
+
+        Samples ``num_candidates`` images, orders them with ``clip_score`` and
+        keeps the first ``num_samples``.
+
+        Args:
+            prompt: Text description of the image to generate.
+            num_samples: How many of the re-ranked images to return (1-9).
+            seed: Seed passed to ``set_seed``; 0 draws a fresh random seed.
+
+        Returns:
+            One ``ModelOutput`` per kept image, in the order given by the ranking.
+        """
+        softmax_temperature = 1
+        top_k = 256
+        num_candidates = 30
+
+        if seed == 0:
+            # The input is documented as "0 for random seed", so draw a fresh
+            # non-zero 32-bit seed from OS entropy instead of seeding with 0.
+            seed = int(np.random.default_rng().integers(1, 2**32))
+        set_seed(seed)
+
+        images = self.model.sampling(prompt=prompt,
+                                     top_k=top_k,
+                                     top_p=None,
+                                     softmax_temperature=softmax_temperature,
+                                     num_candidates=num_candidates,
+                                     device=self.device).cpu().numpy()
+        # Move axis 1 to the end (presumably NCHW -> NHWC for PIL; confirm).
+        images = np.transpose(images, (0, 2, 3, 1))
+
+        # CLIP re-ranking: reorder by `rank`, then keep the first num_samples.
+        rank = clip_score(prompt=prompt, images=images, model_clip=self.model_clip,
+                          preprocess_clip=self.preprocess_clip, device=self.device)
+        images = images[rank][:num_samples]
+
+        # One scratch directory for the whole batch of output files.
+        out_dir = Path(tempfile.mkdtemp())
+        output = []
+        for i, array in enumerate(images):
+            # Assumes pixel values are floats in [0, 1] -- TODO confirm.
+            img = Image.fromarray((array * 255).astype(np.uint8))
+            output_path = out_dir / f"output_{i}.png"
+            img.save(str(output_path))
+            output.append(ModelOutput(image=output_path))
+        return output