
Comparing changes

base repository: Aleph-Alpha/magma (base: master)
head repository: Quentin-Anthony/magma (compare: master)

Able to merge. These branches can be automatically merged.
  • 14 commits
  • 14 files changed
  • 4 contributors

Commits on Jan 15, 2023

  1. 5b11fda
  2. add hf accelerate
     Quentin-Anthony authored Jan 15, 2023 (9b85824)

Commits on Jan 17, 2023

  1. add cmake
     Quentin-Anthony authored Jan 17, 2023 (36f5a65)
  2. add einops
     Quentin-Anthony authored Jan 17, 2023 (513a441)

Commits on Jan 27, 2023

  1. 3bf6e6e
  2. Merge pull request #1 from Quentin-Anthony/device-fix
     Fixed magma constructor to properly update subcomponent devices
     Quentin-Anthony authored Jan 27, 2023 (b30539d)

Commits on Mar 8, 2023

  1. Fix OOM
     Quentin-Anthony committed Mar 8, 2023 (de69f6d)

Commits on Mar 20, 2023

  1. 4d8d4d7

Commits on Mar 23, 2023

  1. fix model params mismatch
     floatingsnake committed Mar 23, 2023 (5444e30)
  2. Merge pull request #3 from floatingsnake/master
     fix model params num mismatch
     Quentin-Anthony authored Mar 23, 2023 (27f81d6)
  3. Merge pull request #2 from Quentin-Anthony/shape-fix
     Fixed shape error by unsqueezing logits
     Quentin-Anthony authored Mar 23, 2023 (d793893)
  4. ed3a5b0

Commits on Mar 31, 2023

  1. 1334bad
  2. Merge pull request #6 from Quentin-Anthony/Quentin-Anthony-patch-1
     Add rank, local_rank, world_size for any launching mechanism
     Quentin-Anthony authored Mar 31, 2023 (bbe3696)
1 change: 1 addition & 0 deletions .gitignore
@@ -3,3 +3,4 @@
*.pyc
wandb/*
aws*
*.env*
40 changes: 40 additions & 0 deletions configs/summit_clipH_neox20B.yml
@@ -0,0 +1,40 @@
{
# image encoder settings
encoder_name: 'openclip-H',
adapter_config: {"mlp": {"adapter_type": "normal", "downsample_factor": 8}, "attention": {"adapter_type": "normal", "downsample_factor": 8}},
freeze_img_encoder: false,

# language model settings
lm_name: "neox",
lm_path: "EleutherAI/gpt-neox-20b",

# train settings
batch_size: 256,
train_steps: 150000,
lr: 8.0e-4,
min_lr: 0.0,
lr_decay_iters: 300000,
image_enc_lr: 2.0e-6,
use_image_embed_layernorm: true,
image_embed_dropout_prob: 0.1,
image_size: 384,

gradient_accumulation_steps: 4,
zero_stage: 2,
gradient_clipping: 1.0,

# dataset / save / load settings
# dataset_type: 'new',
train_dataset_dir: ['/gpfs/alpine/csc499/proj-shared/magma/flickr8k_processed'], #'/mnt/brick/wit_converted'

eval_dataset_dir: null, # if this is none, train dataset will be split
# vqa_dir: "/mnt/localdisk/vqa_val_converted",
# gqa_dir: "/mnt/localdisk/gqa_val_converted",

save: "checkpoints/MAGMA_20B_clipH",
load: "checkpoints/MAGMA_20B_clipH",

eval_every: 250,
wandb_project: "MAGMA_20B_clipH",
name: "MAGMA_20B_clipH"
}
40 changes: 40 additions & 0 deletions configs/summit_clipH_pythia70m.yml
@@ -0,0 +1,40 @@
{
# image encoder settings
encoder_name: 'openclip-H',
adapter_config: {"mlp": {"adapter_type": "normal", "downsample_factor": 8}, "attention": {"adapter_type": "normal", "downsample_factor": 8}},
freeze_img_encoder: false,

# language model settings
lm_name: "neox",
lm_path: "EleutherAI/pythia-70m-deduped",

# train settings
batch_size: 256,
train_steps: 150000,
lr: 8.0e-4,
min_lr: 0.0,
lr_decay_iters: 300000,
image_enc_lr: 2.0e-6,
use_image_embed_layernorm: true,
image_embed_dropout_prob: 0.1,
image_size: 384,

gradient_accumulation_steps: 4,
zero_stage: 2,
gradient_clipping: 1.0,

# dataset / save / load settings
# dataset_type: 'new',
train_dataset_dir: ['/gpfs/alpine/csc499/proj-shared/magma/flickr8k_processed'], #'/mnt/brick/wit_converted'

eval_dataset_dir: null, # if this is none, train dataset will be split
# vqa_dir: "/mnt/localdisk/vqa_val_converted",
# gqa_dir: "/mnt/localdisk/gqa_val_converted",

save: "checkpoints/MAGMA_19M_clipH",
load: "checkpoints/MAGMA_19M_clipH",

eval_every: 250,
wandb_project: "MAGMA_19M_clipH",
name: "MAGMA_19M_clipH"
}
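
Both configs above use the repo's flow-style YAML, which parses to a plain dict with PyYAML. The snippet below is a minimal sketch of inspecting the pythia-70m config; whether magma's MultimodalConfig also exposes a dedicated loader for these files is not shown in this diff.

import yaml

with open("configs/summit_clipH_pythia70m.yml") as f:
    cfg = yaml.safe_load(f)  # flow-style mapping with inline comments loads as a dict

print(cfg["lm_name"], cfg["lm_path"], cfg["batch_size"])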
21 changes: 21 additions & 0 deletions convert_flickr8k.py
@@ -0,0 +1,21 @@
from magma.datasets.convert_datasets import convert_dataset
import csv
from pathlib import Path

def my_dataset_iterator():
    """
    Implement an iterator for your dataset that for every datapoint yields a tuple
    image_path, {"captions": [...], "metadata": {...}, }, where image_path is the path to the image as a Path object, captions is a list of caption strings and metadata is an optional field.
    """
    with open("/gpfs/alpine/csc499/proj-shared/magma/flickr8k/captions.txt") as f:
        default_iter = csv.reader(f)

        custom_iter = []
        next(default_iter)
        for row in default_iter:
            custom_iter.append((Path('/gpfs/alpine/csc499/proj-shared/magma/flickr8k/images/' + row[0]), {"captions": row[1]}))
        return iter(custom_iter)


if __name__ == "__main__":
    convert_dataset(data_dir="/gpfs/alpine/csc499/proj-shared/magma/flickr8k_processed", ds_iterator=my_dataset_iterator(), mode='cp')
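
For reference, the docstring above asks each yielded datapoint to be a (Path, dict) tuple whose "captions" value is a list of caption strings. A minimal sketch with placeholder values (note that row[1] above is a single string, so wrapping it in a list would match the documented format exactly):

from pathlib import Path

# placeholder path and caption, purely illustrative
example_datapoint = (
    Path("/path/to/images/example.jpg"),
    {"captions": ["a caption describing the image"], "metadata": {}},
)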
5 changes: 2 additions & 3 deletions example_inference.py
@@ -1,9 +1,8 @@
from magma import Magma
from magma.image_input import ImageInput

model = Magma.from_checkpoint(
    config_path = "configs/MAGMA_v1.yml",
    checkpoint_path = "./mp_rank_00_model_states.pt",
model = Magma(
    config = "configs/summit_clipH_pythia19m.yml",
    device = 'cuda:0'
)
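
The remainder of example_inference.py is unchanged and not shown in this diff. Based on the upstream MAGMA README, inference typically continues roughly as below (a sketch, not part of this pull request; the image URL is a placeholder). Note also that the config path above, configs/summit_clipH_pythia19m.yml, does not match either config file added by this pull request.

inputs = [
    ImageInput("https://example.com/some_image.jpg"),  # placeholder URL
    "Describe the painting:",
]

# embed the (image, text) prompt, then generate a completion
embeddings = model.preprocess_inputs(inputs)
output = model.generate(embeddings=embeddings, max_steps=6, temperature=0.7, top_k=0)
print(output[0])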

5 changes: 5 additions & 0 deletions magma/config.py
@@ -2,6 +2,7 @@
import yaml
from pprint import pprint
from .utils import is_main
from typing import Optional
import os
from pathlib import Path
import uuid
@@ -42,6 +43,10 @@ class MultimodalConfig:
    fine_tune: bool = False
    load_optimizer: bool = True

    # Language model:
    lm_name: str = "gptj"
    lm_path: Optional[str] = None

    # Checkpointing:
    # ------------------------------------------------------------
    save_every: int = 2500
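
The new lm_name and lm_path fields let the configs above pick the language model by Hugging Face identifier. How the rest of magma consumes them is outside this diff; the snippet below is only a plausible sketch of such a lookup (AutoModelForCausalLM is an assumption here, not necessarily what magma uses internally).

from transformers import AutoModelForCausalLM

# values taken from configs/summit_clipH_pythia70m.yml above
lm_name, lm_path = "neox", "EleutherAI/pythia-70m-deduped"
if lm_name == "neox" and lm_path is not None:
    lm = AutoModelForCausalLM.from_pretrained(lm_path)  # hypothetical consumption of the new fields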
37 changes: 29 additions & 8 deletions magma/datasets/convert_datasets.py
@@ -20,6 +20,7 @@ def save_to_jsons(data_list, target_dir, starting_idx=0):


def save_images(img_list, target_dir, mode="mv"):
print(f'img_list: {img_list}')
for img_path in tqdm(
img_list,
desc=f"saving {len(img_list)} images (mode={mode}) to {str(target_dir)}",
@@ -34,7 +35,7 @@ def convert_dataset(
data_dir,
dir_size=10000,
hash_fn=None,
mode="mv",
mode="cp",
ds_iterator=None,
):
"""
@@ -62,12 +63,13 @@ def convert_dataset(
enumerate(ds_iterator),
desc="converting dataset to standard format...",
)

final_iter = 0
for k, (img_path, data) in pbar:
img_cpt_data = {}
# get img data
img_cpt_data.update(data)

#print(f'img_path: {str(img_path)}')
if str(img_path) in new_img_locations.keys():
# if filename is in the dictionary, it already has a new location
new_img_path = new_img_locations[str(img_path)]["new_img_path"]
@@ -78,7 +80,8 @@ def convert_dataset(
]["hash"]
else:
# if file exists in the old location, it will get moved to a new directory
new_img_path = f"images/{save_img_dir.name}/{img_path.name}"
#new_img_path = f"images/{save_img_dir}/{img_path}"
new_img_path = f"{img_path}"
img_cpt_data["image_path"] = new_img_path
new_img_locations[str(img_path)] = {"new_img_path": new_img_path}
# original location is saved an later saved to the new directory
@@ -97,22 +100,40 @@ def convert_dataset(

img_data_list.append(img_cpt_data)

if k % 10000 - 1 == 0:
print(f'img_path_list len: {len(img_path_list)}')


# save images in specified images folder (maximum of dir_size images per folder)
if (len(img_path_list) % dir_size == 0 and len(img_path_list) > 0) or (
k == len(ds_iterator) - 1
):
if (len(img_path_list) % dir_size == 0 and len(img_path_list) > 0):
print(f"saving {len(img_path_list)} images...")
os.makedirs(save_img_dir, exist_ok=True)
save_images(img_path_list, save_img_dir, mode=mode)
img_path_list = []
num_img_dirs += 1
save_img_dir = data_dir / "images" / f"{num_img_dirs}/"

# save jdon data in specified image_data folder with consecutive labeling of the json files
if ((k + 1) % dir_size == 0) or (k == len(ds_iterator) - 1):
# save json data in specified image_data folder with consecutive labeling of the json files
if ((k + 1) % dir_size == 0):
os.makedirs(save_data_dir, exist_ok=True)
save_to_jsons(
img_data_list, save_data_dir, starting_idx=max(k + 1 - dir_size, 0)
)
# empty path and data lists and update save directories for next saving step
img_data_list = []
save_data_dir = data_dir / "image_data" / f"{int((k+1)/dir_size)}/"
final_iter = k

os.makedirs(save_img_dir, exist_ok=True)
save_images(img_path_list, save_img_dir, mode=mode)
img_path_list = []
num_img_dirs += 1
save_img_dir = data_dir / "images" / f"{num_img_dirs}/"

os.makedirs(save_data_dir, exist_ok=True)
save_to_jsons(
img_data_list, save_data_dir, starting_idx=max(final_iter + 1 - dir_size, 0)
)
# empty path and data lists and update save directories for next saving step
img_data_list = []
save_data_dir = data_dir / "image_data" / f"{int((final_iter+1)/dir_size)}/"
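
Net effect of the restructuring above: inside the loop, images and JSON metadata are flushed every dir_size datapoints, and the block after the loop (tracked via final_iter) flushes whatever remains. This replaces the old end-of-dataset check k == len(ds_iterator) - 1, which a plain iterator such as the one returned by my_dataset_iterator above does not support. The resulting on-disk layout is roughly as follows (folder numbering depends on dataset size and dir_size):

flickr8k_processed/
    images/<n>/        # up to dir_size image files per numbered folder
    image_data/<m>/    # the matching JSON caption/metadata files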
56 changes: 43 additions & 13 deletions magma/image_encoders.py
@@ -4,15 +4,15 @@
from torchtyping import patch_typeguard
from einops import rearrange
import timm
import clip
import open_clip
from functools import partial

# ----------------------------- Utils --------------------------------------

clip.model.LayerNorm = (
nn.LayerNorm
) # we need to patch this for clip to work with deepspeed
patch_typeguard() # needed for torchtyping typechecks to work
# clip.model.LayerNorm = (
# nn.LayerNorm
# ) # we need to patch this for clip to work with deepspeed
# patch_typeguard() # needed for torchtyping typechecks to work


class Lambda(torch.nn.Module):
@@ -54,25 +54,55 @@ def clip_encoder(
    If the variant is a resnet model, we also remove the attention pooling.
    """
    if name in ["clip", "ViT-B/32"]:
        name = "ViT-B/32"
        name, pretrained = "ViT-B-32", "openai"
    elif name in ["clip_resnet", "RN50x4"]:
        name = "RN50x4"
        name, pretrained = "RN50x4", "openai"
    elif name in ["clip_resnet_large", "RN50x16"]:
        name = "RN50x16"
        name, pretrained = "RN50x16", "openai"
    elif "openclip" in name:
        if "H" in name:
            name, pretrained = "ViT-H-14", "laion2b_s32b_b79k"
        elif "B" in name and "32" in name:
            name, pretrained = "ViT-B-32", "laion2b_s34b_b79k"
        else:
            raise NotImplementedError(f"Encoder {name} not recognized")
    else:
        raise ValueError(f"encoder {name} not recognized")
        raise NotImplementedError(f"Encoder {name} not recognized")

    encoder = clip.load(name, device=device)[0].visual

    if device is not None:
        encoder = encoder.to(device)
    # TODO better internet connection
    encoder = open_clip.create_model(name, device=device, precision="fp16" if "cuda" in str(device) else "fp32").visual  # , pretrained=pretrained).visual

    if "RN" in name:
        # remove attention pooling
        encoder.attnpool = Lambda(
            partial(rearrange, pattern="b d h w -> b (h w) d")
        )  # remove attn pooling, just use reshaped features

    if False and hasattr(encoder, "transformer"):  # TODO when do we want to disable pooling?
        def forward(self, x: torch.Tensor):
            x = self.conv1(x)  # shape = [*, width, grid, grid]
            x = x.reshape(x.shape[0], x.shape[1], -1)  # shape = [*, width, grid ** 2]
            x = x.permute(0, 2, 1)  # shape = [*, grid ** 2, width]
            x = torch.cat(
                [self.class_embedding.to(x.dtype) + torch.zeros(x.shape[0], 1, x.shape[-1], dtype=x.dtype, device=x.device),
                 x], dim=1)  # shape = [*, grid ** 2 + 1, width]
            x = x + self.positional_embedding.to(x.dtype)

            ## a patch_dropout of 0. would mean it is disabled and this function would do nothing but return what was passed in
            # x = self.patch_dropout(x)
            x = self.ln_pre(x)

            x = x.permute(1, 0, 2)  # NLD -> LND
            x = self.transformer(x)
            x = self.ln_post(x)
            x = x.permute(1, 0, 2)  # LND -> NLD
            return x
        encoder.forward = partial(forward, encoder)


    if device is not None:
        encoder = encoder.to(device)

    return encoder
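
The visual tower now comes from open_clip instead of the original clip package. Below is a minimal sketch of the equivalent standalone call for the ViT-B-32 mapping above (downloading the laion2b weights is assumed to be acceptable here; the diff itself leaves pretrained commented out behind a TODO, and the ViT-H-14 checkpoint used for 'openclip-H' is much larger).

import torch
import open_clip

# visual tower only, mirroring clip_encoder above
encoder = open_clip.create_model("ViT-B-32", pretrained="laion2b_s34b_b79k").visual
with torch.no_grad():
    feats = encoder(torch.randn(1, 3, 224, 224))  # pooled features unless pooling is patched out
print(feats.shape)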


7 changes: 7 additions & 0 deletions magma/image_prefix.py
@@ -11,13 +11,15 @@
ENCODER_SEQ_LENS = {
    "clip_resnet": 49,
    "clip_resnet_large": 144,
    "openclip-H": 257
}

ENCODER_OUT_DIMS = {
    "nfresnet50": 2048,
    "clip": 512,
    "clip_resnet": 2560,
    "clip_resnet_large": 3072,
    "openclip-H": 1024,
}


@@ -48,6 +50,7 @@ def __init__(
        # get image encoder backbone
        self.enc = get_image_encoder(
            config.encoder_name,
            # device=self.device,
            pretrained=config.pretrained_img_encoder,
        )
        self.encoder_out_dim = ENCODER_OUT_DIMS[
@@ -106,4 +109,8 @@ def forward(
        if self.use_layernorm:
            logits = self.ln(logits)

        # Added for shape mismatch.
        if logits.ndim == 2:
            logits = logits.unsqueeze(1)

        return logits
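
The unsqueeze above is the change referenced by the "Fixed shape error by unsqueezing logits" merge commit: a pooled image encoder returns a (batch, dim) tensor, while the rest of the image prefix expects an image-token sequence of shape (batch, seq, dim). A small illustration:

import torch

logits = torch.randn(4, 1024)      # pooled encoder output: (batch, dim)
if logits.ndim == 2:
    logits = logits.unsqueeze(1)   # -> (batch, 1, dim), a length-1 image-token sequence
print(logits.shape)                # torch.Size([4, 1, 1024])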