
Commit

conflicts
horheynm committed Dec 12, 2024
2 parents 69ab73d + 540d4b2 commit c8b45ab
Showing 69 changed files with 731 additions and 816 deletions.
@@ -1,4 +1,5 @@
import torch
from loguru import logger
from transformers import AutoModelForCausalLM

from llmcompressor.transformers import apply
@@ -52,3 +53,7 @@
lr_scheduler_type=lr_scheduler_type,
warmup_ratio=warmup_ratio,
)
logger.info(
    "Note: vLLM requires the dtype=torch.float16 when running the "
    "compressed marlin-24 model"
)
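For reference, the note logged above maps onto loading the compressed model in vLLM with half precision. A minimal sketch, assuming the compressed marlin-24 checkpoint was saved to a hypothetical local path:

from vllm import LLM, SamplingParams

# Hypothetical output path for the compressed marlin-24 checkpoint.
model_path = "./llama7b_sparse_marlin24"

# vLLM is asked for float16 explicitly, per the note logged above.
llm = LLM(model=model_path, dtype="float16")
outputs = llm.generate(
    ["Compression keeps models fast because"],
    SamplingParams(max_tokens=32),
)
print(outputs[0].outputs[0].text)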
1 change: 1 addition & 0 deletions setup.py
@@ -71,6 +71,7 @@
"pytest-mock>=3.6.0",
"pytest-rerunfailures>=13.0",
"parameterized",
"lm_eval==0.4.5",
# example test dependencies
"beautifulsoup4~=4.12.3",
"cmarkgfm~=2024.1.14",
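The new lm_eval pin sits under the test dependencies, presumably for accuracy checks in the test suite. A hedged sketch of the lm_eval 0.4.x programmatic entry point; the model and task below are placeholders, not what the tests actually run:

import lm_eval

# Placeholder model/task, purely to illustrate the 0.4.x API surface.
results = lm_eval.simple_evaluate(
    model="hf",
    model_args="pretrained=TinyLlama/TinyLlama-1.1B-Chat-v1.0,dtype=float16",
    tasks=["gsm8k"],
    num_fewshot=5,
    batch_size=8,
)
print(results["results"]["gsm8k"])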
7 changes: 3 additions & 4 deletions src/llmcompressor/modifiers/modifier.py
@@ -1,16 +1,15 @@
from abc import ABC, abstractmethod
from abc import abstractmethod
from typing import Optional

from pydantic import BaseModel

from llmcompressor.core.events import Event, EventType
from llmcompressor.core.state import State
from llmcompressor.modifiers.interface import ModifierInterface
from llmcompressor.modifiers.utils.hooks import HooksMixin

__all__ = ["Modifier"]


class Modifier(BaseModel, ModifierInterface, ABC):
class Modifier(ModifierInterface, HooksMixin):
"""
A base class for all modifiers to inherit from.
Modifiers are used to modify the training process for a model.
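The base-class swap above gives every Modifier the mixin's hook bookkeeping. The calls used throughout this commit — register_hook(module, fn, "forward_pre"), register_hook(layer, fn, "forward"), register_hook(param, fn, ""), and remove_hooks() — suggest centralized tracking along the lines of the sketch below; this is an illustration of the pattern only, not llmcompressor's actual HooksMixin implementation:

import torch

class SimpleHooksMixin:
    """Toy stand-in for centralized hook bookkeeping (illustrative only)."""

    def __init__(self):
        self._hook_handles = []  # RemovableHandle for every registered hook

    def register_hook(self, target, func, hook_type):
        # Dispatch to the matching torch registration call and keep the handle.
        if hook_type == "forward_pre":
            handle = target.register_forward_pre_hook(func)
        elif hook_type == "forward":
            handle = target.register_forward_hook(func)
        else:
            # An empty hook_type in this commit registers a gradient hook
            # directly on a Parameter/Tensor.
            handle = target.register_hook(func)
        self._hook_handles.append(handle)
        return handle

    def remove_hooks(self):
        # One call detaches everything, replacing the per-dict cleanup the
        # pruning modifiers and layer_mask.py used to do by hand.
        for handle in self._hook_handles:
            handle.remove()
        self._hook_handles.clear()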
72 changes: 35 additions & 37 deletions src/llmcompressor/modifiers/obcq/base.py
@@ -1,3 +1,4 @@
from functools import partial
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union

import numpy as np
@@ -130,7 +131,8 @@ def initialize_compression(
"Inferring layer-wise sparsities from "
f"{len(dataloader)} calibration samples..."
)
self.sparsity = self._infer_layer_sparsity(dataloader)
activations = self._get_activations(dataloader)
self.sparsity = self._infer_layer_sparsity(activations)
self._validate_layerwise_sparsity()

for idx, (name, layer) in enumerate(self.compressible_layers_.items()):
@@ -254,19 +256,17 @@ def _infer_mask_block_size(self):

self.prunen_, self.prunem_ = list(map(int, self.mask_structure.split(":")))

def _infer_layer_sparsity(self, calibration_dataloader):
acts = _get_activations(self.model, calibration_dataloader)
def _infer_layer_sparsity(self, activations):
sparsegpt_groups = {}
for name, layer in self.compressible_layers_.items():
prunable_layers = get_prunable_layers(layer)
z = [
m.weight.abs() * acts[f"{name}.{n}"].unsqueeze(0)
m.weight.abs() * activations[f"{name}.{n}"].unsqueeze(0)
for n, m in prunable_layers.items()
]
sparsegpt_groups[name] = torch.cat([item.flatten().cpu() for item in z])

acts = None
del acts
del activations
torch.cuda.empty_cache()

outlier_ratios = {}
@@ -300,36 +300,34 @@ def _infer_layer_sparsity(self, calibration_dataloader):
logger.info(f"Sparsity for {k}: {sparsities[k]}")
return sparsities

@torch.no_grad()
def _get_activations(self, data_loader, nsamples=128):
self.model.eval()
acts = {}

def save_acts(module, input, name):
if isinstance(input, tuple):
input = input[0]
if name not in acts:
acts[name] = (
1.0 / nsamples * input.detach().pow(2).sum(dim=(0, 1)).sqrt()
)
else:
acts[name] += (
1.0 / nsamples * input.detach().pow(2).sum(dim=(0, 1)).sqrt()
)

for name, mod in self.model.named_modules():
if isinstance(mod, torch.nn.Linear) and "lm_head" not in name:
self.register_hook(mod, partial(save_acts, name=name), "forward_pre")

device = next(self.model.parameters()).device
for batch in tqdm(data_loader):
batch = {k: v.to(device) for k, v in batch.items()}
self.model(**batch)
batch = None
torch.cuda.empty_cache()

@torch.no_grad()
def _get_activations(model, data_loader, nsamples=128):
import functools

model.eval()
acts = {}

def save_acts(module, input, name):
if isinstance(input, tuple):
input = input[0]
if name not in acts:
acts[name] = 1.0 / nsamples * input.detach().pow(2).sum(dim=(0, 1)).sqrt()
else:
acts[name] += 1.0 / nsamples * input.detach().pow(2).sum(dim=(0, 1)).sqrt()

hooks = []
for name, mod in model.named_modules():
if isinstance(mod, torch.nn.Linear) and "lm_head" not in name:
hooks.append(
mod.register_forward_pre_hook(functools.partial(save_acts, name=name))
)
device = next(model.parameters()).device
for batch in tqdm(data_loader):
batch = {k: v.to(device) for k, v in batch.items()}
model(**batch)
batch = None
torch.cuda.empty_cache()

for h in hooks:
h.remove()
self.remove_hooks()

return acts
return acts
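To make the statistic that save_acts accumulates concrete: for each Linear input of shape (batch, seq_len, hidden), it adds 1/nsamples times the square root of the sum of squares over the batch and sequence dimensions, i.e. a scaled per-channel L2 norm of the calibration activations. A standalone toy version of the same arithmetic (shapes are illustrative only):

import torch

nsamples = 2
hidden = 4
acts = None
for _ in range(nsamples):
    # One calibration batch: (batch=1, seq_len=8, hidden) as seen by a Linear.
    x = torch.randn(1, 8, hidden)
    stat = 1.0 / nsamples * x.pow(2).sum(dim=(0, 1)).sqrt()
    acts = stat if acts is None else acts + stat

print(acts.shape)  # torch.Size([4]) -- one value per input channel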
21 changes: 5 additions & 16 deletions src/llmcompressor/modifiers/pruning/utils/pytorch/layer_mask.py
@@ -2,11 +2,10 @@
from typing import Dict

import torch
from pydantic import BaseModel
from torch.nn import Parameter
from torch.utils.hooks import RemovableHandle

from llmcompressor.core import ModelParameterizedLayer
from llmcompressor.modifiers.utils.hooks import HooksMixin

__all__ = ["LayerParamMasking", "param_mask_name"]

@@ -39,11 +38,9 @@ class ParameterizedLayerMaskSettings:
use_hooks: bool = False


class LayerParamMasking(BaseModel):
class LayerParamMasking(HooksMixin):
_mask_settings: Dict[str, ParameterizedLayerMaskSettings] = {}
_masked_layer_params: Dict[str, ModelParameterizedLayer] = {}
_forward_hooks: Dict[str, RemovableHandle] = {}
_backward_hooks: Dict[str, RemovableHandle] = {}
enabled_: bool = False

def add_mask(
@@ -100,12 +97,8 @@ def _backward_hook_fn(gradients):

return gradients

self._forward_hooks[layer_param_name] = (
parameterized_layer.layer.register_forward_hook(_forward_hook_fn)
)
self._backward_hooks[layer_param_name] = (
parameterized_layer.param.register_hook(_backward_hook_fn)
)
self.register_hook(parameterized_layer.layer, _forward_hook_fn, "forward")
self.register_hook(parameterized_layer.param, _backward_hook_fn, "")

def update_mask(
self,
@@ -131,11 +124,7 @@ def remove_mask(self, layer_param_name: str):
del self._mask_settings[layer_param_name]

if mask_settings.use_hooks:
self._forward_hooks[layer_param_name].remove()
self._backward_hooks[layer_param_name].remove()

del self._forward_hooks[layer_param_name]
del self._backward_hooks[layer_param_name]
self.remove_hooks()

def apply_mask_weight(self, layer_param_name: str):
if not self.enabled_:
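The _backward_hook_fn registered on the parameter above relies on torch's ability to rewrite a gradient in flight. A self-contained illustration of that mechanism with a hypothetical mask (not LayerParamMasking itself):

import torch

param = torch.nn.Parameter(torch.randn(4))
mask = torch.tensor([1.0, 0.0, 1.0, 0.0])  # hypothetical pruning mask

def _backward_hook_fn(grad):
    # Gradient entries at masked (zero) positions are silenced.
    return grad * mask

handle = param.register_hook(_backward_hook_fn)
loss = (param * 2.0).sum()
loss.backward()
print(param.grad)  # tensor([2., 0., 2., 0.])
handle.remove()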
72 changes: 35 additions & 37 deletions src/llmcompressor/modifiers/pruning/wanda/base.py
@@ -1,3 +1,4 @@
from functools import partial
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union

import numpy as np
@@ -121,7 +122,8 @@ def initialize_compression(
"Inferring layer-wise sparsities from "
f"{len(dataloader) if dataloader else 0} calibration samples..."
)
self.sparsity = self._infer_layer_sparsity(dataloader)
activations = self._get_activations(dataloader)
self.sparsity = self._infer_layer_sparsity(activations)
self._validate_layerwise_sparsity()

for idx, (name, layer) in enumerate(self.compressible_layers_.items()):
@@ -224,19 +226,17 @@ def _infer_mask_block_size(self):

self.prunen_, self.prunem_ = list(map(int, self.mask_structure.split(":")))

def _infer_layer_sparsity(self, calibration_dataloader):
acts = _get_activations(self.model, calibration_dataloader)
def _infer_layer_sparsity(self, activations):
wanda = {}
for name, layer in self.compressible_layers_.items():
prunable_layers = get_prunable_layers(layer)
z = [
m.weight.abs() * acts[f"{name}.{n}"].unsqueeze(0)
m.weight.abs() * activations[f"{name}.{n}"].unsqueeze(0)
for n, m in prunable_layers.items()
]
wanda[name] = torch.cat([item.flatten().cpu() for item in z])

acts = None
del acts
del activations
torch.cuda.empty_cache()

outlier_ratios = {}
@@ -268,36 +268,34 @@ def _infer_layer_sparsity(self, calibration_dataloader):
logger.info(f"Sparsity for {k}: {sparsities[k]}")
return sparsities

@torch.no_grad()
def _get_activations(self, data_loader, nsamples=128):
self.model.eval()
acts = {}

def save_acts(module, input, name):
if isinstance(input, tuple):
input = input[0]
if name not in acts:
acts[name] = (
1.0 / nsamples * input.detach().pow(2).sum(dim=(0, 1)).sqrt()
)
else:
acts[name] += (
1.0 / nsamples * input.detach().pow(2).sum(dim=(0, 1)).sqrt()
)

for name, mod in self.model.named_modules():
if isinstance(mod, torch.nn.Linear) and "lm_head" not in name:
self.register_hook(mod, partial(save_acts, name=name), "forward_pre")

device = next(self.model.parameters()).device
for batch in tqdm(data_loader):
batch = {k: v.to(device) for k, v in batch.items()}
self.model(**batch)
batch = None
torch.cuda.empty_cache()

@torch.no_grad()
def _get_activations(model, data_loader, nsamples=128):
import functools

model.eval()
acts = {}

def save_acts(module, input, name):
if isinstance(input, tuple):
input = input[0]
if name not in acts:
acts[name] = 1.0 / nsamples * input.detach().pow(2).sum(dim=(0, 1)).sqrt()
else:
acts[name] += 1.0 / nsamples * input.detach().pow(2).sum(dim=(0, 1)).sqrt()

hooks = []
for name, mod in model.named_modules():
if isinstance(mod, torch.nn.Linear) and "lm_head" not in name:
hooks.append(
mod.register_forward_pre_hook(functools.partial(save_acts, name=name))
)
device = next(model.parameters()).device
for batch in tqdm(data_loader):
batch = {k: v.to(device) for k, v in batch.items()}
model(**batch)
batch = None
torch.cuda.empty_cache()

for h in hooks:
h.remove()
self.remove_hooks()

return acts
return acts
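The score assembled in _infer_layer_sparsity above is the elementwise product of |W| and the per-input-channel activation statistic, broadcast across output rows. A toy version with illustrative shapes:

import torch

out_features, in_features = 3, 4
weight = torch.randn(out_features, in_features)
act_norm = torch.rand(in_features)  # stands in for acts[f"{name}.{n}"]

# unsqueeze(0) makes the statistic broadcast over every output row,
# mirroring m.weight.abs() * activations[...].unsqueeze(0) above.
score = weight.abs() * act_norm.unsqueeze(0)
print(score.shape)  # torch.Size([3, 4])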