feat: Added warmup each cycle feature in CyclicalScheduler #3064

Merged: 18 commits, Sep 24, 2023
Changes from 15 commits
44 changes: 40 additions & 4 deletions ignite/handlers/param_scheduler.py
@@ -193,7 +193,7 @@ def __init__(
self._state_attrs += ["param_group_index"]

def __call__(self, engine: Optional[Engine], name: Optional[str] = None) -> None:
value = self.get_param()
value = self._get_param()

if isinstance(value, list):
if len(value) != len(self.optimizer_param_groups):
@@ -261,6 +261,11 @@ def simulate_values(cls, num_events: int, **scheduler_kwargs: Any) -> List[List[
values.append([i, scheduler.optimizer_param_groups[0][scheduler.param_name]])
return values

def _get_param(self) -> Union[List[float], float]:
# `ParamScheduler` does nothing special here, simply returning what the child class's `get_param()` returns.
# Intermediate child classes can override this method.
return self.get_param()
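
The `_get_param` indirection is a small template-method hook: `__call__` asks `_get_param()` for the value, the base implementation simply forwards to `get_param()`, and an intermediate subclass can override `_get_param()` to post-process the value without every concrete `get_param()` having to know about it. A minimal, self-contained sketch of that pattern (the class names here are illustrative, not ignite's):

```python
class Base:
    def current_value(self) -> float:
        # Mirrors the call chain above: always go through the hook.
        return self._get_param()

    def _get_param(self) -> float:
        # Default hook: plain forwarding to the concrete formula.
        return self.get_param()

    def get_param(self) -> float:
        raise NotImplementedError


class Intermediate(Base):
    # An intermediate class intercepts the hook, e.g. to blend in a warm-up value.
    def _get_param(self) -> float:
        return 0.5 * self.get_param()


class Concrete(Intermediate):
    def get_param(self) -> float:
        return 2.0


print(Concrete().current_value())  # 1.0, the intermediate hook was applied
```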


class CyclicalScheduler(ParamScheduler):
"""An abstract class for updating an optimizer's parameter value over a
@@ -279,6 +284,9 @@ class CyclicalScheduler(ParamScheduler):
end of each cycle (default=1.0).
end_value_mult: ratio by which to change the end value at the
end of each cycle (default=1.0).
cyclic_warmup_duration: duration of the warm-up to be applied before each cycle.
Through this warm-up, the parameter starts from the last cycle's end value
and goes linearly to the next cycle's start value. Default is no cyclic warm-up.
save_history: whether to log the parameter values to
`engine.state.param_history`, (default=False).
param_group_index: optimizer's parameters group to use.
@@ -288,6 +296,9 @@ class CyclicalScheduler(ParamScheduler):
usually be the number of batches in an epoch.

.. versionadded:: 0.4.5

.. versionchanged:: 0.4.13
Added cyclic warm-up to the scheduler using the ``cyclic_warmup_duration`` argument.
"""

def __init__(
@@ -300,6 +311,7 @@ def __init__(
cycle_mult: float = 1.0,
start_value_mult: float = 1.0,
end_value_mult: float = 1.0,
cyclic_warmup_duration: int = 0,
save_history: bool = False,
param_group_index: Optional[int] = None,
):
@@ -308,11 +320,13 @@ def __init__(
)
self.start_value = start_value
self.end_value = end_value
self.cycle_size = int(cycle_size) # Ensure cycle_size is integer
self.cycle_size = cycle_size
self.cycle_mult = cycle_mult
self.cycle = 0
self.start_value_mult = start_value_mult
self.end_value_mult = end_value_mult
self.warmup_duration = cyclic_warmup_duration
self.total_cycle_size = self.warmup_duration + self.cycle_size

if self.cycle_size < 2:
raise ValueError(f"Argument cycle_size should be positive and larger than 1, but given {cycle_size}")
@@ -325,18 +339,33 @@ def __init__(
"cycle",
"start_value_mult",
"end_value_mult",
"warmup_duration",
"total_cycle_size",
]

def __call__(self, engine: Optional[Engine], name: Optional[str] = None) -> None:
if self.event_index != 0 and self.event_index % self.cycle_size == 0:
if self.event_index != 0 and self.event_index == self.cycle_size:
self.start_value *= self.start_value_mult
if self.event_index != 0 and self.event_index == self.total_cycle_size:
self.event_index = 0
self.cycle_size = int(self.cycle_size * self.cycle_mult)
self.warmup_duration = int(self.warmup_duration * self.cycle_mult)
self.total_cycle_size = self.warmup_duration + self.cycle_size
self.cycle += 1
self.start_value *= self.start_value_mult
self.end_value *= self.end_value_mult

return super(CyclicalScheduler, self).__call__(engine, name)

def _get_param(self) -> Union[List[float], float]:
"""Applies warm-up if the scheduler is in the warm-up phase,
otherwise returns what is returned by `self.get_param()`
"""
if self.event_index > self.cycle_size:
warmup_progress = (self.event_index - self.cycle_size) / self.warmup_duration
return self.end_value + (self.start_value - self.end_value) * warmup_progress

return self.get_param()
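
In the warm-up branch above (`event_index > cycle_size`), the value is a plain linear interpolation from the cycle's `end_value` back towards `start_value`. A standalone sketch of just that arithmetic, using toy numbers rather than ignite objects:

```python
# Toy numbers: a cycle of 10 events just ended at end_value; a 4-event warm-up
# ramps the parameter back to start_value before the next cycle begins.
start_value, end_value = 1.0, 0.0
cycle_size, warmup_duration = 10, 4

for event_index in range(cycle_size + 1, cycle_size + warmup_duration):
    warmup_progress = (event_index - cycle_size) / warmup_duration
    value = end_value + (start_value - end_value) * warmup_progress
    print(event_index, value)  # 11 0.25, 12 0.5, 13 0.75
# At event_index == cycle_size + warmup_duration the scheduler resets the index
# and the next cycle starts again from start_value.
```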


class LinearCyclicalScheduler(CyclicalScheduler):
"""Linearly adjusts param value to 'end_value' for a half-cycle, then linearly
@@ -355,6 +384,9 @@ class LinearCyclicalScheduler(CyclicalScheduler):
end of each cycle (default=1.0).
end_value_mult: ratio by which to change the end value at the
end of each cycle (default=1.0).
cyclic_warmup_duration: duration of the warm-up to be applied before each cycle.
Through this warm-up, the parameter starts from the last cycle's end value
and goes linearly to the next cycle's start value. Default is no cyclic warm-up.
save_history: whether to log the parameter values to
`engine.state.param_history`, (default=False).
param_group_index: optimizer's parameters group to use.
@@ -433,6 +465,7 @@ def print_lr():
"""

def get_param(self) -> float:
"""Method to get current optimizer's parameter value"""
cycle_progress = self.event_index / self.cycle_size
return self.end_value + (self.start_value - self.end_value) * abs(cycle_progress - 0.5) * 2
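
As a quick sanity check, the triangular formula above can be evaluated in isolation; with `start_value=1`, `end_value=0` and `cycle_size=10` it produces the same sequence the updated test below keeps in `lr_values_in_cycle`:

```python
start_value, end_value, cycle_size = 1.0, 0.0, 10

values = []
for event_index in range(cycle_size):
    cycle_progress = event_index / cycle_size
    values.append(end_value + (start_value - end_value) * abs(cycle_progress - 0.5) * 2)

print(values)  # approx [1.0, 0.8, 0.6, 0.4, 0.2, 0.0, 0.2, 0.4, 0.6, 0.8], up to float rounding
```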

@@ -456,6 +489,9 @@ class CosineAnnealingScheduler(CyclicalScheduler):
end of each cycle (default=1.0).
end_value_mult: ratio by which to change the end value at the
end of each cycle (default=1.0).
cyclic_warmup_duration: duration of the warm-up to be applied before each cycle.
Through this warm-up, the parameter starts from the last cycle's end value
and goes linearly to the next cycle's start value. Default is no cyclic warm-up.
save_history: whether to log the parameter values to
`engine.state.param_history`, (default=False).
param_group_index: optimizer's parameters group to use.
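
Assuming the new keyword lands as shown in this diff, user code could enable the cyclic warm-up roughly as follows; the optimizer and the values are placeholders, and `simulate_values` is the existing `ParamScheduler` helper also used in the tests below:

```python
import torch
from ignite.handlers.param_scheduler import CosineAnnealingScheduler

param = torch.zeros([1], requires_grad=True)
optimizer = torch.optim.SGD([param], lr=0.0)

# One cosine cycle of 10 events, followed by a 2-event linear warm-up back to start_value.
scheduler = CosineAnnealingScheduler(
    optimizer, "lr", start_value=0.0, end_value=1.0, cycle_size=10, cyclic_warmup_duration=2
)

# Preview the schedule without running an Engine.
values = CosineAnnealingScheduler.simulate_values(
    num_events=24,
    param_name="lr",
    start_value=0.0,
    end_value=1.0,
    cycle_size=10,
    cyclic_warmup_duration=2,
)
print([round(v, 3) for _, v in values])
```

In real training, `scheduler` would be attached to an `Engine` via `Events.ITERATION_STARTED`, exactly as the tests below do.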
135 changes: 33 additions & 102 deletions tests/ignite/handlers/test_param_scheduler.py
@@ -55,7 +55,7 @@ def test_param_scheduler_asserts():
FakeParamScheduler({}, "lr")


def test_linear_scheduler():
def test_linear_scheduler_asserts():
with pytest.raises(TypeError, match=r"Argument optimizer should be torch.optim.Optimizer"):
LinearCyclicalScheduler({}, "lr", 1, 0, cycle_size=0)

@@ -68,6 +68,11 @@ def test_linear_scheduler():
with pytest.raises(ValueError, match=r"Argument cycle_size should be positive and larger than 1"):
LinearCyclicalScheduler(optimizer, "lr", 1, 0, cycle_size=1)


def test_linear_scheduler():
tensor = torch.zeros([1], requires_grad=True)
optimizer = torch.optim.SGD([tensor], lr=0.0)

scheduler = LinearCyclicalScheduler(optimizer, "lr", 1, 0, 10)
state_dict = scheduler.state_dict()

@@ -77,38 +82,12 @@ def save_lr(engine):
trainer = Engine(lambda engine, batch: None)
trainer.add_event_handler(Events.ITERATION_STARTED, scheduler)
trainer.add_event_handler(Events.ITERATION_COMPLETED, save_lr)

lr_values_in_cycle = [1.0, 0.8, 0.6, 0.4, 0.2, 0.0, 0.2, 0.4, 0.6, 0.8]
for _ in range(2):
lrs = []
trainer.run([0] * 9, max_epochs=2)
trainer.run([0] * 10, max_epochs=2)

assert lrs == list(
map(
pytest.approx,
[
# Cycle 1
1.0,
0.8,
0.6,
0.4,
0.2,
0.0,
0.2,
0.4,
0.6,
0.8,
# Cycle 2
1.0,
0.8,
0.6,
0.4,
0.2,
0.0,
0.2,
0.4, # 0.6, 0.8,
],
)
)
assert lrs == pytest.approx([*lr_values_in_cycle, *lr_values_in_cycle])
scheduler.load_state_dict(state_dict)

optimizer = torch.optim.SGD([tensor], lr=0)
@@ -164,49 +143,6 @@ def save_lr(engine):
)
scheduler.load_state_dict(state_dict)

# With float cycle_size
optimizer = torch.optim.SGD([tensor], lr=0)
scheduler = LinearCyclicalScheduler(
optimizer, "lr", start_value=1.2, end_value=0.2, cycle_size=10.00000012, cycle_mult=1.0
)
state_dict = scheduler.state_dict()

trainer = Engine(lambda engine, batch: None)
trainer.add_event_handler(Events.ITERATION_STARTED, scheduler)
trainer.add_event_handler(Events.ITERATION_COMPLETED, save_lr)

for _ in range(2):
lrs = []
trainer.run([0] * 9, max_epochs=2)
assert lrs == list(
map(
pytest.approx,
[
# Cycle 1
1.2,
1.0,
0.8,
0.6,
0.4,
0.2,
0.4,
0.6,
0.8,
1.0,
# Cycle 2
1.2,
1.0,
0.8,
0.6,
0.4,
0.2,
0.4,
0.6, # 0.8, 1.0,
],
)
)
scheduler.load_state_dict(state_dict)


def test_linear_scheduler_cycle_size_two():
tensor = torch.zeros([1], requires_grad=True)
@@ -239,17 +175,23 @@ def save_lr(engine):
assert lrs == pytest.approx([v for i, v in simulated_values])


def test_cosine_annealing_scheduler():
@pytest.mark.parametrize("cyclic_warmup", [False, True])
def test_cosine_annealing_scheduler(cyclic_warmup):
tensor = torch.zeros([1], requires_grad=True)
optimizer = torch.optim.SGD([tensor], lr=0)

scheduler = CosineAnnealingScheduler(optimizer, "lr", 0, 1, 10)
scheduler = CosineAnnealingScheduler(optimizer, "lr", 0, 1, 10, cyclic_warmup_duration=2 if cyclic_warmup else 0)
state_dict = scheduler.state_dict()

data = [0] * 9
data = [0] * (10 + int(cyclic_warmup))
max_epochs = 2
simulated_values = CosineAnnealingScheduler.simulate_values(
num_events=len(data) * max_epochs, param_name="lr", start_value=0, end_value=1, cycle_size=10
num_events=len(data) * max_epochs,
param_name="lr",
start_value=0,
end_value=1,
cycle_size=10,
cyclic_warmup_duration=2 if cyclic_warmup else 0,
)

def save_lr(engine):
@@ -258,36 +200,25 @@ def save_lr(engine):
trainer = Engine(lambda engine, batch: None)
trainer.add_event_handler(Events.ITERATION_STARTED, scheduler)
trainer.add_event_handler(Events.ITERATION_COMPLETED, save_lr)
lr_values_in_cycle = [
0.0,
0.02447174185242318,
0.09549150281252627,
0.20610737385376332,
0.3454915028125263,
0.5,
0.6545084971874737,
0.7938926261462365,
0.9045084971874737,
0.9755282581475768,
]
lr_values_in_warmup = np.linspace(1.0, 0.0, 2 + 1)[:-1].tolist() if cyclic_warmup else []

for _ in range(2):
lrs = []
trainer.run(data, max_epochs=max_epochs)

assert lrs == list(
map(
pytest.approx,
[
0.0,
0.02447174185242318,
0.09549150281252627,
0.20610737385376332,
0.3454915028125263,
0.5,
0.6545084971874737,
0.7938926261462365,
0.9045084971874737,
0.9755282581475768,
0.0,
0.02447174185242318,
0.09549150281252627,
0.20610737385376332,
0.3454915028125263,
0.5,
0.6545084971874737,
0.7938926261462365, # 0.9045084971874737, 0.9755282581475768
],
)
)
assert lrs == pytest.approx([*lr_values_in_cycle, *lr_values_in_warmup, *lr_values_in_cycle])
scheduler.load_state_dict(state_dict)

assert lrs == pytest.approx([v for i, v in simulated_values])