# modified from transformers.optimization
import math
from functools import partial
from typing import Optional

import torch
from torch import nn
from torch.optim import Optimizer
from torch.optim.lr_scheduler import LambdaLR, ReduceLROnPlateau


def _get_constant_lambda(_=None):
    return 1


def get_constant_schedule(optimizer: Optimizer, last_epoch: int = -1):
    """
    Create a schedule with a constant learning rate, using the learning rate set in the optimizer.
    Args:
        optimizer ([`~torch.optim.Optimizer`]):
            The optimizer for which to schedule the learning rate.
        last_epoch (`int`, *optional*, defaults to -1):
            The index of the last epoch when resuming training.
    Return:
        `torch.optim.lr_scheduler.LambdaLR` with the appropriate schedule.
    """
    return LambdaLR(optimizer, _get_constant_lambda, last_epoch=last_epoch)


def get_reduce_on_plateau_schedule(optimizer: Optimizer, **kwargs):
    """
    Create a schedule that holds the learning rate constant and reduces it when a metric has stopped improving.
    Args:
        optimizer ([`~torch.optim.Optimizer`]):
            The optimizer for which to schedule the learning rate.
        kwargs (`dict`, *optional*):
            Extra parameters to be passed to the scheduler. See `torch.optim.lr_scheduler.ReduceLROnPlateau`
            for possible parameters.
    Return:
        `torch.optim.lr_scheduler.ReduceLROnPlateau` with the appropriate schedule.
    """
    return ReduceLROnPlateau(optimizer, **kwargs)
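

# Usage sketch (illustrative, not from the original module): unlike the LambdaLR-based
# schedules in this module, ReduceLROnPlateau must be stepped with a metric value. The
# helper name and the toy numbers below are hypothetical.
def _demo_reduce_on_plateau():
    model = nn.Linear(2, 2)
    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)
    scheduler = get_reduce_on_plateau_schedule(optimizer, mode="min", factor=0.5, patience=2)
    for _ in range(6):
        optimizer.step()
        val_loss = 1.0  # a metric that never improves
        scheduler.step(val_loss)  # lr is halved once the metric stalls for more than `patience` epochs
    return optimizer.param_groups[0]["lr"]  # 5e-4 after the single reduction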


def _get_constant_schedule_with_warmup_lr_lambda(current_step: int, *, num_warmup_steps: int):
    if current_step < num_warmup_steps:
        return float(current_step) / float(max(1.0, num_warmup_steps))
    return 1.0


def get_constant_schedule_with_warmup(optimizer: Optimizer, num_warmup_steps: int, last_epoch: int = -1):
    """
    Create a schedule with a constant learning rate preceded by a warmup period during which the learning rate
    increases linearly between 0 and the initial lr set in the optimizer.
    Args:
        optimizer ([`~torch.optim.Optimizer`]):
            The optimizer for which to schedule the learning rate.
        num_warmup_steps (`int`):
            The number of steps for the warmup phase.
        last_epoch (`int`, *optional*, defaults to -1):
            The index of the last epoch when resuming training.
    Return:
        `torch.optim.lr_scheduler.LambdaLR` with the appropriate schedule.
    """
    lr_lambda = partial(_get_constant_schedule_with_warmup_lr_lambda, num_warmup_steps=num_warmup_steps)
    return LambdaLR(optimizer, lr_lambda, last_epoch=last_epoch)


def _get_linear_schedule_with_warmup_lr_lambda(current_step: int, *, num_warmup_steps: int, num_training_steps: int):
    if current_step < num_warmup_steps:
        return float(current_step) / float(max(1, num_warmup_steps))
    return max(0.0, float(num_training_steps - current_step) / float(max(1, num_training_steps - num_warmup_steps)))


def get_linear_schedule_with_warmup(optimizer, num_warmup_steps, num_training_steps, last_epoch=-1):
    """
    Create a schedule with a learning rate that decreases linearly from the initial lr set in the optimizer to 0, after
    a warmup period during which it increases linearly from 0 to the initial lr set in the optimizer.
    Args:
        optimizer ([`~torch.optim.Optimizer`]):
            The optimizer for which to schedule the learning rate.
        num_warmup_steps (`int`):
            The number of steps for the warmup phase.
        num_training_steps (`int`):
            The total number of training steps.
        last_epoch (`int`, *optional*, defaults to -1):
            The index of the last epoch when resuming training.
    Return:
        `torch.optim.lr_scheduler.LambdaLR` with the appropriate schedule.
    """
    lr_lambda = partial(
        _get_linear_schedule_with_warmup_lr_lambda,
        num_warmup_steps=num_warmup_steps,
        num_training_steps=num_training_steps,
    )
    return LambdaLR(optimizer, lr_lambda, last_epoch)
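

# Usage sketch (illustrative, not from the original module): with a base lr of 1e-3, 2 warmup
# steps and 10 training steps, the lr ramps from 0 to 1e-3 over the first two steps and then
# decays linearly to 0 by step 10. The helper name and the toy numbers are hypothetical.
def _demo_linear_schedule_with_warmup():
    model = nn.Linear(2, 2)
    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)
    scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=2, num_training_steps=10)
    lrs = []
    for _ in range(10):
        optimizer.step()  # in real training: loss.backward(); optimizer.step()
        scheduler.step()  # the scheduler is stepped once per optimizer update
        lrs.append(scheduler.get_last_lr()[0])
    return lrs  # [5e-4, 1e-3, 8.75e-4, ..., 0.0]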


def _get_cosine_schedule_with_warmup_lr_lambda(
    current_step: int, *, num_warmup_steps: int, num_training_steps: int, num_cycles: float
):
    if current_step < num_warmup_steps:
        return float(current_step) / float(max(1, num_warmup_steps))
    progress = float(current_step - num_warmup_steps) / float(max(1, num_training_steps - num_warmup_steps))
    return max(0.0, 0.5 * (1.0 + math.cos(math.pi * float(num_cycles) * 2.0 * progress)))


def get_cosine_schedule_with_warmup(
    optimizer: Optimizer, num_warmup_steps: int, num_training_steps: int, num_cycles: float = 0.5, last_epoch: int = -1
):
    """
    Create a schedule with a learning rate that decreases following the values of the cosine function from the
    initial lr set in the optimizer to 0, after a warmup period during which it increases linearly between 0 and the
    initial lr set in the optimizer.
    Args:
        optimizer ([`~torch.optim.Optimizer`]):
            The optimizer for which to schedule the learning rate.
        num_warmup_steps (`int`):
            The number of steps for the warmup phase.
        num_training_steps (`int`):
            The total number of training steps.
        num_cycles (`float`, *optional*, defaults to 0.5):
            The number of waves in the cosine schedule (the default is to just decrease from the max value to 0
            following a half-cosine).
        last_epoch (`int`, *optional*, defaults to -1):
            The index of the last epoch when resuming training.
    Return:
        `torch.optim.lr_scheduler.LambdaLR` with the appropriate schedule.
    """
    lr_lambda = partial(
        _get_cosine_schedule_with_warmup_lr_lambda,
        num_warmup_steps=num_warmup_steps,
        num_training_steps=num_training_steps,
        num_cycles=num_cycles,
    )
    return LambdaLR(optimizer, lr_lambda, last_epoch)
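

# Worked example (illustrative, not from the original module): with no warmup, 100 training
# steps and the default num_cycles=0.5, the multiplier traces half a cosine wave: 1.0 at
# step 0, 0.5 at the midpoint, 0.0 at the final step. The helper name is hypothetical.
def _demo_cosine_lambda_values():
    lam = partial(
        _get_cosine_schedule_with_warmup_lr_lambda,
        num_warmup_steps=0,
        num_training_steps=100,
        num_cycles=0.5,
    )
    return [round(lam(step), 3) for step in (0, 25, 50, 75, 100)]  # [1.0, 0.854, 0.5, 0.146, 0.0]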


def _get_cosine_with_hard_restarts_schedule_with_warmup_lr_lambda(
    current_step: int, *, num_warmup_steps: int, num_training_steps: int, num_cycles: int
):
    if current_step < num_warmup_steps:
        return float(current_step) / float(max(1, num_warmup_steps))
    progress = float(current_step - num_warmup_steps) / float(max(1, num_training_steps - num_warmup_steps))
    if progress >= 1.0:
        return 0.0
    return max(0.0, 0.5 * (1.0 + math.cos(math.pi * ((float(num_cycles) * progress) % 1.0))))


def get_cosine_with_hard_restarts_schedule_with_warmup(
    optimizer: Optimizer, num_warmup_steps: int, num_training_steps: int, num_cycles: int = 1, last_epoch: int = -1
):
    """
    Create a schedule with a learning rate that decreases following the values of the cosine function from the
    initial lr set in the optimizer to 0, with several hard restarts, after a warmup period during which it increases
    linearly between 0 and the initial lr set in the optimizer.
    Args:
        optimizer ([`~torch.optim.Optimizer`]):
            The optimizer for which to schedule the learning rate.
        num_warmup_steps (`int`):
            The number of steps for the warmup phase.
        num_training_steps (`int`):
            The total number of training steps.
        num_cycles (`int`, *optional*, defaults to 1):
            The number of hard restarts to use.
        last_epoch (`int`, *optional*, defaults to -1):
            The index of the last epoch when resuming training.
    Return:
        `torch.optim.lr_scheduler.LambdaLR` with the appropriate schedule.
    """
    lr_lambda = partial(
        _get_cosine_with_hard_restarts_schedule_with_warmup_lr_lambda,
        num_warmup_steps=num_warmup_steps,
        num_training_steps=num_training_steps,
        num_cycles=num_cycles,
    )
    return LambdaLR(optimizer, lr_lambda, last_epoch)
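

# Worked example (illustrative, not from the original module): with num_cycles=2 and no
# warmup over 100 steps, the multiplier decays to 0 and snaps back to 1.0 at the restart
# boundary (step 50) instead of tracing one smooth half-cosine. The helper name is hypothetical.
def _demo_hard_restart_lambda_values():
    lam = partial(
        _get_cosine_with_hard_restarts_schedule_with_warmup_lr_lambda,
        num_warmup_steps=0,
        num_training_steps=100,
        num_cycles=2,
    )
    return [round(lam(step), 3) for step in (0, 25, 49, 50, 75, 100)]  # [1.0, 0.5, 0.001, 1.0, 0.5, 0.0]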


def _get_polynomial_decay_schedule_with_warmup_lr_lambda(
    current_step: int,
    *,
    num_warmup_steps: int,
    num_training_steps: int,
    lr_end: float,
    power: float,
    lr_init: float,
):
    if current_step < num_warmup_steps:
        return float(current_step) / float(max(1, num_warmup_steps))
    elif current_step > num_training_steps:
        return lr_end / lr_init  # as LambdaLR multiplies by lr_init
    else:
        lr_range = lr_init - lr_end
        decay_steps = num_training_steps - num_warmup_steps
        pct_remaining = 1 - (current_step - num_warmup_steps) / decay_steps
        decay = lr_range * pct_remaining**power + lr_end
        return decay / lr_init  # as LambdaLR multiplies by lr_init


def get_polynomial_decay_schedule_with_warmup(
    optimizer, num_warmup_steps, num_training_steps, lr_end=1e-7, power=1.0, last_epoch=-1
):
    """
    Create a schedule with a learning rate that decreases as a polynomial decay from the initial lr set in the
    optimizer to the end lr defined by *lr_end*, after a warmup period during which it increases linearly from 0 to
    the initial lr set in the optimizer.
    Args:
        optimizer ([`~torch.optim.Optimizer`]):
            The optimizer for which to schedule the learning rate.
        num_warmup_steps (`int`):
            The number of steps for the warmup phase.
        num_training_steps (`int`):
            The total number of training steps.
        lr_end (`float`, *optional*, defaults to 1e-7):
            The final learning rate at the end of the polynomial decay.
        power (`float`, *optional*, defaults to 1.0):
            The power factor of the polynomial decay.
        last_epoch (`int`, *optional*, defaults to -1):
            The index of the last epoch when resuming training.
    Note: *power* defaults to 1.0 as in the fairseq implementation, which in turn is based on the original BERT
    implementation at
    https://github.com/google-research/bert/blob/f39e881b169b9d53bea03d2d341b31707a6c052b/optimization.py#L37
    Return:
        `torch.optim.lr_scheduler.LambdaLR` with the appropriate schedule.
    """
    lr_init = optimizer.defaults["lr"]
    if not (lr_init > lr_end):
        raise ValueError(f"lr_end ({lr_end}) must be smaller than initial lr ({lr_init})")
    lr_lambda = partial(
        _get_polynomial_decay_schedule_with_warmup_lr_lambda,
        num_warmup_steps=num_warmup_steps,
        num_training_steps=num_training_steps,
        lr_end=lr_end,
        power=power,
        lr_init=lr_init,
    )
    return LambdaLR(optimizer, lr_lambda, last_epoch)
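

# Usage sketch (illustrative, not from the original module): with power=1.0 the decay is
# linear, but unlike get_linear_schedule_with_warmup it bottoms out at lr_end instead of 0
# and stays there once num_training_steps is exceeded. Helper name and numbers are hypothetical.
def _demo_polynomial_decay():
    model = nn.Linear(2, 2)
    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)
    scheduler = get_polynomial_decay_schedule_with_warmup(
        optimizer, num_warmup_steps=0, num_training_steps=10, lr_end=1e-5, power=1.0
    )
    for _ in range(15):  # keep stepping past num_training_steps
        optimizer.step()
        scheduler.step()
    return scheduler.get_last_lr()[0]  # ~1e-5, clamped at lr_end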


def _get_inverse_sqrt_schedule_lr_lambda(current_step: int, *, num_warmup_steps: int, timescale: Optional[int] = None):
    if current_step < num_warmup_steps:
        return float(current_step) / float(max(1, num_warmup_steps))
    shift = timescale - num_warmup_steps
    decay = 1.0 / math.sqrt((current_step + shift) / timescale)
    return decay


def get_inverse_sqrt_schedule(
    optimizer: Optimizer, num_warmup_steps: int, timescale: Optional[int] = None, last_epoch: int = -1
):
    """
    Create a schedule with an inverse square-root learning rate, starting from the initial lr set in the optimizer,
    after a warmup period during which the lr increases linearly from 0 to the initial lr set in the optimizer.
    Args:
        optimizer ([`~torch.optim.Optimizer`]):
            The optimizer for which to schedule the learning rate.
        num_warmup_steps (`int`):
            The number of steps for the warmup phase.
        timescale (`int`, *optional*, defaults to `num_warmup_steps`):
            The time scale of the inverse square-root decay. Defaults to `num_warmup_steps`, or to 10,000 if
            `num_warmup_steps` is 0.
        last_epoch (`int`, *optional*, defaults to -1):
            The index of the last epoch when resuming training.
    Return:
        `torch.optim.lr_scheduler.LambdaLR` with the appropriate schedule.
    """
    # Note: this implementation is adapted from
    # https://github.com/google-research/big_vision/blob/f071ce68852d56099437004fd70057597a95f6ef/big_vision/utils.py#L930
    if timescale is None:
        timescale = num_warmup_steps or 10_000
    lr_lambda = partial(_get_inverse_sqrt_schedule_lr_lambda, num_warmup_steps=num_warmup_steps, timescale=timescale)
    return LambdaLR(optimizer, lr_lambda, last_epoch=last_epoch)
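

# Worked example (illustrative, not from the original module): with num_warmup_steps=1000 the
# timescale defaults to 1000, so the multiplier is 1.0 right after warmup and then decays as
# 1/sqrt(step / timescale): ~0.707 at step 2000 and 0.5 at step 4000. The helper name is hypothetical.
def _demo_inverse_sqrt_lambda_values():
    lam = partial(_get_inverse_sqrt_schedule_lr_lambda, num_warmup_steps=1000, timescale=1000)
    return [round(lam(step), 3) for step in (1000, 2000, 4000)]  # [1.0, 0.707, 0.5]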


# Like _get_cosine_schedule_with_warmup_lr_lambda above, but with a floor at min_lr_rate times
# the initial lr instead of decaying all the way to 0.
def _get_cosine_with_min_lr_schedule_with_warmup_lr_lambda(
    current_step: int, *, num_warmup_steps: int, num_training_steps: int, num_cycles: float, min_lr_rate: float = 0.0
):
    if current_step < num_warmup_steps:
        return float(current_step) / float(max(1, num_warmup_steps))
    progress = float(current_step - num_warmup_steps) / float(max(1, num_training_steps - num_warmup_steps))
    factor = 0.5 * (1.0 + math.cos(math.pi * float(num_cycles) * 2.0 * progress))
    factor = factor * (1 - min_lr_rate) + min_lr_rate
    return max(0, factor)


def get_cosine_with_min_lr_schedule_with_warmup(
    optimizer: Optimizer,
    num_warmup_steps: int,
    num_training_steps: int,
    num_cycles: float = 0.5,
    last_epoch: int = -1,
    min_lr: Optional[float] = None,
    min_lr_rate: Optional[float] = None,
):
    """
    Create a schedule with a learning rate that decreases following the values of the cosine function from the
    initial lr set in the optimizer to `min_lr`, after a warmup period during which it increases linearly between 0
    and the initial lr set in the optimizer.
    Args:
        optimizer ([`~torch.optim.Optimizer`]):
            The optimizer for which to schedule the learning rate.
        num_warmup_steps (`int`):
            The number of steps for the warmup phase.
        num_training_steps (`int`):
            The total number of training steps.
        num_cycles (`float`, *optional*, defaults to 0.5):
            The number of waves in the cosine schedule (the default is to just decrease from the max value to the
            minimum following a half-cosine).
        last_epoch (`int`, *optional*, defaults to -1):
            The index of the last epoch when resuming training.
        min_lr (`float`, *optional*):
            The minimum learning rate to reach after the cosine schedule.
        min_lr_rate (`float`, *optional*):
            The minimum learning rate as a ratio of the initial learning rate. If set, `min_lr` should not be set.
    Return:
        `torch.optim.lr_scheduler.LambdaLR` with the appropriate schedule.
    """
    if min_lr is not None and min_lr_rate is not None:
        raise ValueError("Only one of min_lr or min_lr_rate should be set")
    elif min_lr is not None:
        min_lr_rate = min_lr / optimizer.defaults["lr"]
    elif min_lr_rate is None:
        raise ValueError("One of min_lr or min_lr_rate should be set through the `lr_scheduler_kwargs`")
    lr_lambda = partial(
        _get_cosine_with_min_lr_schedule_with_warmup_lr_lambda,
        num_warmup_steps=num_warmup_steps,
        num_training_steps=num_training_steps,
        num_cycles=num_cycles,
        min_lr_rate=min_lr_rate,
    )
    return LambdaLR(optimizer, lr_lambda, last_epoch)
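

# Usage sketch (illustrative, not from the original module): exactly one of min_lr or
# min_lr_rate must be given; here min_lr=1e-4 with a base lr of 1e-3 is equivalent to
# min_lr_rate=0.1, and the lr ends at 1e-4 instead of 0. Helper name and numbers are hypothetical.
def _demo_cosine_with_min_lr():
    model = nn.Linear(2, 2)
    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)
    scheduler = get_cosine_with_min_lr_schedule_with_warmup(
        optimizer, num_warmup_steps=0, num_training_steps=10, min_lr=1e-4
    )
    for _ in range(10):
        optimizer.step()
        scheduler.step()
    return scheduler.get_last_lr()[0]  # ~1e-4, the configured floor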


def _get_wsd_scheduler_lambda(
    current_step: int,
    *,
    num_warmup_steps: int,
    num_stable_steps: int,
    num_decay_steps: int,
    num_cycles: float,
    min_lr_ratio: float,
):
    if current_step < num_warmup_steps:
        return float(current_step) / float(max(1, num_warmup_steps))
    if current_step < num_warmup_steps + num_stable_steps:
        return 1.0
    if current_step < num_warmup_steps + num_stable_steps + num_decay_steps:
        progress = float(current_step - num_warmup_steps - num_stable_steps) / float(max(1, num_decay_steps))
        value = max(0.0, 0.5 * (1.0 + math.cos(math.pi * float(num_cycles) * 2.0 * progress)))
        return (1.0 - min_lr_ratio) * value + min_lr_ratio
    return min_lr_ratio


def get_wsd_schedule(
    optimizer: Optimizer,
    num_warmup_steps: int,
    num_stable_steps: int,
    num_decay_steps: int,
    min_lr_ratio: float = 0,
    num_cycles: float = 0.5,
    last_epoch: int = -1,
):
    """
    Create a schedule with a learning rate that has three stages:
    1. a linear increase from 0 to the initial lr.
    2. a constant lr (equal to the initial lr).
    3. a decrease following the values of the cosine function from the initial lr set in the optimizer to a
       fraction of the initial lr given by `min_lr_ratio`.
    Args:
        optimizer ([`~torch.optim.Optimizer`]):
            The optimizer for which to schedule the learning rate.
        num_warmup_steps (`int`):
            The number of steps for the warmup phase.
        num_stable_steps (`int`):
            The number of steps for the stable phase.
        num_decay_steps (`int`):
            The number of steps for the cosine annealing phase.
        min_lr_ratio (`float`, *optional*, defaults to 0):
            The minimum learning rate as a ratio of the initial learning rate.
        num_cycles (`float`, *optional*, defaults to 0.5):
            The number of waves in the cosine schedule (the default is to just decrease from the max value to the
            minimum following a half-cosine).
        last_epoch (`int`, *optional*, defaults to -1):
            The index of the last epoch when resuming training.
    Return:
        `torch.optim.lr_scheduler.LambdaLR` with the appropriate schedule.
    """
    lr_lambda = partial(
        _get_wsd_scheduler_lambda,
        num_warmup_steps=num_warmup_steps,
        num_stable_steps=num_stable_steps,
        num_decay_steps=num_decay_steps,
        min_lr_ratio=min_lr_ratio,
        num_cycles=num_cycles,
    )
    return LambdaLR(optimizer, lr_lambda, last_epoch)
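

# Usage sketch (illustrative, not from the original module): 2 warmup steps, 4 stable steps
# and 4 decay steps with min_lr_ratio=0.1 give a ramp up to the base lr, a flat plateau, then
# a cosine decay toward 10% of the base lr, where the schedule stays. Helper name and numbers
# are hypothetical.
def _demo_wsd_schedule():
    model = nn.Linear(2, 2)
    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)
    scheduler = get_wsd_schedule(
        optimizer, num_warmup_steps=2, num_stable_steps=4, num_decay_steps=4, min_lr_ratio=0.1
    )
    lrs = []
    for _ in range(12):
        optimizer.step()
        scheduler.step()
        lrs.append(round(scheduler.get_last_lr()[0], 6))
    return lrs  # ramps to 1e-3, holds, then decays toward 1e-4 and stays there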