Spaces:
Running
on
Zero
Running
on
Zero
File size: 4,301 Bytes
d711508 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 |
# Copyright 2024-present the HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import warnings
from copy import deepcopy
from typing import List, Optional
import torch
import torch.nn as nn
from peft.tuners.tuners_utils import BaseTunerLayer, check_adapters_to_merge
class LNTuningLayer(nn.Module, BaseTunerLayer):
    """
    Wrap a layer and keep one independent deep copy of it per adapter.

    The wrapped module is stored as ``base_layer``; every adapter is a full
    ``deepcopy`` of a layer stored in ``ln_tuning_layers`` under the adapter's
    name. "Merging" an adapter does not combine any weights: it swaps the
    adapter's copy with ``base_layer`` so that ``base_layer`` becomes the
    adapter module, and "unmerging" swaps them back.

    NOTE(review): ``merged``, ``active_adapters``, ``disable_adapters`` and
    ``set_adapter`` are not defined in this class; they are presumably
    provided by ``BaseTunerLayer`` - confirm against that class.
    """

    # Names of the attributes on this module that hold adapter sub-modules.
    adapter_layer_names = ("ln_tuning_layers",)

    def __init__(self, base_layer: nn.Module, adapter_name: str):
        """
        Args:
            base_layer (nn.Module): The layer to wrap.
            adapter_name (str): Name of the first adapter; it is created as a
                deep copy of ``base_layer`` and set as the active adapter.
        """
        super().__init__()
        self.base_layer = base_layer
        self.ln_tuning_layers = nn.ModuleDict({})
        self.update_layer(self.base_layer, adapter_name)
        self._active_adapter = adapter_name
        # Instance-level list so merge bookkeeping is never shared between layers.
        self.merged_adapters = []

    def update_layer(self, layer: nn.Module, adapter_name: str):
        """
        Register ``adapter_name`` as a deep copy of ``layer``.

        An existing entry with the same name is replaced.
        """
        self.ln_tuning_layers[adapter_name] = deepcopy(layer)

    def enable_adapters(self, enabled: bool) -> None:
        """Toggle the enabling and disabling of adapters

        Takes care of setting the requires_grad flag for the adapter weights.

        Args:
            enabled (bool): True to enable adapters, False to disable adapters
        """
        if enabled:
            self.set_adapter(self.active_adapters)
            self._disable_adapters = False
            return

        # Disabling: restore the original base layer first if an adapter is
        # currently swapped in.
        if self.merged:
            self.unmerge()
        # disable grads on all adapter layers
        for layer_name in self.adapter_layer_names:
            getattr(self, layer_name).requires_grad_(False)
        self._disable_adapters = True

    def merge(self, adapter_names: Optional[List[str]] = None, safe_merge: bool = False):
        """
        Swap a single adapter's layer into ``base_layer``.

        Args:
            adapter_names (List[str], optional): Adapters to merge; passed
                through ``check_adapters_to_merge`` to obtain the final list.
                At most one adapter may remain after that check.
            safe_merge (bool): Accepted so that callers using the common
                ``merge(safe_merge=..., adapter_names=...)`` keyword interface
                of other tuner layers do not fail. It has no effect here
                because merging is a module swap, not a weight update.

        Raises:
            ValueError: If more than one adapter would be merged.
        """
        adapter_names = check_adapters_to_merge(self, adapter_names)
        if not adapter_names:
            # no adapter to merge
            return

        if len(adapter_names) > 1:
            raise ValueError(
                f"Trying to merge {len(adapter_names)} adapters, but LN "
                f"tuning does not allow merging more than one adapter at a time"
            )

        merged_adapters = set(self.merged_adapters)
        if merged_adapters:
            # Only one adapter can occupy ``base_layer``; put the original
            # back before swapping in the new one.
            warnings.warn(f"Already merged with {merged_adapters}. Unmerging first.")
            self.unmerge()

        name = adapter_names[0]
        # After the swap, ``ln_tuning_layers[name]`` temporarily holds the
        # original base layer so that ``unmerge`` can restore it.
        self.base_layer, self.ln_tuning_layers[name] = (
            self.ln_tuning_layers[name],
            self.base_layer,
        )
        self.merged_adapters.append(name)

    def unmerge(self):
        """
        Undo a previous ``merge`` by swapping the layers back.

        Emits a warning and does nothing when no adapter is merged.
        """
        if not self.merged:
            warnings.warn("Already unmerged. Nothing to do.")
            return

        # popping one element is sufficient because LN
        # tuning does not allow merging more than one adapter at a time.
        merged_name = self.merged_adapters.pop()
        self.base_layer, self.ln_tuning_layers[merged_name] = (
            self.ln_tuning_layers[merged_name],
            self.base_layer,
        )

    def forward(self, x: torch.Tensor, *args, **kwargs) -> torch.Tensor:
        """
        Run the base layer or the single active adapter layer on ``x``.

        * Adapters disabled: unmerge if necessary, then call ``base_layer``.
        * An adapter is merged: call ``base_layer`` (which is the adapter
          module after the swap).
        * Otherwise: call the layer of the one active adapter.

        Extra positional and keyword arguments are forwarded unchanged.

        Raises:
            ValueError: If adapters are enabled, nothing is merged, and the
                number of active adapters is not exactly one.
        """
        if self.disable_adapters:
            if self.merged:
                self.unmerge()
            return self.base_layer(x, *args, **kwargs)

        if self.merged:
            return self.base_layer(x, *args, **kwargs)

        if len(self.active_adapters) != 1:
            raise ValueError(
                f"Trying to run forward with {len(self.active_adapters)} active "
                f"adapters, but LN tuning does not allow inference with more than one adapter at a time"
            )
        active_adapter = self.active_adapters[0]
        return self.ln_tuning_layers[active_adapter](x, *args, **kwargs)

    def __repr__(self) -> str:
        """Return the default module repr prefixed with ``ln_tuning.``."""
        rep = super().__repr__()
        return "ln_tuning." + rep
|