from typing import Optional
import nbformat as nbf
from utils import FTDataSet, falcon, gemma
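# NOTE (assumption): `utils` is not shown in this file. From the usage below, an
# FTDataSet instance is expected to expose `path` and `dataset_split` attributes,
# and `falcon` / `gemma` are expected to expose a `name` attribute identifying the
# model family. This comment documents that assumed interface; it is not taken
# from the original source.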
def create_install_libraries_cells(cells: list):
    """Add cells that install the required libraries."""
    text_cell = nbf.v4.new_markdown_cell("# Installing Required Libraries!")
    text_cell1 = nbf.v4.new_markdown_cell(
        "Installing required libraries, including trl, transformers, accelerate, peft, datasets, "
        "and bitsandbytes.")
    code = """
!pip install -q --upgrade "transformers==4.38.2"
!pip install -q --upgrade "datasets==2.16.1"
!pip install -q --upgrade "accelerate==0.26.1"
!pip install -q --upgrade "evaluate==0.4.1"
!pip install -q --upgrade "bitsandbytes==0.42.0"
!pip install -q --upgrade "trl==0.7.11"
!pip install -q --upgrade "peft==0.8.2"
"""
    code_pytorch = """
# Checks if PyTorch is installed and installs it if not.
try:
    import torch
    print("PyTorch is installed!")
except ImportError:
    print("PyTorch is not installed.")
    !pip install -q torch
"""
    code_cell = nbf.v4.new_code_cell(code)
    cells.append(text_cell)
    cells.append(text_cell1)
    cells.append(nbf.v4.new_code_cell(code_pytorch))
    cells.append(code_cell)


def create_install_flash_attention(cells: list):
    """Add cells that install Flash Attention."""
    text_cell = nbf.v4.new_markdown_cell(
        "## Installing Flash Attention")
    text_cell1 = nbf.v4.new_markdown_cell(
        "Installing Flash Attention to reduce the memory and runtime cost of the attention layer "
        "and improve the performance of model training. Learn more at [FlashAttention]("
        "https://github.com/Dao-AILab/flash-attention/tree/main). "
        "Installing Flash Attention from source can take quite a bit of time (several minutes).")
    code = """
import torch; assert torch.cuda.get_device_capability()[0] >= 8, 'Hardware not supported for Flash Attention'
!pip install ninja packaging
!MAX_JOBS=4 pip install -q flash-attn --no-build-isolation --upgrade
"""
    code_cell = nbf.v4.new_code_cell(code)
    cells.append(text_cell)
    cells.append(text_cell1)
    cells.append(code_cell)


def create_login_hf_cells(cells: list, should_login: bool = False, model_name: Optional[str] = None,
                          output_dir: Optional[str] = None):
    """Add cells that log in to the Hugging Face Hub."""
    text_cell = nbf.v4.new_markdown_cell("## Login to HF")
    text_1 = f"Replace `HF_TOKEN` with a valid token in order to push **'{output_dir}'** to `huggingface_hub`."
    if should_login:
        text_1 = f"Replace `HF_TOKEN` with a valid token in order to load **'{model_name}'** from `huggingface_hub`."
    text_cell1 = nbf.v4.new_markdown_cell(text_1)
    code = """
# Install huggingface_hub
!pip install -q huggingface_hub
from huggingface_hub import login
login(
    token='HF_TOKEN',
    add_to_git_credential=True
)
"""
    code_cell = nbf.v4.new_code_cell(code)
    cells.append(text_cell)
    cells.append(text_cell1)
    cells.append(code_cell)


def create_datasets_cells(cells: list, dataset: FTDataSet, seed: int):
    """Add cells that load and shuffle the training dataset."""
    text_cell = nbf.v4.new_markdown_cell("# Load and Prepare the Dataset")
    text = 'The dataset is already formatted in a conversational format, which is supported by [trl](' \
           'https://huggingface.co/docs/trl/index/), and ready for supervised finetuning.'
    text_format = """
**Conversational format:**

```python
{"messages": [{"role": "system", "content": "You are..."}, {"role": "user", "content": "..."}, {"role": "assistant", "content": "..."}]}
{"messages": [{"role": "system", "content": "You are..."}, {"role": "user", "content": "..."}, {"role": "assistant", "content": "..."}]}
{"messages": [{"role": "system", "content": "You are..."}, {"role": "user", "content": "..."}, {"role": "assistant", "content": "..."}]}
```
"""
    text_cell1 = nbf.v4.new_markdown_cell(text)
    text_cell2 = nbf.v4.new_markdown_cell(text_format)
    code = f"""
from datasets import load_dataset
# Load dataset from the hub
dataset = load_dataset("{dataset.path}", split="{dataset.dataset_split}")
dataset = dataset.shuffle(seed={seed})
"""
    code_cell = nbf.v4.new_code_cell(code)
    cells.append(text_cell)
    cells.append(text_cell1)
    cells.append(text_cell2)
    cells.append(code_cell)


def create_model_cells(cells: list, model_id: str, version: str, flash_attention: bool, pad_side: str,
                       pad_value: Optional[str], load_in_4bit: str, bnb_4bit_use_double_quant: bool,
                       bnb_4bit_quant_type: str, bnb_4bit_compute_dtype: str):
    """Add cells that load the quantized base model and its tokenizer for finetuning."""
    text_cell = nbf.v4.new_markdown_cell(f"# Load **{model_id}-{version}** for Finetuning")
    load_in_4bit_str = f"{load_in_4bit}=True"
    flash_attention_str = "attn_implementation='flash_attention_2',"
    if not flash_attention:
        flash_attention_str = ''
    pad_value_str = "tokenizer.pad_token = tokenizer.eos_token"
    if pad_value is None:
        pad_value_str = ""
    auto_model_import = "AutoModelForCausalLM"
    trust_code = "trust_remote_code=True,"
    if model_id == falcon.name:
        auto_model_import = "FalconForCausalLM"
        trust_code = ""
    chat_ml = """
# Set chat template to OAI chatML
model, tokenizer = setup_chat_format(model, tokenizer)
"""
    note = f"""
> **Note:** For `{model_id}`, we will not use `setup_chat_format`. Instead, we will directly use this tokenizer, [philschmid/gemma-tokenizer-chatml](https://huggingface.co/philschmid/gemma-tokenizer-chatml), to fine-tune `{model_id}` with ChatML.
"""
    tokenizer_id = f"{model_id}-{version}"
    if model_id == gemma.name:
        tokenizer_id = "philschmid/gemma-tokenizer-chatml"
        chat_ml = ""
    else:
        note = ""
    code = f"""
import torch
from transformers import AutoTokenizer, {auto_model_import}, BitsAndBytesConfig
from trl import setup_chat_format
# Hugging Face model id
model_id = "{model_id}-{version}"
# BitsAndBytesConfig
bnb_config = BitsAndBytesConfig(
    {load_in_4bit_str}, bnb_4bit_use_double_quant={bnb_4bit_use_double_quant},
    bnb_4bit_quant_type="{bnb_4bit_quant_type}", bnb_4bit_compute_dtype={bnb_4bit_compute_dtype}
)
# Load model and tokenizer
model = {auto_model_import}.from_pretrained(
    model_id,
    device_map="auto",
    {trust_code}
    {flash_attention_str}
    torch_dtype=torch.bfloat16,
    quantization_config=bnb_config
)
tokenizer = AutoTokenizer.from_pretrained("{tokenizer_id}")
tokenizer.padding_side = "{pad_side}"
{pad_value_str}
{chat_ml}
"""
    text_1 = f"""
This process involves two key steps:

1. **LLM Quantization:**
   - We first load the selected large language model (LLM).
   - We then use the `bitsandbytes` library to quantize the model, which can significantly reduce its memory footprint.
     > **Note:** The memory requirements of the model scale with its size. For instance, a 7B parameter model may require
     a 24GB GPU for fine-tuning.
2. **Chat Model Preparation:**
   - To train a model for chat/conversational tasks, we need to prepare both the model and its tokenizer.
   - This involves adding special tokens to the tokenizer and the model itself. These tokens help the model
     understand the different roles within a conversation.
   - The **trl** library provides a convenient method called `setup_chat_format` for this purpose. This method performs the
     following actions:
     * Adds special tokens to the tokenizer, such as `<|im_start|>` and `<|im_end|>`, to mark the beginning and
       end of a conversation.
     * Resizes the model's embedding layer to accommodate the new tokens.
     * Sets the tokenizer's chat template, which defines the format used to convert input data into a chat-like
       structure. The default template is `chatml` from OpenAI.
{note}
"""
    code_cell = nbf.v4.new_code_cell(code)
    text_cell1 = nbf.v4.new_markdown_cell(text_1)
    cells.append(text_cell)
    cells.append(text_cell1)
    cells.append(code_cell)


def create_lora_config_cells(cells: list, r: int, alpha: int, dropout: float, bias: str):
    """Add cells that define the LoRA configuration."""
    text_cell = nbf.v4.new_markdown_cell("## Setting LoRA Config")
    code = f"""
from peft import LoraConfig
peft_config = LoraConfig(
    lora_alpha={alpha},
    lora_dropout={dropout},
    r={r},
    bias="{bias}",
    target_modules="all-linear",
    task_type="CAUSAL_LM"
)
"""
    text = """The `SFTTrainer` provides native integration with `peft`, simplifying the process of efficiently tuning
large language models (LLMs) using techniques such as
[LoRA](https://magazine.sebastianraschka.com/p/practical-tips-for-finetuning-llms). The only requirement is to create
the `LoraConfig` and pass it to the `SFTTrainer`.
"""
    code_cell = nbf.v4.new_code_cell(code)
    cells.append(text_cell)
    cells.append(nbf.v4.new_markdown_cell(text))
    cells.append(code_cell)


def create_training_args_cells(cells: list, epochs, max_steps, logging_steps, per_device_train_batch_size,
                               save_strategy, gradient_accumulation_steps, gradient_checkpointing,
                               learning_rate, max_grad_norm, warmup_ratio, lr_scheduler_type, output_dir,
                               report_to, seed):
    """Add cells that define the TrainingArguments (and install the reporting integrations if needed)."""
    text_cell = nbf.v4.new_markdown_cell("## Setting the TrainingArguments")
    to_install = None
    if report_to == "all":
        to_install = "azure_ml comet_ml mlflow tensorboard wandb"
    elif report_to != "none":
        to_install = report_to
    gradient_checkpointing_kwargs = {"use_reentrant": False}
    code_report = f"""
# Installing {to_install} to report the metrics
!pip install -q {to_install}
"""
    code = f"""
from transformers import TrainingArguments
args = TrainingArguments(
    output_dir="temp_{output_dir}",
    num_train_epochs={epochs},
    per_device_train_batch_size={per_device_train_batch_size},
    gradient_accumulation_steps={gradient_accumulation_steps},
    gradient_checkpointing={gradient_checkpointing},
    gradient_checkpointing_kwargs={gradient_checkpointing_kwargs},
    optim="adamw_torch_fused",
    logging_steps={logging_steps},
    save_strategy='{save_strategy}',
    learning_rate={learning_rate},
    bf16=True,
    max_grad_norm={max_grad_norm},
    warmup_ratio={warmup_ratio},
    lr_scheduler_type='{lr_scheduler_type}',
    report_to='{report_to}',
    max_steps={max_steps},
    seed={seed},
    overwrite_output_dir=True,
    remove_unused_columns=True
)
"""
    code_cell = nbf.v4.new_code_cell(code)
    cells.append(text_cell)
    if to_install is not None:
        cells.append(nbf.v4.new_code_cell(code_report))
    cells.append(code_cell)


def create_sft_trainer_cells(cells: list, max_seq_length, packing):
    """Add cells that set up the `SFTTrainer`."""
    text_cell = nbf.v4.new_markdown_cell(
        """## Setting the Supervised Finetuning Trainer (`SFTTrainer`)

This `SFTTrainer` is a wrapper around the `transformers.Trainer` class and inherits all of its attributes and methods.
The trainer takes care of properly initializing the `PeftModel`.
""")
    dataset_kwargs = {
        "add_special_tokens": False,  # We template with special tokens
        "append_concat_token": False,  # No need to add additional separator token
    }
    code = f"""
from trl import SFTTrainer
trainer = SFTTrainer(
    model=model,
    args=args,
    train_dataset=dataset,
    peft_config=peft_config,
    max_seq_length={max_seq_length},
    tokenizer=tokenizer,
    packing={packing},
    dataset_kwargs={dataset_kwargs}
)
"""
    code_cell = nbf.v4.new_code_cell(code)
    cells.append(text_cell)
    cells.append(code_cell)


def create_start_training_cells(cells: list, epochs, max_steps, push_to_hub, output_dir):
    """Add cells that start training and save the model/tokenizer."""
    if push_to_hub:
        save_txt = f"and to the hub in **'User/{output_dir}'**."
    else:
        save_txt = "."
    epoch_str = f"{epochs} epochs"
    if max_steps > 0:
        epoch_str = f"{max_steps} steps"
    text_cell = nbf.v4.new_markdown_cell(
        f"""### Starting Training and Saving Model/Tokenizer

We start training the model by calling the `train()` method on the trainer instance. This will start the training
loop and train the model for `{epoch_str}`. The model will be automatically saved to the output directory (**'temp_{output_dir}'**)
{save_txt}
""")
    code = f"""
model.config.use_cache = False
# start training
trainer.train()
# save the peft model
trainer.save_model()
"""
    code_cell = nbf.v4.new_code_cell(code)
    cells.append(text_cell)
    cells.append(code_cell)


def create_free_gpu_cells(cells: list):
    """Add cells that free GPU memory before merging the LoRA adapters."""
    text_cell = nbf.v4.new_markdown_cell(
        """### Free the GPU Memory to Prepare Merging `LoRA` Adapters with the Base Model
""")
    code = f"""
# Free the GPU memory
del model
del trainer
torch.cuda.empty_cache()
"""
    code_cell = nbf.v4.new_code_cell(code)
    cells.append(text_cell)
    cells.append(code_cell)


def create_merge_lora_cells(cells: list, output_dir):
    """Add cells that merge the LoRA adapters into the base model and save the result."""
    text_cell = nbf.v4.new_markdown_cell(
        """## Merging LoRA Adapters into the Original Model

While utilizing `LoRA`, we focus on training the adapters rather than the entire model. Consequently, during the
model saving process, only the `adapter weights` are preserved, not the complete model. If we wish to save the
entire model for easier usage with Text Generation Inference, we can incorporate the adapter weights into the model
weights. This can be achieved using the `merge_and_unload` method. Following this, the model can be saved using the
`save_pretrained` method. The result is a default model that is ready for inference.
""")
    code = f"""
import torch
from peft import AutoPeftModelForCausalLM
# Load Peft model on CPU
model = AutoPeftModelForCausalLM.from_pretrained(
    "temp_{output_dir}",
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True
)
# Merge LoRA with the base model and save
merged_model = model.merge_and_unload()
merged_model.save_pretrained("{output_dir}", safe_serialization=True, max_shard_size="2GB")
tokenizer.save_pretrained("{output_dir}")
"""
    code_cell = nbf.v4.new_code_cell(code)
    cells.append(text_cell)
    cells.append(code_cell)


def merge_model_cells(cells: list, output_dir):
    """Add cells that copy the result folders from the temp directory to the final output directory."""
    text_cell = nbf.v4.new_markdown_cell(
        f"### Copy all result folders from 'temp_{output_dir}' to '{output_dir}'")
    code = f"""
import os
import shutil
source_folder = "temp_{output_dir}"
destination_folder = "{output_dir}"
os.makedirs(destination_folder, exist_ok=True)
for item in os.listdir(source_folder):
    item_path = os.path.join(source_folder, item)
    if os.path.isdir(item_path):
        destination_path = os.path.join(destination_folder, item)
        shutil.copytree(item_path, destination_path)
"""
    code_cell = nbf.v4.new_code_cell(code)
    cells.append(text_cell)
    cells.append(code_cell)


def push_to_hub_cells(cells: list, output_dir):
    """Add cells that push the merged model to the Hugging Face Hub."""
    text = f"## Pushing '{output_dir}' to the Hugging Face account."
    code = f"""
from huggingface_hub import HfApi, HfFolder
# Instantiate the HfApi class
api = HfApi()
# Our Hugging Face repository
repo_name = "{output_dir}"
# Create a repository on the Hugging Face Hub
repo = api.create_repo(token=HfFolder.get_token(), repo_type="model", repo_id=repo_name)
api.upload_folder(
    folder_path="{output_dir}",
    repo_id=repo.repo_id
)
"""
    code_cell = nbf.v4.new_code_cell(code)
    cells.append(nbf.v4.new_markdown_cell(text))
    cells.append(code_cell)
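

# -----------------------------------------------------------------------------
# Minimal usage sketch (not part of the original module): it shows how the cell
# builders above could be assembled into a notebook and written to disk with
# nbformat. The hyperparameter values and the "my-finetuned-model" output
# directory are illustrative placeholders, not values from this repository. The
# dataset, model, and TrainingArguments builders are omitted here because they
# need an FTDataSet instance and model metadata from `utils`.
# -----------------------------------------------------------------------------
if __name__ == "__main__":
    demo_cells = []
    create_install_libraries_cells(demo_cells)
    create_install_flash_attention(demo_cells)
    create_login_hf_cells(demo_cells, should_login=True, model_name=gemma.name,
                          output_dir="my-finetuned-model")
    create_lora_config_cells(demo_cells, r=16, alpha=8, dropout=0.05, bias="none")
    create_sft_trainer_cells(demo_cells, max_seq_length=2048, packing=True)
    create_start_training_cells(demo_cells, epochs=3, max_steps=0, push_to_hub=False,
                                output_dir="my-finetuned-model")
    create_free_gpu_cells(demo_cells)
    create_merge_lora_cells(demo_cells, output_dir="my-finetuned-model")
    merge_model_cells(demo_cells, output_dir="my-finetuned-model")

    # Assemble the cells into a v4 notebook and write it to disk.
    notebook = nbf.v4.new_notebook(cells=demo_cells)
    nbf.write(notebook, "finetuning_notebook.ipynb")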