from typing import Optional

import nbformat as nbf

from utils import FTDataSet, falcon, gemma


def create_install_libraries_cells(cells: list):
    text_cell = nbf.v4.new_markdown_cell("# Installing Required Libraries!")
    text_cell1 = nbf.v4.new_markdown_cell(
        "Installing required libraries, including trl, transformers, accelerate, peft, datasets, "
        "and bitsandbytes.")

    code = """
!pip install -q --upgrade "transformers==4.38.2"
!pip install -q --upgrade "datasets==2.16.1"
!pip install -q --upgrade "accelerate==0.26.1"
!pip install -q --upgrade "evaluate==0.4.1"
!pip install -q --upgrade "bitsandbytes==0.42.0"
!pip install -q --upgrade "trl==0.7.11"
!pip install -q --upgrade "peft==0.8.2"
"""

    code_pytorch = """
# Checks if PyTorch is installed and installs it if not.
try:
    import torch
    print("PyTorch is installed!")
except ImportError:
    print("PyTorch is not installed.")
    !pip install -q torch
"""

    code_cell = nbf.v4.new_code_cell(code)
    cells.append(text_cell)
    cells.append(text_cell1)
    cells.append(nbf.v4.new_code_cell(code_pytorch))
    cells.append(code_cell)
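

# Illustrative usage sketch (not part of the original module): every builder in
# this file follows the same pattern -- it appends nbformat v4 cell objects to a
# caller-owned list. The helper name `_preview_install_cells` is hypothetical
# and only meant to show that pattern.
def _preview_install_cells():
    cells = []
    create_install_libraries_cells(cells)
    # Expect two markdown cells followed by two code cells.
    for cell in cells:
        print(cell["cell_type"])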


def create_install_flash_attention(cells: list):
    text_cell = nbf.v4.new_markdown_cell(
        "## Installing Flash Attention")
    text_cell1 = nbf.v4.new_markdown_cell(
        "Installing Flash Attention to reduce the memory and runtime cost of the attention layer "
        "and improve the performance of model training. Learn more at "
        "[FlashAttention](https://github.com/Dao-AILab/flash-attention/tree/main). "
        "Installing flash attention from source can take quite a bit of time (several minutes).")

    code = """
import torch; assert torch.cuda.get_device_capability()[0] >= 8, 'Hardware not supported for Flash Attention'
!pip install ninja packaging
!MAX_JOBS=4 pip install -q flash-attn --no-build-isolation --upgrade
"""

    code_cell = nbf.v4.new_code_cell(code)
    cells.append(text_cell)
    cells.append(text_cell1)
    cells.append(code_cell)


def create_login_hf_cells(cells: list, should_login: bool = False, model_name: Optional[str] = None,
                          output_dir: Optional[str] = None):
    text_cell = nbf.v4.new_markdown_cell("## Login to HF")

    text_1 = f"Replace `HF_TOKEN` with a valid token in order to push **'{output_dir}'** to `huggingface_hub`."
    if should_login:
        text_1 = f"Replace `HF_TOKEN` with a valid token in order to load **'{model_name}'** from `huggingface_hub`."

    text_cell1 = nbf.v4.new_markdown_cell(text_1)

    code = """
# Install huggingface_hub
!pip install -q huggingface_hub

from huggingface_hub import login

login(
    token='HF_TOKEN',
    add_to_git_credential=True
)
"""

    code_cell = nbf.v4.new_code_cell(code)
    cells.append(text_cell)
    cells.append(text_cell1)
    cells.append(code_cell)


def create_datasets_cells(cells: list, dataset: FTDataSet, seed: int):
    text_cell = nbf.v4.new_markdown_cell("# Load and Prepare the Dataset")
    text = 'The dataset is already formatted in a conversational format, which is supported by [trl](' \
           'https://huggingface.co/docs/trl/index/), and ready for supervised finetuning.'
    text_format = """
**Conversational format:**

```python
{"messages": [{"role": "system", "content": "You are..."}, {"role": "user", "content": "..."}, {"role": "assistant", "content": "..."}]}
{"messages": [{"role": "system", "content": "You are..."}, {"role": "user", "content": "..."}, {"role": "assistant", "content": "..."}]}
{"messages": [{"role": "system", "content": "You are..."}, {"role": "user", "content": "..."}, {"role": "assistant", "content": "..."}]}
```
"""
    text_cell1 = nbf.v4.new_markdown_cell(text)
    text_cell2 = nbf.v4.new_markdown_cell(text_format)

    code = f"""
from datasets import load_dataset

# Load dataset from the hub
dataset = load_dataset("{dataset.path}", split="{dataset.dataset_split}")
dataset = dataset.shuffle(seed={seed})
"""

    code_cell = nbf.v4.new_code_cell(code)
    cells.append(text_cell)
    cells.append(text_cell1)
    cells.append(text_cell2)
    cells.append(code_cell)


def create_model_cells(cells: list, model_id: str, version: str, flash_attention: bool, pad_side: str,
                       pad_value: Optional[str], load_in_4bit: str, bnb_4bit_use_double_quant: bool,
                       bnb_4bit_quant_type: str, bnb_4bit_compute_dtype: str):
    text_cell = nbf.v4.new_markdown_cell(f"# Load **{model_id}-{version}** for Finetuning")

    load_in_4bit_str = f"{load_in_4bit}=True"

    flash_attention_str = "attn_implementation='flash_attention_2',"
    if not flash_attention:
        flash_attention_str = ''

    pad_value_str = "tokenizer.pad_token = tokenizer.eos_token"
    if pad_value is None:
        pad_value_str = ""

    auto_model_import = "AutoModelForCausalLM"
    trust_code = "trust_remote_code=True,"
    if model_id == falcon.name:
        auto_model_import = "FalconForCausalLM"
        trust_code = ""

    chat_ml = """
# Set chat template to OAI chatML
model, tokenizer = setup_chat_format(model, tokenizer)
"""
    note = f"""
> **Note:** For `{model_id}`, we will not use `setup_chat_format`. Instead, we will directly use this tokenizer, [philschmid/gemma-tokenizer-chatml](https://huggingface.co/philschmid/gemma-tokenizer-chatml), to fine-tune `{model_id}` with ChatML.
"""
    tokenizer_id = f"{model_id}-{version}"
    if model_id == gemma.name:
        tokenizer_id = "philschmid/gemma-tokenizer-chatml"
        chat_ml = ""
    else:
        note = ""

    code = f"""
import torch
from transformers import AutoTokenizer, {auto_model_import}, BitsAndBytesConfig
from trl import setup_chat_format

# Hugging Face model id
model_id = "{model_id}-{version}"

# BitsAndBytesConfig
bnb_config = BitsAndBytesConfig(
    {load_in_4bit_str}, bnb_4bit_use_double_quant={bnb_4bit_use_double_quant},
    bnb_4bit_quant_type="{bnb_4bit_quant_type}", bnb_4bit_compute_dtype={bnb_4bit_compute_dtype}
)

# Load model and tokenizer
model = {auto_model_import}.from_pretrained(
    model_id,
    device_map="auto",
    {trust_code}
    {flash_attention_str}
    torch_dtype=torch.bfloat16,
    quantization_config=bnb_config
)

tokenizer = AutoTokenizer.from_pretrained("{tokenizer_id}")
tokenizer.padding_side = "{pad_side}"
{pad_value_str}
{chat_ml}
"""

    text_1 = f"""
This process involves two key steps:

1. **LLM Quantization:**
    - We first load the selected large language model (LLM).
    - We then use the `bitsandbytes` library to quantize the model, which can significantly reduce its memory footprint.

> **Note:** The memory requirements of the model scale with its size. For instance, a 7B parameter model may require
a 24GB GPU for fine-tuning.

2. **Chat Model Preparation:**
    - To train a model for chat/conversational tasks, we need to prepare both the model and its tokenizer.
    - This involves adding special tokens to the tokenizer and the model itself. These tokens help the model
    understand the different roles within a conversation.
    - The **trl** library provides a convenient method called `setup_chat_format` for this purpose. This method performs the
    following actions:
        * Adds special tokens to the tokenizer, such as `<|im_start|>` and `<|im_end|>`, to mark the beginning and
        end of a conversation.
        * Resizes the model's embedding layer to accommodate the new tokens.
        * Sets the tokenizer's chat template, which defines the format used to convert input data into a chat-like
        structure. The default template is `chatml` from OpenAI.
{note}
"""

    code_cell = nbf.v4.new_code_cell(code)
    text_cell1 = nbf.v4.new_markdown_cell(text_1)
    cells.append(text_cell)
    cells.append(text_cell1)
    cells.append(code_cell)
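

# Illustrative only (not part of the original module): a hypothetical call that
# shows how the placeholders in `create_model_cells` are filled. The model id,
# version and quantization values below are example inputs, not project
# defaults, and they are assumed not to match `falcon.name` or `gemma.name`.
def _preview_model_cell():
    cells = []
    create_model_cells(
        cells,
        model_id="mistralai/Mistral",
        version="7B-v0.1",
        flash_attention=True,
        pad_side="right",
        pad_value="eos",
        load_in_4bit="load_in_4bit",
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype="torch.bfloat16",
    )
    # The last appended cell contains the generated `from_pretrained(...)` code.
    print(cells[-1]["source"])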


def create_lora_config_cells(cells: list, r: int, alpha: int, dropout: float, bias: str):
    text_cell = nbf.v4.new_markdown_cell("## Setting LoRA Config")

    code = f"""
from peft import LoraConfig

peft_config = LoraConfig(
    lora_alpha={alpha},
    lora_dropout={dropout},
    r={r},
    bias="{bias}",
    target_modules="all-linear",
    task_type="CAUSAL_LM"
)
"""

    text = """The `SFTTrainer` provides native integration with `peft`, simplifying the process of efficiently tuning
Large Language Models (LLMs) using techniques such as [LoRA](
https://magazine.sebastianraschka.com/p/practical-tips-for-finetuning-llms). The only requirement is to create
the `LoraConfig` and pass it to the `SFTTrainer`.
"""

    code_cell = nbf.v4.new_code_cell(code)
    cells.append(text_cell)
    cells.append(nbf.v4.new_markdown_cell(text))
    cells.append(code_cell)


def create_training_args_cells(cells: list, epochs, max_steps, logging_steps, per_device_train_batch_size,
                               save_strategy, gradient_accumulation_steps, gradient_checkpointing,
                               learning_rate, max_grad_norm, warmup_ratio, lr_scheduler_type, output_dir,
                               report_to, seed):
    text_cell = nbf.v4.new_markdown_cell("## Setting the TrainingArguments")

    to_install = None
    if report_to == "all":
        to_install = "azure_ml comet_ml mlflow tensorboard wandb"
    elif report_to != "none":
        to_install = report_to

    gradient_checkpointing_kwargs = {"use_reentrant": False}

    code_report = f"""
# Installing {to_install} to report the metrics
!pip install -q {to_install}
"""

    code = f"""
from transformers import TrainingArguments

args = TrainingArguments(
    output_dir="temp_{output_dir}",
    num_train_epochs={epochs},
    per_device_train_batch_size={per_device_train_batch_size},
    gradient_accumulation_steps={gradient_accumulation_steps},
    gradient_checkpointing={gradient_checkpointing},
    gradient_checkpointing_kwargs={gradient_checkpointing_kwargs},
    optim="adamw_torch_fused",
    logging_steps={logging_steps},
    save_strategy='{save_strategy}',
    learning_rate={learning_rate},
    bf16=True,
    max_grad_norm={max_grad_norm},
    warmup_ratio={warmup_ratio},
    lr_scheduler_type='{lr_scheduler_type}',
    report_to='{report_to}',
    max_steps={max_steps},
    seed={seed},
    overwrite_output_dir=True,
    remove_unused_columns=True
)
"""

    code_cell = nbf.v4.new_code_cell(code)
    cells.append(text_cell)
    if to_install is not None:
        cells.append(nbf.v4.new_code_cell(code_report))
    cells.append(code_cell)


def create_sft_trainer_cells(cells: list, max_seq_length, packing):
    text_cell = nbf.v4.new_markdown_cell(
        """## Setting the Supervised Finetuning Trainer (`SFTTrainer`)
This `SFTTrainer` is a wrapper around the `transformers.Trainer` class and inherits all of its attributes and methods.
The trainer takes care of properly initializing the `PeftModel`.
""")

    dataset_kwargs = {
        "add_special_tokens": False,  # We template with special tokens
        "append_concat_token": False,  # No need to add additional separator token
    }

    code = f"""
from trl import SFTTrainer

trainer = SFTTrainer(
    model=model,
    args=args,
    train_dataset=dataset,
    peft_config=peft_config,
    max_seq_length={max_seq_length},
    tokenizer=tokenizer,
    packing={packing},
    dataset_kwargs={dataset_kwargs}
)
"""

    code_cell = nbf.v4.new_code_cell(code)
    cells.append(text_cell)
    cells.append(code_cell)


def create_start_training_cells(cells: list, epochs, max_steps, push_to_hub, output_dir):
    if push_to_hub:
        save_txt = f"and to the hub in **'User/{output_dir}'**."
    else:
        save_txt = "."

    epoch_str = f"{epochs} epochs"
    if max_steps > 0:
        epoch_str = f"{max_steps} steps"

    text_cell = nbf.v4.new_markdown_cell(
        f"""### Starting Training and Saving Model/Tokenizer
We start training the model by calling the `train()` method on the trainer instance. This will start the training
loop and train the model for `{epoch_str}`. The model will be automatically saved to the output directory (**'temp_{output_dir}'**)
{save_txt}
""")

    code = """
model.config.use_cache = False

# start training
trainer.train()

# save the peft model
trainer.save_model()
"""

    code_cell = nbf.v4.new_code_cell(code)
    cells.append(text_cell)
    cells.append(code_cell)


def create_free_gpu_cells(cells: list):
    text_cell = nbf.v4.new_markdown_cell(
        """### Free the GPU Memory to Prepare Merging `LoRA` Adapters with the Base Model
""")

    code = """
# Free the GPU memory
del model
del trainer
torch.cuda.empty_cache()
"""

    code_cell = nbf.v4.new_code_cell(code)
    cells.append(text_cell)
    cells.append(code_cell)


def create_merge_lora_cells(cells: list, output_dir):
    text_cell = nbf.v4.new_markdown_cell(
        """## Merging LoRA Adapters into the Original Model
While utilizing `LoRA`, we focus on training the adapters rather than the entire model. Consequently, during the
model saving process, only the `adapter weights` are preserved, not the complete model. If we wish to save the
entire model for easier usage with Text Generation Inference, we can incorporate the adapter weights into the model
weights. This can be achieved using the `merge_and_unload` method. Following this, the model can be saved using the
`save_pretrained` method. The result is a default model that is ready for inference.
""")

    code = f"""
import torch
from peft import AutoPeftModelForCausalLM

# Load Peft model on CPU
model = AutoPeftModelForCausalLM.from_pretrained(
    "temp_{output_dir}",
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True
)

# Merge LoRA with the base model and save
merged_model = model.merge_and_unload()
merged_model.save_pretrained("{output_dir}", safe_serialization=True, max_shard_size="2GB")
tokenizer.save_pretrained("{output_dir}")
"""

    code_cell = nbf.v4.new_code_cell(code)
    cells.append(text_cell)
    cells.append(code_cell)


def merge_model_cells(cells: list, output_dir):
    text_cell = nbf.v4.new_markdown_cell(
        f"### Copy all result folders from 'temp_{output_dir}' to '{output_dir}'")

    code = f"""
import os
import shutil

source_folder = "temp_{output_dir}"
destination_folder = "{output_dir}"
os.makedirs(destination_folder, exist_ok=True)

for item in os.listdir(source_folder):
    item_path = os.path.join(source_folder, item)
    if os.path.isdir(item_path):
        destination_path = os.path.join(destination_folder, item)
        shutil.copytree(item_path, destination_path)
"""

    code_cell = nbf.v4.new_code_cell(code)
    cells.append(text_cell)
    cells.append(code_cell)


def push_to_hub_cells(cells: list, output_dir):
    text = f"## Pushing '{output_dir}' to the Hugging Face account."

    code = f"""
from huggingface_hub import HfApi, HfFolder

# Instantiate the HfApi class
api = HfApi()

# Our Hugging Face repository
repo_name = "{output_dir}"

# Create a repository on the Hugging Face Hub
repo = api.create_repo(token=HfFolder.get_token(), repo_type="model", repo_id=repo_name)

api.upload_folder(
    folder_path="{output_dir}",
    repo_id=repo.repo_id
)
"""

    code_cell = nbf.v4.new_code_cell(code)
    cells.append(nbf.v4.new_markdown_cell(text))
    cells.append(code_cell)
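

# Hypothetical end-to-end usage sketch, not part of the original module: it
# composes the builders above into a single notebook and serializes it with
# nbformat. All model, dataset and training values are placeholders, and the
# FTDataSet constructor is assumed to accept `path` and `dataset_split`
# keyword arguments, matching the attributes read in `create_datasets_cells`.
if __name__ == "__main__":
    cells = []
    create_install_libraries_cells(cells)
    create_install_flash_attention(cells)
    create_login_hf_cells(cells, should_login=True, model_name="mistralai/Mistral-7B-v0.1",
                          output_dir="finetuned-model")
    create_datasets_cells(cells, FTDataSet(path="HuggingFaceH4/no_robots", dataset_split="train"), seed=42)
    create_model_cells(cells, model_id="mistralai/Mistral", version="7B-v0.1", flash_attention=True,
                       pad_side="right", pad_value="eos", load_in_4bit="load_in_4bit",
                       bnb_4bit_use_double_quant=True, bnb_4bit_quant_type="nf4",
                       bnb_4bit_compute_dtype="torch.bfloat16")
    create_lora_config_cells(cells, r=16, alpha=8, dropout=0.05, bias="none")
    create_training_args_cells(cells, epochs=3, max_steps=-1, logging_steps=10,
                               per_device_train_batch_size=2, save_strategy="epoch",
                               gradient_accumulation_steps=2, gradient_checkpointing=True,
                               learning_rate=2e-4, max_grad_norm=0.3, warmup_ratio=0.03,
                               lr_scheduler_type="constant", output_dir="finetuned-model",
                               report_to="none", seed=42)
    create_sft_trainer_cells(cells, max_seq_length=2048, packing=True)
    create_start_training_cells(cells, epochs=3, max_steps=-1, push_to_hub=False,
                                output_dir="finetuned-model")
    create_free_gpu_cells(cells)
    create_merge_lora_cells(cells, output_dir="finetuned-model")
    merge_model_cells(cells, output_dir="finetuned-model")

    # Wrap the accumulated cells into a v4 notebook and write it to disk.
    nb = nbf.v4.new_notebook(cells=cells)
    with open("finetune_llm.ipynb", "w") as fp:
        nbf.write(nb, fp)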