Uploaded model

  • Developed by: katsumasa
  • License: apache-2.0
  • Finetuned from model: llm-jp/llm-jp-3-13b

This Llama-based model was trained 2x faster with Unsloth and Hugging Face's TRL library.

Code used for training, inference, and upload:

import torch
from transformers import TrainingArguments, AutoModelForCausalLM, AutoTokenizer
from unsloth import FastLanguageModel, is_bfloat16_supported
from datasets import load_dataset
from trl import SFTTrainer
from datetime import datetime
import os
import shutil
import json
from tqdm import tqdm
from peft import PeftModel
from dotenv import load_dotenv

# Defined before the common settings below
def get_hf_token():
    """
    Retrieve the Hugging Face token from a .env file or the environment.
    """
    load_dotenv()
    token = os.getenv("HF_TOKEN")
    if not token:
        raise ValueError("HuggingFace token not found. Please set HF_TOKEN in .env file or environment variables")
    return token
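
# Example .env entry read by get_hf_token() (the value shown is a placeholder, not a real token):
#   HF_TOKEN=hf_xxxxxxxxxxxxxxxxxxxxxxxx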

# Common settings
max_seq_length = 512
model_name = "llm-jp/llm-jp-3-13b"
current_time = datetime.now().strftime("%Y%m%d_%H%M%S")
output_dir = f"./outputs/{current_time}"
os.makedirs(output_dir, exist_ok=True)

# Retrieve the Hugging Face token
HF_TOKEN = get_hf_token()

# Initialize the tokenizer
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    model_max_length=max_seq_length,
    padding_side="right",
    truncation_side="right",
)

# Prepare the model and run training
def train_model():
    dataset = load_dataset("json", data_files=[
    "./converted_data/converted_generated_data_20241215_190410.json",
    "./converted_data/converted_generated_data_20241215_214354.json",
    "./converted_data/converted_generated_data_20241216_000521.json",
    "./converted_data/converted_generated_data_20241216_111233.json",
    "./converted_data/converted_generated_data_20241216_205637.json",
    "./input/ichikara-instruction-003-001-1.json",
    ])
    
    prompt = """### 指示
{}
### 回答
{}"""

    # Each record is expected to provide "text" (instruction) and "output" (response) fields
    def formatting_prompts_func(examples):
        input = examples["text"]
        output = examples["output"]
        text = prompt.format(input, output) + tokenizer.eos_token
        return {"formatted_text": text}

    dataset = dataset.map(
        formatting_prompts_func,
        num_proc=4,
    )

    model, _ = FastLanguageModel.from_pretrained(
        model_name=model_name,
        dtype=None,
        load_in_4bit=True,
        trust_remote_code=True,
    )

    model = FastLanguageModel.get_peft_model(
        model,
        r=32,
        target_modules=[
            "q_proj", "k_proj", "v_proj", "o_proj",
            "gate_proj", "up_proj", "down_proj",
        ],
        lora_alpha=32,
        lora_dropout=0.05,
        bias="none",
        use_gradient_checkpointing="unsloth",
        random_state=3407,
        use_rslora=False,
        loftq_config=None,
        max_seq_length=max_seq_length,
    )

    current_script = os.path.abspath(__file__)
    script_backup = os.path.join(output_dir, "train_script.py")
    shutil.copy2(current_script, script_backup)

    trainer = SFTTrainer(
        model=model,
        tokenizer=tokenizer,
        train_dataset=dataset["train"],
        max_seq_length=max_seq_length,
        dataset_text_field="formatted_text",
        packing=False,
        args=TrainingArguments(
            per_device_train_batch_size=2,
            gradient_accumulation_steps=4,
            num_train_epochs=1,
            logging_steps=10,
            warmup_steps=10,
            save_steps=100,
            save_total_limit=2,
            max_steps=-1,
            learning_rate=2e-4,
            fp16=not is_bfloat16_supported(),
            bf16=is_bfloat16_supported(),
            group_by_length=True,
            seed=3407,
            output_dir=output_dir,
            report_to="none",
        ),
    )

    trainer.train()
    return output_dir

# Run inference
def inference(checkpoint_dir):
    # Load the evaluation data; a record's JSON may span multiple lines,
    # so lines are accumulated until the closing brace is reached
    datasets = []
    with open("./content/elyza-tasks-100-TV_0.jsonl", "r") as f:
        item = ""
        for line in f:
            line = line.strip()
            item += line
            if item.endswith("}"):
                datasets.append(json.loads(item))
                item = ""

    # Prepare the model for inference
    model, _ = FastLanguageModel.from_pretrained(
        model_name=model_name,
        max_seq_length=max_seq_length,
        dtype="bfloat16",
        load_in_4bit=True,
    )

    model = PeftModel.from_pretrained(model, checkpoint_dir)
    model.eval()
    model = FastLanguageModel.for_inference(model)

    results = []
    for dt in tqdm(datasets):
        input = dt["input"]
        prompt = f"""### 指示\n{input}\n### 回答\n"""
        inputs = tokenizer(
            [prompt],
            return_tensors="pt",
            return_token_type_ids=False
        ).to(model.device)
        outputs = model.generate(
            **inputs,
            max_new_tokens=512,
            do_sample=False,
            repetition_penalty=1.2
        )
        prediction = tokenizer.decode(outputs[0], skip_special_tokens=True).split('\n### 回答')[-1]
        results.append({
            "task_id": dt["task_id"],
            "input": input,
            "output": prediction
        })

    # Save the results as JSON Lines
    output_file = os.path.join(output_dir, f"predictions_{current_time}.jsonl")
    with open(output_file, 'w', encoding='utf-8') as f:
        for result in results:
            json.dump(result, f, ensure_ascii=False)
            f.write('\n')

def save_model_to_hub(model, tokenizer, timestamp):
    """
    Push the LoRA adapter to the Hugging Face Hub.
    """
    new_model_id = f"llm-jp-lora-{timestamp}"
    
    print(f"Saving LoRA adapter to Hub as {new_model_id}_lora...")
    try:
        model.push_to_hub_merged(
            new_model_id+"_lora",
            tokenizer=tokenizer,
            save_method="lora",
            token=HF_TOKEN,
            private=True
        )
        print("Successfully saved model to Hub")
    except Exception as e:
        print(f"Error saving model to Hub: {e}")

if __name__ == "__main__":
    # Run training
    trained_dir = train_model()
    
    # Find the latest checkpoint
    checkpoints = [d for d in os.listdir(trained_dir) if d.startswith('checkpoint-')]
    latest_checkpoint = max(checkpoints, key=lambda x: int(x.split('-')[1]))
    checkpoint_path = os.path.join(trained_dir, latest_checkpoint)
    
    # Run inference
    inference(checkpoint_path)

    
    # Push the model to the Hub
    model, _ = FastLanguageModel.from_pretrained(
        model_name=model_name,
        max_seq_length=max_seq_length,
        dtype="bfloat16",
        load_in_4bit=True,
    )
    model = PeftModel.from_pretrained(model, checkpoint_path)
    save_model_to_hub(model, tokenizer, current_time)
    
    print(f"学習出力ディレクトリ: {trained_dir}")
    print(f"推論結果の保存先: {os.path.join(output_dir, f'predictions_{current_time}.jsonl')}")