---
base_model: llm-jp/llm-jp-3-13b
tags:
- text-generation-inference
- transformers
- unsloth
- llama
- trl
license: apache-2.0
language:
- en
- ja
---

# Uploaded model

- **Developed by:** katsumasa
- **License:** apache-2.0
- **Finetuned from model:** llm-jp/llm-jp-3-13b

This llama model was trained 2x faster with [Unsloth](https://github.com/unslothai/unsloth) and Hugging Face's TRL library.

# Code (train, inference, upload)

The full script used for training, inference, and upload is below; two short usage sketches (the assumed training-data format and loading the uploaded adapter) follow the code block.

```python
import torch
from transformers import TrainingArguments, AutoModelForCausalLM, AutoTokenizer
from unsloth import FastLanguageModel, is_bfloat16_supported
from datasets import load_dataset
from trl import SFTTrainer
from datetime import datetime
import os
import shutil
import json
from tqdm import tqdm
from peft import PeftModel
from dotenv import load_dotenv


def get_hf_token():
    """
    Retrieve the HuggingFace token.
    """
    load_dotenv()
    token = os.getenv("HF_TOKEN")
    if not token:
        raise ValueError("HuggingFace token not found. Please set HF_TOKEN in .env file or environment variables")
    return token


# Common settings
max_seq_length = 512
model_name = "llm-jp/llm-jp-3-13b"
current_time = datetime.now().strftime("%Y%m%d_%H%M%S")
output_dir = f"./outputs/{current_time}"
os.makedirs(output_dir, exist_ok=True)

# Get the HuggingFace token
HF_TOKEN = get_hf_token()

# Initialize the tokenizer
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    model_max_length=max_seq_length,
    padding_side="right",
    truncation_side="right",
)


# Prepare the model for training and run training
def train_model():
    dataset = load_dataset("json", data_files=[
        "./converted_data/converted_generated_data_20241215_190410.json",
        "./converted_data/converted_generated_data_20241215_214354.json",
        "./converted_data/converted_generated_data_20241216_000521.json",
        "./converted_data/converted_generated_data_20241216_111233.json",
        "./converted_data/converted_generated_data_20241216_205637.json",
        "./input/ichikara-instruction-003-001-1.json",
    ])

    prompt = """### 指示
{}
### 回答
{}"""

    def formatting_prompts_func(examples):
        input = examples["text"]
        output = examples["output"]
        text = prompt.format(input, output) + tokenizer.eos_token
        return {"formatted_text": text}

    dataset = dataset.map(
        formatting_prompts_func,
        num_proc=4,
    )

    model, _ = FastLanguageModel.from_pretrained(
        model_name=model_name,
        dtype=None,
        load_in_4bit=True,
        trust_remote_code=True,
    )

    model = FastLanguageModel.get_peft_model(
        model,
        r=32,
        target_modules=[
            "q_proj", "k_proj", "v_proj", "o_proj",
            "gate_proj", "up_proj", "down_proj",
        ],
        lora_alpha=32,
        lora_dropout=0.05,
        bias="none",
        use_gradient_checkpointing="unsloth",
        random_state=3407,
        use_rslora=False,
        loftq_config=None,
        max_seq_length=max_seq_length,
    )

    # Keep a copy of this script next to the training outputs
    current_script = os.path.abspath(__file__)
    script_backup = os.path.join(output_dir, "train_script.py")
    shutil.copy2(current_script, script_backup)

    trainer = SFTTrainer(
        model=model,
        tokenizer=tokenizer,
        train_dataset=dataset["train"],
        max_seq_length=max_seq_length,
        dataset_text_field="formatted_text",
        packing=False,
        args=TrainingArguments(
            per_device_train_batch_size=2,
            gradient_accumulation_steps=4,
            num_train_epochs=1,
            logging_steps=10,
            warmup_steps=10,
            save_steps=100,
            save_total_limit=2,
            max_steps=-1,
            learning_rate=2e-4,
            fp16=not is_bfloat16_supported(),
            bf16=is_bfloat16_supported(),
            group_by_length=True,
            seed=3407,
            output_dir=output_dir,
            report_to="none",
        ),
    )

    trainer.train()

    return output_dir


# Run inference
def inference(checkpoint_dir):
    # Load the evaluation dataset
    datasets = []
    with open("./content/elyza-tasks-100-TV_0.jsonl", "r") as f:
        item = ""
        for line in f:
            line = line.strip()
            item += line
            if item.endswith("}"):
                datasets.append(json.loads(item))
                item = ""

    # Prepare the model for inference
    model, _ = FastLanguageModel.from_pretrained(
        model_name=model_name,
        max_seq_length=max_seq_length,
        dtype=torch.bfloat16,
        load_in_4bit=True,
    )
    model = PeftModel.from_pretrained(model, checkpoint_dir)
    model.eval()
    model = FastLanguageModel.for_inference(model)

    results = []
    for dt in tqdm(datasets):
        input = dt["input"]
        prompt = f"""### 指示\n{input}\n### 回答\n"""

        inputs = tokenizer(
            [prompt],
            return_tensors="pt",
            return_token_type_ids=False,
        ).to(model.device)

        outputs = model.generate(
            **inputs,
            max_new_tokens=512,
            do_sample=False,
            repetition_penalty=1.2,
        )
        prediction = tokenizer.decode(outputs[0], skip_special_tokens=True).split('\n### 回答')[-1]

        results.append({
            "task_id": dt["task_id"],
            "input": input,
            "output": prediction,
        })

    # Save the results
    output_file = os.path.join(output_dir, f"predictions_{current_time}.jsonl")
    with open(output_file, 'w', encoding='utf-8') as f:
        for result in results:
            json.dump(result, f, ensure_ascii=False)
            f.write('\n')


def save_model_to_hub(model, tokenizer, timestamp):
    """
    Push the LoRA adapter to the HuggingFace Hub.
    """
    new_model_id = f"llm-jp-lora-{timestamp}"

    print(f"Saving LoRA adapter to Hub as {new_model_id}_lora...")
    try:
        model.push_to_hub_merged(
            new_model_id + "_lora",
            tokenizer=tokenizer,
            save_method="lora",
            token=HF_TOKEN,
            private=True,
        )
        print("Successfully saved model to Hub")
    except Exception as e:
        print(f"Error saving model to Hub: {e}")


if __name__ == "__main__":
    # Run training
    trained_dir = train_model()

    # Find the latest checkpoint
    checkpoints = [d for d in os.listdir(trained_dir) if d.startswith('checkpoint-')]
    latest_checkpoint = max(checkpoints, key=lambda x: int(x.split('-')[1]))
    checkpoint_path = os.path.join(trained_dir, latest_checkpoint)

    # Run inference
    inference(checkpoint_path)

    # Push the adapter to the Hub
    model, _ = FastLanguageModel.from_pretrained(
        model_name=model_name,
        max_seq_length=max_seq_length,
        dtype=torch.bfloat16,
        load_in_4bit=True,
    )
    model = PeftModel.from_pretrained(model, checkpoint_path)
    save_model_to_hub(model, tokenizer, current_time)

    print(f"Training output directory: {trained_dir}")
    print(f"Predictions saved to: {os.path.join(output_dir, f'predictions_{current_time}.jsonl')}")
```
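The training JSON files themselves are not included here, but `formatting_prompts_func` reads a `text` field (the instruction) and an `output` field (the answer) from each record. The sketch below shows a record in that shape, assuming the files are plain JSON arrays of objects; the file name and the record contents are illustrative, not actual training data.

```python
import json

# Illustrative records only: "text" holds the instruction, "output" the answer,
# mirroring the fields that formatting_prompts_func reads.
sample_records = [
    {"text": "日本で一番高い山は何ですか?", "output": "日本で一番高い山は富士山です。"},
]

# Hypothetical file name; the real converted_data files are assumed (not verified)
# to follow this layout.
with open("./converted_data/sample_converted_data.json", "w", encoding="utf-8") as f:
    json.dump(sample_records, f, ensure_ascii=False, indent=2)
```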
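For downstream use, the adapter pushed by `save_model_to_hub` can be loaded on top of the base model with standard `transformers` + `peft` calls. This is a minimal sketch, not the script above: the repository id is a placeholder for whatever `llm-jp-lora-{timestamp}_lora` repo was actually created, and since the repo was pushed as `private=True`, you need to be authenticated to the Hub to download it.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_model_id = "llm-jp/llm-jp-3-13b"
# Placeholder: replace with the repository created by save_model_to_hub.
adapter_id = "katsumasa/llm-jp-lora-<timestamp>_lora"

tokenizer = AutoTokenizer.from_pretrained(base_model_id)
model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
model = PeftModel.from_pretrained(model, adapter_id)
model.eval()

# Build a prompt in the same format used during training.
prompt = "### 指示\n日本で一番高い山は何ですか?\n### 回答\n"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
with torch.no_grad():
    outputs = model.generate(**inputs, max_new_tokens=256, do_sample=False, repetition_penalty=1.2)
print(tokenizer.decode(outputs[0], skip_special_tokens=True).split("\n### 回答")[-1])
```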