# Hugging Face Space: Gradio chat demo for Guchyos/gemma-2b-elyza-task
import os

import gradio as gr
import torch
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer

# Lazily-loaded singletons, populated on first use by load_model().
model = None
tokenizer = None

# Hugging Face access token used to download the model weights.
HUGGING_FACE_TOKEN = os.getenv('HUGGINGFACE_TOKEN')
if not HUGGING_FACE_TOKEN:
    # Fail fast at startup if the token is missing. The variable actually
    # read above is HUGGINGFACE_TOKEN (no underscore between HUGGING and
    # FACE); the previous message named a different, nonexistent key.
    raise ValueError("環境変数 HUGGINGFACE_TOKEN が設定されていません")
def load_model():
    """Load and cache the model/tokenizer pair (CPU, full precision).

    The pair is loaded once on the first call and reused afterwards via
    the module-level ``model`` / ``tokenizer`` globals.

    Returns:
        tuple: ``(model, tokenizer)``.

    Raises:
        RuntimeError: if any step of the download/initialization fails
            (chained to the original exception).
    """
    global model, tokenizer
    if model is None:
        model_name = "Guchyos/gemma-2b-elyza-task"
        try:
            # Fetch the config first so from_pretrained below receives an
            # explicit config object.
            config = AutoConfig.from_pretrained(
                model_name,
                token=HUGGING_FACE_TOKEN,
                trust_remote_code=True
            )
            tokenizer = AutoTokenizer.from_pretrained(
                model_name,
                token=HUGGING_FACE_TOKEN,
                trust_remote_code=True
            )
            # CPU-only, float32 load; quantization explicitly disabled.
            model = AutoModelForCausalLM.from_pretrained(
                model_name,
                config=config,
                torch_dtype=torch.float32,
                device_map="cpu",
                token=HUGGING_FACE_TOKEN,
                load_in_8bit=False,
                load_in_4bit=False,
                trust_remote_code=True
            )
            # Inference mode: disables dropout and similar training-only layers.
            model.eval()
        except Exception as e:
            # Raise a specific exception type and chain the cause so the
            # original traceback is preserved (the bare `raise Exception`
            # used previously discarded it).
            raise RuntimeError(f"モデルの読み込みに失敗しました: {str(e)}") from e
    return model, tokenizer
def predict(message, history):
    """Gradio chat callback: produce a greedy-decoded reply to *message*.

    ``history`` is required by the ChatInterface signature but is not used.
    Any failure is reported back to the UI as an error string rather than
    raised, so the chat session keeps working.
    """
    try:
        mdl, tok = load_model()
        prompt = f"質問: {message}\n\n回答:"
        encoded = tok(prompt, return_tensors="pt")
        # Deterministic (greedy) generation, capped at 128 new tokens.
        with torch.no_grad():
            generated = mdl.generate(
                **encoded,
                max_new_tokens=128,
                do_sample=False,
                pad_token_id=tok.pad_token_id,
                eos_token_id=tok.eos_token_id
            )
        decoded = tok.decode(generated[0], skip_special_tokens=True)
        # The model echoes the prompt; strip it so only the answer remains.
        return decoded.replace(prompt, "").strip()
    except Exception as e:
        return f"エラーが発生しました: {str(e)}"
# Wire up the chat UI: ChatInterface invokes predict(message, history).
demo = gr.ChatInterface(
    predict,
    title="💬 Gemma 2 for ELYZA-tasks",
    description="ELYZA-tasks-100-TV用に最適化された日本語LLMです"
)

if __name__ == "__main__":
    # share=True additionally exposes a temporary public Gradio URL.
    demo.launch(share=True)