from fastapi import FastAPI, Request
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
print("COSMOS Llama Chatbot is starting...")
model_id = "ytu-ce-cosmos/Turkish-Llama-8b-DPO-v0.1"
print("Model loading started")
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
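# Note: device_map="auto" requires the `accelerate` package to be installed
# (an assumption about the runtime environment), and bfloat16 weights are
# intended for hardware with native bfloat16 support.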
print("Model loading completed")
# This message can be changed and used as the chatbot's initial (system) message.
initial_message = [
    # System prompt (Turkish): "You are an AI assistant. The user will give you
    # a task. Your goal is to complete the task as faithfully as possible."
    {"role": "system", "content": "Sen bir yapay zeka asistanısın. Kullanıcı sana bir görev verecek. Amacın görevi olabildiğince sadık bir şekilde tamamlamak."}
    # Commented-out extra instruction (Turkish): "While performing the task,
    # think step by step and justify your steps."
]
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Selected device:", device)
app = FastAPI()
@app.get('/')
def home():
    return {"hello": "Bitfumes"}
@app.post('/ask')
async def ask(request: Request):
    data = await request.json()
    prompt = data.get("prompt")
    if not prompt:
        return {"error": "Prompt is missing"}

    print("Device of the model:", model.device)
    # Start from a fresh copy of the system message for every request and
    # append the user's prompt as the latest turn.
    messages = initial_message.copy()
    messages.append({"role": "user", "content": f"{prompt}"})
    print("Messages:", messages)

    print("Tokenizer process started")
    # Render the chat template to input token ids and move them to the model's device.
    input_ids = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt"
    ).to(model.device)
    # Stop generation at either the model's EOS token or the Llama 3
    # end-of-turn token "<|eot_id|>" (the base model is Llama 3).
    terminators = [
        tokenizer.eos_token_id,
        tokenizer.convert_tokens_to_ids("<|eot_id|>")
    ]
    print("Tokenizer process completed")

    print("Model process started")
    outputs = model.generate(
        input_ids,
        max_new_tokens=256,
        eos_token_id=terminators,
        do_sample=True,
        temperature=0.6,
        top_p=0.9,
    )
    # Keep only the newly generated tokens (everything after the prompt).
    response = outputs[0][input_ids.shape[-1]:]
    print("Tokenizer decode process started")
    answer = tokenizer.decode(response, skip_special_tokens=True)
    return {"answer": answer}