Spaces:

eneSadi
/

cosmos-llama-flask

Sleeping

App Files Community

eneSadi committed on Nov 7, 2024

Commit

32c2259

unverified ·

1 Parent(s): 219ad87

gemma

Browse files

Files changed (2) hide show

app.py +8 -31
app_cosmos.py +72 -0

app.py CHANGED Viewed

@@ -2,23 +2,17 @@ from fastapi import FastAPI, Request
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
-model_id = "ytu-ce-cosmos/Turkish-Llama-8b-DPO-v0.1"
 print("Model loading started")
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
-    torch_dtype=torch.bfloat16,
     device_map="auto",
 )
 print("Model loading completed")
-# bu mesaj değiştirilebilir ve chatbotun başlangıç mesajı olarak kullanılabilir
-initial_message = [
-    {"role": "system", "content": "Sen bir yapay zeka asistanısın. Kullanıcı sana bir görev verecek. Amacın görevi olabildiğince sadık bir şekilde tamamlamak."}
-    # Görevi yerine getirirken adım adım düşün ve adımlarını gerekçelendir.
-]
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 print("Selected device:", device)
@@ -38,35 +32,18 @@ async def ask(request: Request):
         return {"error": "Prompt is missing"}
     print("Device of the model:", model.device)
-    messages = initial_message.copy()
-    messages.append({"role": "user", "content": f"{prompt}"})
     print("Messages:", messages)
     print("Tokenizer process started")
-    input_ids = tokenizer.apply_chat_template(
-        messages,
-        add_generation_prompt=True,
-        return_tensors="pt"
-    ).to(model.device)
-    terminators = [
-        tokenizer.eos_token_id,
-        tokenizer.convert_tokens_to_ids("<|eot_id|>")
-    ]
     print("Tokenizer process completed")
     print("Model process started")
-    outputs = model.generate(
-        input_ids,
-        max_new_tokens=256,
-        eos_token_id=terminators,
-        do_sample=True,
-        temperature=0.6,
-        top_p=0.9,
-    )
-    response = outputs[0][input_ids.shape[-1]:]
     print("Tokenizer decode process started")
-    answer = tokenizer.decode(response, skip_special_tokens=True)
     return {"answer": answer}

 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
+model_id = "google/gemma-2-9b-it"
 print("Model loading started")
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
     device_map="auto",
+    torch_dtype=torch.bfloat16,
 )
 print("Model loading completed")
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 print("Selected device:", device)
         return {"error": "Prompt is missing"}
     print("Device of the model:", model.device)
+    messages = [
+        {"role": "user", "content": f"{prompt}"},
+    ]
     print("Messages:", messages)
     print("Tokenizer process started")
+    input_ids = tokenizer.apply_chat_template(messages, return_tensors="pt", return_dict=True).to("cuda")
     print("Tokenizer process completed")
     print("Model process started")
+    outputs = model.generate(**input_ids, max_new_tokens=256)
     print("Tokenizer decode process started")
+    answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
     return {"answer": answer}

app_cosmos.py ADDED Viewed

	@@ -0,0 +1,72 @@

+from fastapi import FastAPI, Request
+from transformers import AutoTokenizer, AutoModelForCausalLM
+import torch
+model_id = "ytu-ce-cosmos/Turkish-Llama-8b-DPO-v0.1"
+print("Model loading started")
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+model = AutoModelForCausalLM.from_pretrained(
+    model_id,
+    torch_dtype=torch.bfloat16,
+    device_map="auto",
+)
+print("Model loading completed")
+# bu mesaj değiştirilebilir ve chatbotun başlangıç mesajı olarak kullanılabilir
+initial_message = [
+    {"role": "system", "content": "Sen bir yapay zeka asistanısın. Kullanıcı sana bir görev verecek. Amacın görevi olabildiğince sadık bir şekilde tamamlamak."}
+    # Görevi yerine getirirken adım adım düşün ve adımlarını gerekçelendir.
+]
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+print("Selected device:", device)
+app = FastAPI()
+@app.get('/')
+def home():
+    return {"hello": "Bitfumes"}
+@app.post('/ask')
+async def ask(request: Request):
+    data = await request.json()
+    prompt = data.get("prompt")
+    if not prompt:
+        return {"error": "Prompt is missing"}
+    print("Device of the model:", model.device)
+    messages = initial_message.copy()
+    messages.append({"role": "user", "content": f"{prompt}"})
+    print("Messages:", messages)
+    print("Tokenizer process started")
+    input_ids = tokenizer.apply_chat_template(
+        messages,
+        add_generation_prompt=True,
+        return_tensors="pt"
+    ).to(model.device)
+    terminators = [
+        tokenizer.eos_token_id,
+        tokenizer.convert_tokens_to_ids("<|eot_id|>")
+    ]
+    print("Tokenizer process completed")
+    print("Model process started")
+    outputs = model.generate(
+        input_ids,
+        max_new_tokens=256,
+        eos_token_id=terminators,
+        do_sample=True,
+        temperature=0.6,
+        top_p=0.9,
+    )
+    response = outputs[0][input_ids.shape[-1]:]
+    print("Tokenizer decode process started")
+    answer = tokenizer.decode(response, skip_special_tokens=True)
+    return {"answer": answer}