eneSadi committed
Commit 32c2259 · unverified · 1 Parent(s): 219ad87
Files changed (2)
  1. app.py +8 -31
  2. app_cosmos.py +72 -0
app.py CHANGED
@@ -2,23 +2,17 @@ from fastapi import FastAPI, Request
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
 
-model_id = "ytu-ce-cosmos/Turkish-Llama-8b-DPO-v0.1"
+model_id = "google/gemma-2-9b-it"
 
 print("Model loading started")
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
-    torch_dtype=torch.bfloat16,
     device_map="auto",
+    torch_dtype=torch.bfloat16,
 )
 print("Model loading completed")
 
-# this message can be changed and used as the chatbot's initial message
-initial_message = [
-    {"role": "system", "content": "Sen bir yapay zeka asistanısın. Kullanıcı sana bir görev verecek. Amacın görevi olabildiğince sadık bir şekilde tamamlamak."}
-    # While performing the task, think step by step and justify your steps.
-]
-
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 print("Selected device:", device)
 
@@ -38,35 +32,18 @@ async def ask(request: Request):
         return {"error": "Prompt is missing"}
 
     print("Device of the model:", model.device)
-    messages = initial_message.copy()
-    messages.append({"role": "user", "content": f"{prompt}"})
-
+    messages = [
+        {"role": "user", "content": f"{prompt}"},
+    ]
     print("Messages:", messages)
     print("Tokenizer process started")
-    input_ids = tokenizer.apply_chat_template(
-        messages,
-        add_generation_prompt=True,
-        return_tensors="pt"
-    ).to(model.device)
-
-    terminators = [
-        tokenizer.eos_token_id,
-        tokenizer.convert_tokens_to_ids("<|eot_id|>")
-    ]
+    input_ids = tokenizer.apply_chat_template(messages, return_tensors="pt", return_dict=True).to("cuda")
    print("Tokenizer process completed")
 
     print("Model process started")
-    outputs = model.generate(
-        input_ids,
-        max_new_tokens=256,
-        eos_token_id=terminators,
-        do_sample=True,
-        temperature=0.6,
-        top_p=0.9,
-    )
-    response = outputs[0][input_ids.shape[-1]:]
+    outputs = model.generate(**input_ids, max_new_tokens=256)
 
     print("Tokenizer decode process started")
-    answer = tokenizer.decode(response, skip_special_tokens=True)
+    answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
     return {"answer": answer}
app_cosmos.py ADDED
@@ -0,0 +1,72 @@
+from fastapi import FastAPI, Request
+from transformers import AutoTokenizer, AutoModelForCausalLM
+import torch
+
+model_id = "ytu-ce-cosmos/Turkish-Llama-8b-DPO-v0.1"
+
+print("Model loading started")
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+model = AutoModelForCausalLM.from_pretrained(
+    model_id,
+    torch_dtype=torch.bfloat16,
+    device_map="auto",
+)
+print("Model loading completed")
+
+# this message can be changed and used as the chatbot's initial message
+initial_message = [
+    {"role": "system", "content": "Sen bir yapay zeka asistanısın. Kullanıcı sana bir görev verecek. Amacın görevi olabildiğince sadık bir şekilde tamamlamak."}
+    # While performing the task, think step by step and justify your steps.
+]
+
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+print("Selected device:", device)
+
+app = FastAPI()
+
+
+@app.get('/')
+def home():
+    return {"hello": "Bitfumes"}
+
+
+@app.post('/ask')
+async def ask(request: Request):
+    data = await request.json()
+    prompt = data.get("prompt")
+    if not prompt:
+        return {"error": "Prompt is missing"}
+
+    print("Device of the model:", model.device)
+    messages = initial_message.copy()
+    messages.append({"role": "user", "content": f"{prompt}"})
+
+    print("Messages:", messages)
+    print("Tokenizer process started")
+    input_ids = tokenizer.apply_chat_template(
+        messages,
+        add_generation_prompt=True,
+        return_tensors="pt"
+    ).to(model.device)
+
+    terminators = [
+        tokenizer.eos_token_id,
+        tokenizer.convert_tokens_to_ids("<|eot_id|>")
+    ]
+    print("Tokenizer process completed")
+
+    print("Model process started")
+    outputs = model.generate(
+        input_ids,
+        max_new_tokens=256,
+        eos_token_id=terminators,
+        do_sample=True,
+        temperature=0.6,
+        top_p=0.9,
+    )
+    response = outputs[0][input_ids.shape[-1]:]
+
+    print("Tokenizer decode process started")
+    answer = tokenizer.decode(response, skip_special_tokens=True)
+
+    return {"answer": answer}
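One behavioral difference between the two files is the decode step: app_cosmos.py slices the prompt tokens off before decoding (response = outputs[0][input_ids.shape[-1]:]), while the new app.py decodes outputs[0] in full, so the returned answer also contains the chat-template rendering of the prompt. The new template call additionally drops add_generation_prompt=True, which is what appends the model-turn marker before generation. A minimal sketch of how the new app.py path could strip the prompt as well (variable names follow the new app.py; the slicing is a suggestion, not part of the commit):

# inside ask(), after generate(); input_ids is the BatchEncoding that
# apply_chat_template(..., return_dict=True) returned
prompt_len = input_ids["input_ids"].shape[-1]  # number of prompt tokens
new_tokens = outputs[0][prompt_len:]           # keep only generated tokens
answer = tokenizer.decode(new_tokens, skip_special_tokens=True)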