kimmeoungjun committed on
Commit
a0352f8
·
1 Parent(s): e93380c

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -0
app.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import gradio as gr
3
+
4
+ from peft import PeftModel, PeftConfig
5
+ from transformers import AutoModelForCausalLM, AutoTokenizer
6
+
7
# Run on the GPU when one is visible to torch, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# The adapter repository; its PEFT config names the base model it was
# trained on, so the base checkpoint id is read from there, not hard-coded.
peft_model_id = "kimmeoungjun/qlora-koalpaca2"
config = PeftConfig.from_pretrained(peft_model_id)
base_model_id = config.base_model_name_or_path

# Load the base causal LM, wrap it with the adapter weights, and move the
# combined model to the selected device.
model = AutoModelForCausalLM.from_pretrained(base_model_id)
model = PeftModel.from_pretrained(model, peft_model_id)
model = model.to(device)

# The tokenizer comes from the base model, not from the adapter repository.
tokenizer = AutoTokenizer.from_pretrained(base_model_id)
13
+
14
def generate(q):
    """Generate an answer to a question with the fine-tuned model.

    The question is wrapped in the ``### 질문: ... ### 답변:`` prompt
    template, sampled from with ``model.generate``, and only the newly
    generated continuation is returned.

    Args:
        q: The question text entered by the user.

    Returns:
        The generated answer, with special tokens removed and surrounding
        whitespace stripped.
    """
    prompt = f"### 질문: {q}\n\n### 답변:"
    encoded = tokenizer(prompt, return_tensors='pt', return_token_type_ids=False)
    encoded = {k: v.to(device) for k, v in encoded.items()}

    outputs = model.generate(
        **encoded,
        max_new_tokens=256,
        do_sample=True,
        # NOTE(review): hard-coded id; presumably the base tokenizer's EOS
        # token -- confirm against tokenizer.eos_token_id.
        eos_token_id=2,
    )

    # The returned sequence starts with the prompt tokens, so the answer is
    # everything after them. Slicing by token count (instead of searching the
    # decoded text for "### 답변:") stays correct when the question itself
    # contains the marker and never depends on a str.find() result of -1.
    prompt_length = encoded["input_ids"].shape[1]
    answer_ids = outputs[0][prompt_length:]

    # skip_special_tokens keeps the EOS marker text out of the answer shown
    # to the user.
    return tokenizer.decode(answer_ids, skip_special_tokens=True).strip()
26
+
27
# Expose generate() through a simple text-in / text-out web UI and start the
# server with a public share link requested.
gr.Interface(
    fn=generate,
    inputs="text",
    outputs="text",
).launch(share=True)