Spaces:
Sleeping
Sleeping
minseokKoo
committed on
Commit
·
4c479ed
1
Parent(s):
ea6c7bf
Update app.py
Browse files
app.py
CHANGED
@@ -15,11 +15,8 @@ import gradio as gr
|
|
15 |
|
16 |
def greet(co):
|
17 |
code_text = []
|
18 |
-
|
19 |
-
|
20 |
-
if not code:
|
21 |
-
break
|
22 |
-
code_text.append(code)
|
23 |
|
24 |
code_text = ' '.join(code_text)
|
25 |
code_text = re.sub('\/\*[\S\s]*\*\/', '', code_text)
|
@@ -27,7 +24,7 @@ def greet(co):
|
|
27 |
code_text = re.sub('(\\\\n)+', '\\n', code_text)
|
28 |
|
29 |
# 1. CFA-CodeBERTa-small.pt -> CodeBERTa-small-v1 finetuning model
|
30 |
-
path = 'models/CFA-CodeBERTa-small.pt'
|
31 |
tokenizer = AutoTokenizer.from_pretrained("huggingface/CodeBERTa-small-v1")
|
32 |
input_ids = tokenizer.encode(
|
33 |
code_text, max_length=512, truncation=True, padding='max_length')
|
@@ -39,7 +36,7 @@ def greet(co):
|
|
39 |
# model(input_ids)[0].argmax().detach().cpu().numpy().item()
|
40 |
|
41 |
# 2. CFA-codebert-c.pt -> codebert-c finetuning model
|
42 |
-
path = 'models/CFA-codebert-c.pt'
|
43 |
tokenizer = AutoTokenizer.from_pretrained(path)
|
44 |
input_ids = tokenizer(code_text, padding=True, max_length=512,
|
45 |
truncation=True, return_token_type_ids=True)['input_ids']
|
@@ -50,7 +47,7 @@ def greet(co):
|
|
50 |
pred_2 = model(input_ids)[0].detach().cpu().numpy()[0]
|
51 |
|
52 |
# 3. CFA-codebert-c-v2.pt -> undersampling + codebert-c finetuning model
|
53 |
-
path = 'models/CFA-codebert-c-v2.pt'
|
54 |
tokenizer = RobertaTokenizer.from_pretrained(path)
|
55 |
input_ids = tokenizer(code_text, padding=True, max_length=512,
|
56 |
truncation=True, return_token_type_ids=True)['input_ids']
|
@@ -61,7 +58,7 @@ def greet(co):
|
|
61 |
pred_3 = model(input_ids)[0].detach().cpu().numpy()
|
62 |
|
63 |
# 4. codeT5 finetuning model
|
64 |
-
path = 'models/CFA-codeT5'
|
65 |
model_params = {
|
66 |
# model_type: t5-base/t5-large
|
67 |
"MODEL": path,
|
@@ -83,8 +80,11 @@ def greet(co):
|
|
83 |
# ensemble
|
84 |
tot_result = (pred_1 * 0.8 + pred_2 * 0.1 +
|
85 |
pred_3 * 0.1 + pred_4 * 0.1).argmax()
|
|
|
|
|
|
|
|
|
86 |
|
87 |
-
return tot_result
|
88 |
|
89 |
|
90 |
|
@@ -200,25 +200,30 @@ with gr.Blocks() as demo1:
|
|
200 |
|
201 |
gr.Markdown(
|
202 |
"""
|
203 |
-
정적 분석기로 오류라고 보고된 코드를
|
204 |
오류가 True-positive 인지 False-positive 인지 분류 해 주는 프로그램이다.
|
205 |
""")
|
206 |
|
207 |
with gr.Accordion(label='모델에 대한 설명 ( 여기를 클릭 하시오. )',open=False):
|
208 |
gr.Markdown(
|
209 |
"""
|
210 |
-
총 3개의 모델을
|
211 |
-
|
|
|
|
|
|
|
|
|
|
|
212 |
"""
|
213 |
)
|
214 |
with gr.Row():
|
215 |
with gr.Column():
|
216 |
-
inputs_1 = gr.Textbox(placeholder="코드를 입력하시오.", label='
|
217 |
with gr.Row():
|
218 |
-
btn = gr.Button("
|
219 |
with gr.Column():
|
220 |
-
outputs_1 = gr.
|
221 |
btn.click(fn = greet, inputs = inputs_1, outputs= outputs_1)
|
222 |
|
223 |
if __name__ == "__main__":
|
224 |
-
demo1.launch()
|
|
|
15 |
|
16 |
def greet(co):
|
17 |
code_text = []
|
18 |
+
|
19 |
+
code_text.append(co)
|
|
|
|
|
|
|
20 |
|
21 |
code_text = ' '.join(code_text)
|
22 |
code_text = re.sub('\/\*[\S\s]*\*\/', '', code_text)
|
|
|
24 |
code_text = re.sub('(\\\\n)+', '\\n', code_text)
|
25 |
|
26 |
# 1. CFA-CodeBERTa-small.pt -> CodeBERTa-small-v1 finetuning model
|
27 |
+
path = os.getcwd() + '/models/CFA-CodeBERTa-small.pt'
|
28 |
tokenizer = AutoTokenizer.from_pretrained("huggingface/CodeBERTa-small-v1")
|
29 |
input_ids = tokenizer.encode(
|
30 |
code_text, max_length=512, truncation=True, padding='max_length')
|
|
|
36 |
# model(input_ids)[0].argmax().detach().cpu().numpy().item()
|
37 |
|
38 |
# 2. CFA-codebert-c.pt -> codebert-c finetuning model
|
39 |
+
path = os.getcwd() + '/models/CFA-codebert-c.pt'
|
40 |
tokenizer = AutoTokenizer.from_pretrained(path)
|
41 |
input_ids = tokenizer(code_text, padding=True, max_length=512,
|
42 |
truncation=True, return_token_type_ids=True)['input_ids']
|
|
|
47 |
pred_2 = model(input_ids)[0].detach().cpu().numpy()[0]
|
48 |
|
49 |
# 3. CFA-codebert-c-v2.pt -> undersampling + codebert-c finetuning model
|
50 |
+
path = os.getcwd() + '/models/CFA-codebert-c-v2.pt'
|
51 |
tokenizer = RobertaTokenizer.from_pretrained(path)
|
52 |
input_ids = tokenizer(code_text, padding=True, max_length=512,
|
53 |
truncation=True, return_token_type_ids=True)['input_ids']
|
|
|
58 |
pred_3 = model(input_ids)[0].detach().cpu().numpy()
|
59 |
|
60 |
# 4. codeT5 finetuning model
|
61 |
+
path = os.getcwd() + '/models/CFA-codeT5'
|
62 |
model_params = {
|
63 |
# model_type: t5-base/t5-large
|
64 |
"MODEL": path,
|
|
|
80 |
# ensemble
|
81 |
tot_result = (pred_1 * 0.8 + pred_2 * 0.1 +
|
82 |
pred_3 * 0.1 + pred_4 * 0.1).argmax()
|
83 |
+
if tot_result == 0:
|
84 |
+
return "false positive !!"
|
85 |
+
else:
|
86 |
+
return "true positive !!"
|
87 |
|
|
|
88 |
|
89 |
|
90 |
|
|
|
200 |
|
201 |
gr.Markdown(
|
202 |
"""
|
203 |
+
정적 분석기로 오류라고 보고된 코드를 입력하면,
|
204 |
오류가 True-positive 인지 False-positive 인지 분류 해 주는 프로그램이다.
|
205 |
""")
|
206 |
|
207 |
with gr.Accordion(label='모델에 대한 설명 ( 여기를 클릭 하시오. )',open=False):
|
208 |
gr.Markdown(
|
209 |
"""
|
210 |
+
총 3개의 모델을 사용하였다.
|
211 |
+
1. codeBERTa-small-v1
|
212 |
+
- codeBERTa-small-v1 설명
|
213 |
+
2. codeBERT - C
|
214 |
+
- codeBERT - C 설명
|
215 |
+
3. codeT5
|
216 |
+
- codeT5 설명
|
217 |
"""
|
218 |
)
|
219 |
with gr.Row():
|
220 |
with gr.Column():
|
221 |
+
inputs_1 = gr.Textbox(placeholder="코드를 입력하시오.", label='Code')
|
222 |
with gr.Row():
|
223 |
+
btn = gr.Button("결과 출력")
|
224 |
with gr.Column():
|
225 |
+
outputs_1 = gr.Text(label = 'Result')
|
226 |
btn.click(fn = greet, inputs = inputs_1, outputs= outputs_1)
|
227 |
|
228 |
if __name__ == "__main__":
|
229 |
+
demo1.launch(share=True)
|