dazzleun-7 committed on
Commit 1f5d6fb · verified
1 Parent(s): 4717c6e

Update app.py

Files changed (1)
  1. app.py +144 -0
app.py CHANGED
@@ -1,5 +1,149 @@
  # gradio final ver ----------------------------
 
+ # Install required packages
+ !pip install mxnet
+ !pip install gluonnlp==0.8.0
+ !pip install tqdm pandas
+ !pip install sentencepiece
+ !pip install transformers
+ !pip install torch
+ !pip install numpy==1.23.1
+
+ # Install KoBERT from GitHub
+ !pip install 'git+https://github.com/SKTBrain/KoBERT.git#egg=kobert_tokenizer&subdirectory=kobert_hf'
+
+ !pip install langchain==0.0.125 chromadb==0.3.14 pypdf==3.7.0 tiktoken==0.3.3
+ !pip install openai==0.28
+ !pip install gradio transformers torch opencv-python-headless
+
+ import torch
+ from torch import nn
+ import torch.nn.functional as F
+ import torch.optim as optim
+ from torch.utils.data import Dataset, DataLoader
+ import gluonnlp as nlp
+ import numpy as np
+ from tqdm import tqdm, tqdm_notebook
+ import pandas as pd
+
+ # Import the model and tokenizer via Hugging Face
+ from kobert_tokenizer import KoBERTTokenizer
+ from transformers import BertModel
+
+ from transformers import AdamW
+ from transformers.optimization import get_cosine_schedule_with_warmup
+
+ n_devices = torch.cuda.device_count()
+ print(n_devices)
+
+ for i in range(n_devices):
+     print(torch.cuda.get_device_name(i))
+
+ if torch.cuda.is_available():
+     device = torch.device("cuda")
+     print('There are %d GPU(s) available.' % torch.cuda.device_count())
+     print('We will use the GPU:', torch.cuda.get_device_name(0))
+ else:
+     device = torch.device("cpu")
+     print('No GPU available, using the CPU instead.')
+
+
+ # KoBERT softmax classifier
+ class BERTClassifier(nn.Module):
+     def __init__(self,
+                  bert,
+                  hidden_size=768,
+                  num_classes=6,
+                  dr_rate=None,
+                  params=None):
+         super(BERTClassifier, self).__init__()
+         self.bert = bert
+         self.dr_rate = dr_rate
+         self.softmax = nn.Softmax(dim=1)  # changed to Softmax
+         self.classifier = nn.Sequential(
+             nn.Dropout(p=0.5),
+             nn.Linear(in_features=hidden_size, out_features=512),
+             nn.Linear(in_features=512, out_features=num_classes),
+         )
+
+         # Add a normalization layer (Layer Normalization)
+         self.layer_norm = nn.LayerNorm(768)
+
+         # Dropout
+         self.dropout = nn.Dropout(p=dr_rate)
+
+     def gen_attention_mask(self, token_ids, valid_length):
+         attention_mask = torch.zeros_like(token_ids)
+         for i, v in enumerate(valid_length):
+             attention_mask[i][:v] = 1
+         return attention_mask.float()
+
+     def forward(self, token_ids, valid_length, segment_ids):
+         attention_mask = self.gen_attention_mask(token_ids, valid_length)
+         _, pooler = self.bert(input_ids=token_ids, token_type_ids=segment_ids.long(), attention_mask=attention_mask.float().to(token_ids.device))
+
+         pooled_output = self.dropout(pooler)
+         normalized_output = self.layer_norm(pooled_output)
+         out = self.classifier(normalized_output)
+
+         # Apply LayerNorm
+         pooler = self.layer_norm(pooler)
+
+         if self.dr_rate:
+             pooler = self.dropout(pooler)
+
+         logits = self.classifier(pooler)  # compute logits for classification
+         probabilities = self.softmax(logits)  # per-class probabilities via Softmax
+         return probabilities  # return the probability of each class
+
+ # Load the model defined above
+ model = BERTClassifier(bertmodel, dr_rate=0.4).to(device)
+ #model = BERTClassifier(bertmodel, dr_rate=0.5).to('cpu')
+
+ # Prepare optimizer and schedule (linear warmup and decay)
+ no_decay = ['bias', 'LayerNorm.weight']
+ optimizer_grouped_parameters = [
+     {'params': [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
+     {'params': [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
+ ]
+ optimizer = AdamW(optimizer_grouped_parameters, lr=learning_rate)
+ loss_fn = nn.CrossEntropyLoss()
+ t_total = len(train_dataloader) * num_epochs
+ warmup_step = int(t_total * warmup_ratio)
+ scheduler = get_cosine_schedule_with_warmup(optimizer, num_warmup_steps=warmup_step, num_training_steps=t_total)
+ def calc_accuracy(X, Y):
+     max_vals, max_indices = torch.max(X, 1)
+     train_acc = (max_indices == Y).sum().data.cpu().numpy() / max_indices.size()[0]
+     return train_acc
+ train_dataloader
+
+ model = torch.load('./model_weights_softmax(model).pth')
+ model.eval()
+
+ # Load the Melon data
+
+ melon_data = pd.read_csv('./melon_data.csv')
+ melon_emotions = pd.read_csv('./melon_emotions_final.csv')
+ melon_emotions = pd.merge(melon_emotions, melon_data, left_on='Title', right_on='title', how='inner')
+ melon_emotions = melon_emotions[['singer', 'Title', 'genre', 'Emotions']]
+ melon_emotions = melon_emotions.drop_duplicates(subset='Title', keep='first')
+ melon_emotions['Emotions'] = melon_emotions['Emotions'].apply(lambda x: ast.literal_eval(x))
+
+ emotions = melon_emotions['Emotions'].to_list()
+
+ # Gradio
+ !pip install --upgrade gradio
+ import numpy as np
+ import pandas as pd
+ import requests
+ from PIL import Image
+ import torch
+ from transformers import AutoProcessor, AutoModelForZeroShotImageClassification, pipeline
+ import gradio as gr
+ import openai
+ from sklearn.metrics.pairwise import cosine_similarity
+ import ast
+
  ###### Basic settings ######
  # Set the OpenAI API key
  openai.api_key = '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'
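The added code references several names that this version of app.py never defines: bertmodel (passed to BERTClassifier), learning_rate, num_epochs, warmup_ratio, and train_dataloader (used to build the optimizer and scheduler), so the script would stop with a NameError at those lines unless they are set up earlier. The sketch below is one plausible way to define them for KoBERT fine-tuning; the checkpoint id follows the SKTBrain KoBERT examples, and every hyperparameter value and the toy dataset are assumptions, not part of this commit.

import torch
from torch.utils.data import DataLoader, TensorDataset
from kobert_tokenizer import KoBERTTokenizer
from transformers import BertModel

# Pretrained KoBERT backbone and tokenizer (checkpoint id as used in the KoBERT examples)
tokenizer = KoBERTTokenizer.from_pretrained('skt/kobert-base-v1')
bertmodel = BertModel.from_pretrained('skt/kobert-base-v1', return_dict=False)

# Hypothetical training hyperparameters -- placeholders, not values from this commit
max_len = 64
batch_size = 32
learning_rate = 5e-5
num_epochs = 5
warmup_ratio = 0.1

# Hypothetical labelled corpus; the real training data is not part of this diff
sentences = ["placeholder sentence"]
labels = [0]
enc = tokenizer(sentences, padding='max_length', truncation=True,
                max_length=max_len, return_tensors='pt')
valid_length = enc['attention_mask'].sum(dim=1)            # number of non-pad tokens per row
train_dataset = TensorDataset(enc['input_ids'], valid_length,
                              enc['token_type_ids'], torch.tensor(labels))
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)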
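Nothing in this excerpt shows how the loaded classifier is actually called, so here is a short, hypothetical inference helper that matches the BERTClassifier.forward signature above (token_ids, valid_length, segment_ids) and its 6-way softmax output. The function name, the max_len default, and the example sentence are illustrative only; the mapping from class index to emotion name is defined by the training data and is not visible in this diff.

def predict_emotion(sentence, model, tokenizer, device, max_len=64):
    # Tokenize a single sentence into the tensors BERTClassifier.forward expects
    enc = tokenizer(sentence, padding='max_length', truncation=True,
                    max_length=max_len, return_tensors='pt')
    token_ids = enc['input_ids'].to(device)
    segment_ids = enc['token_type_ids'].to(device)
    valid_length = enc['attention_mask'].sum(dim=1)    # count of real (non-pad) tokens

    model.eval()
    with torch.no_grad():
        probs = model(token_ids, valid_length, segment_ids)   # softmax output, shape (1, 6)
    return probs.argmax(dim=1).item(), probs.squeeze(0).tolist()

# Example call (label index-to-emotion mapping comes from the training data, not shown here):
# top_class, class_probs = predict_emotion("오늘은 정말 행복한 하루였다", model, tokenizer, device)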