# Install the required packages
!pip install mxnet
!pip install gluonnlp==0.8.0
!pip install tqdm pandas
!pip install sentencepiece
!pip install transformers
!pip install torch
!pip install numpy==1.23.1

# Install the KoBERT tokenizer from the SKTBrain GitHub repository
!pip install 'git+https://github.com/SKTBrain/KoBERT.git#egg=kobert_tokenizer&subdirectory=kobert_hf'

!pip install langchain==0.0.125 chromadb==0.3.14 pypdf==3.7.0 tiktoken==0.3.3
!pip install openai==0.28
!pip install gradio transformers torch opencv-python-headless

import torch
from torch import nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import gluonnlp as nlp
import numpy as np
from tqdm import tqdm, tqdm_notebook
import pandas as pd

# Import the model and tokenizer through Hugging Face
from kobert_tokenizer import KoBERTTokenizer
from transformers import BertModel
from transformers import AdamW
from transformers.optimization import get_cosine_schedule_with_warmup

n_devices = torch.cuda.device_count()
print(n_devices)

for i in range(n_devices):
    print(torch.cuda.get_device_name(i))

if torch.cuda.is_available():
    device = torch.device("cuda")
    print('There are %d GPU(s) available.' % torch.cuda.device_count())
    print('We will use the GPU:', torch.cuda.get_device_name(0))
else:
    device = torch.device("cpu")
    print('No GPU available, using the CPU instead.')

# Pretrained KoBERT backbone (return_dict=False so the forward pass below can
# unpack the output as a (sequence_output, pooled_output) tuple)
bertmodel = BertModel.from_pretrained('skt/kobert-base-v1', return_dict=False)

# KoBERT classifier with a softmax output layer
class BERTClassifier(nn.Module):
    def __init__(self, bert, hidden_size=768, num_classes=6, dr_rate=None, params=None):
        super(BERTClassifier, self).__init__()
        self.bert = bert
        self.dr_rate = dr_rate
        self.softmax = nn.Softmax(dim=1)  # converts logits to class probabilities
        self.classifier = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(in_features=hidden_size, out_features=512),
            nn.Linear(in_features=512, out_features=num_classes),
        )
        # Layer normalization over the pooled output
        self.layer_norm = nn.LayerNorm(768)
        # Dropout
        self.dropout = nn.Dropout(p=dr_rate)

    def gen_attention_mask(self, token_ids, valid_length):
        attention_mask = torch.zeros_like(token_ids)
        for i, v in enumerate(valid_length):
            attention_mask[i][:v] = 1
        return attention_mask.float()

    def forward(self, token_ids, valid_length, segment_ids):
        attention_mask = self.gen_attention_mask(token_ids, valid_length)
        _, pooler = self.bert(input_ids=token_ids,
                              token_type_ids=segment_ids.long(),
                              attention_mask=attention_mask.float().to(token_ids.device))
        # Apply LayerNorm, then dropout, then the classification head
        pooler = self.layer_norm(pooler)
        if self.dr_rate:
            pooler = self.dropout(pooler)
        logits = self.classifier(pooler)      # class logits
        probabilities = self.softmax(logits)  # per-class probabilities
        return probabilities

# Instantiate the model defined above
model = BERTClassifier(bertmodel, dr_rate=0.4).to(device)
#model = BERTClassifier(bertmodel, dr_rate=0.5).to('cpu')

# Training hyperparameters (assumed values following the common KoBERT
# fine-tuning examples; the original does not show them)
warmup_ratio = 0.1
num_epochs = 5
max_grad_norm = 1
learning_rate = 5e-5

# Prepare optimizer and schedule (linear warmup and decay)
no_decay = ['bias', 'LayerNorm.weight']
optimizer_grouped_parameters = [
    {'params': [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
    {'params': [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
]
optimizer = AdamW(optimizer_grouped_parameters, lr=learning_rate)
loss_fn = nn.CrossEntropyLoss()  # NOTE: expects raw logits; see the training sketch below

# train_dataloader is assumed to be built elsewhere (the dataset preparation
# cells are not shown here)
t_total = len(train_dataloader) * num_epochs
warmup_step = int(t_total * warmup_ratio)

scheduler = get_cosine_schedule_with_warmup(optimizer, num_warmup_steps=warmup_step, num_training_steps=t_total)

def calc_accuracy(X, Y):
    max_vals, max_indices = torch.max(X, 1)
    train_acc = (max_indices == Y).sum().data.cpu().numpy() / max_indices.size()[0]
    return train_acc
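# The optimizer, scheduler, and loss are configured above, but the training
# loop itself is not shown. Below is a minimal sketch of one possible loop,
# assuming train_dataloader yields (token_ids, valid_length, segment_ids,
# label) batches as in the standard KoBERT examples. Because the model returns
# softmax probabilities rather than raw logits, the sketch trains on
# log-probabilities with nll_loss (equivalent to cross-entropy on the logits)
# instead of the loss_fn defined above.
for e in range(num_epochs):
    train_acc = 0.0
    model.train()
    for batch_id, (token_ids, valid_length, segment_ids, label) in enumerate(tqdm(train_dataloader)):
        optimizer.zero_grad()
        token_ids = token_ids.long().to(device)
        segment_ids = segment_ids.long().to(device)
        label = label.long().to(device)
        out = model(token_ids, valid_length, segment_ids)
        loss = F.nll_loss(torch.log(out + 1e-9), label)  # out is already softmaxed
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
        optimizer.step()
        scheduler.step()
        train_acc += calc_accuracy(out, label)
    print("epoch {} train acc {:.4f}".format(e + 1, train_acc / (batch_id + 1)))

# After training, the weights can be saved so that the cell below can reload them:
# torch.save(model, './model_weights_softmax(model).pth')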
# Reload the fine-tuned classifier and switch to evaluation mode
model = torch.load('./model_weights_softmax(model).pth')
model.eval()

import ast  # needed below to parse the stringified emotion lists

# Load the Melon data
melon_data = pd.read_csv('./melon_data.csv')
melon_emotions = pd.read_csv('./melon_emotions_final.csv')
melon_emotions = pd.merge(melon_emotions, melon_data, left_on='Title', right_on='title', how='inner')
melon_emotions = melon_emotions[['singer', 'Title', 'genre', 'Emotions']]
melon_emotions = melon_emotions.drop_duplicates(subset='Title', keep='first')
melon_emotions['Emotions'] = melon_emotions['Emotions'].apply(lambda x: ast.literal_eval(x))

emotions = melon_emotions['Emotions'].to_list()

# gradio
!pip install --upgrade gradio

import numpy as np
import pandas as pd
import requests
from PIL import Image
import torch
from transformers import AutoProcessor, AutoModelForZeroShotImageClassification, pipeline
import gradio as gr
import openai
from sklearn.metrics.pairwise import cosine_similarity
import ast
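# With the fine-tuned classifier reloaded, a sentence can be scored for the six
# emotion classes before wiring it into the Gradio app. A minimal inference
# sketch: the label order below is a hypothetical placeholder (substitute the
# order used at training time), the tokenizer call follows the kobert_hf
# README, and max_len=64 is assumed.
tokenizer = KoBERTTokenizer.from_pretrained('skt/kobert-base-v1')

emotion_labels = ['기쁨', '슬픔', '분노', '불안', '상처', '당황']  # hypothetical order

def predict_emotion(sentence, max_len=64):
    enc = tokenizer(sentence, padding='max_length', truncation=True,
                    max_length=max_len, return_tensors='pt')
    token_ids = enc['input_ids'].to(device)
    segment_ids = enc['token_type_ids'].to(device)
    valid_length = enc['attention_mask'].sum(dim=1)  # non-padding tokens per row
    with torch.no_grad():
        probabilities = model(token_ids, valid_length, segment_ids)
    probabilities = probabilities.squeeze().cpu().numpy()
    return dict(zip(emotion_labels, probabilities.round(4).tolist()))

# Example: returns a dict mapping each emotion label to its probability
print(predict_emotion('오늘 하루가 정말 행복했다'))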