import os

import torch
from torch import nn
from transformers import AutoModel
from huggingface_hub import hf_hub_download

# Hugging Face access token (required if the checkpoint repo is private).
token = os.getenv("HF_TOKEN")
repo_id = "Siyunb323/CreativityEvaluation"

# Shared Japanese BERT encoder used by all model variants below.
model = AutoModel.from_pretrained("cl-tohoku/bert-base-japanese")
class BERTregressor(nn.Module):
    def __init__(self, bert, hidden_size=768, num_linear=1, dropout=0.1,
                 o_type='cls', t_type='C', use_sigmoid=False):
        super(BERTregressor, self).__init__()
        self.encoder = bert
        self.o_type = o_type
        self.t_type = t_type
        self.sigmoid = use_sigmoid
        if num_linear == 2:
            layers = [nn.Linear(hidden_size, 128),
                      nn.ReLU(),
                      nn.Dropout(dropout),
                      nn.Linear(128, 1)]
        elif num_linear == 1:
            layers = [nn.Dropout(dropout),
                      nn.Linear(hidden_size, 1)]
        if use_sigmoid:
            layers.append(nn.Sigmoid())
        self.output = nn.Sequential(*layers)

    def forward(self, inputs, return_attention=False):
        X = {'input_ids': inputs['input_ids'],
             'token_type_ids': inputs['token_type_ids'],
             'attention_mask': inputs['attention_mask'],
             'output_attentions': return_attention}
        encoded_X = self.encoder(**X)
        if self.o_type == 'cls':
            # Use the final hidden state of the [CLS] token.
            output = self.output(encoded_X.last_hidden_state[:, 0, :])
        elif self.o_type == 'pooler':
            # Use BERT's pooled output (tanh-projected [CLS] state).
            output = self.output(encoded_X.pooler_output)
        # For creativity ('C') targets with a sigmoid head, rescale from (0, 1) to (1, 5).
        output = 4 * output.squeeze(-1) + 1 if self.sigmoid and self.t_type == 'C' else output.squeeze(-1)
        return output if not return_attention else (output, encoded_X.attentions)
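
# Shape sketch for BERTregressor.forward (derived from the defaults above):
#   inputs['input_ids']   -> (batch, seq_len)
#   last_hidden_state     -> (batch, seq_len, hidden_size); [CLS] slice -> (batch, hidden_size)
#   output head           -> (batch, 1), squeezed to (batch,)
#   with use_sigmoid=True and t_type='C', scores fall in the (1, 5) interval.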
class Effectiveness(nn.Module):
    """Regression head predicting an effectiveness score from BERT features."""
    def __init__(self, num_layers, hidden_size=768, use_sigmoid=True, dropout=0.2, **kwargs):
        super(Effectiveness, self).__init__(**kwargs)
        self.sigmoid = use_sigmoid
        if num_layers == 2:
            layers = [
                nn.Linear(hidden_size, 128),
                nn.ReLU(),
                nn.Dropout(dropout),
                nn.Linear(128, 1)
            ]
        else:
            layers = [
                nn.ReLU(),
                nn.Dropout(dropout),
                nn.Linear(hidden_size, 1)
            ]
        if use_sigmoid:
            layers.append(nn.Sigmoid())  # add the Sigmoid layer only when needed
        self.output = nn.Sequential(*layers)

    def forward(self, X):
        output = self.output(X)
        # When the Sigmoid layer is used, rescale the output to the [1, 5] range.
        if self.sigmoid:
            return 4 * output.squeeze(-1) + 1
        else:
            return output.squeeze(-1)
class Creativity(nn.Module):
    """Regression head predicting a creativity score from BERT features."""
    def __init__(self, num_layers, hidden_size=768, use_sigmoid=True, dropout=0.2, **kwargs):
        super(Creativity, self).__init__(**kwargs)
        self.sigmoid = use_sigmoid
        if num_layers == 2:
            layers = [
                nn.Linear(hidden_size, 128),
                nn.ReLU(),
                nn.Dropout(dropout),
                nn.Linear(128, 1)
            ]
        else:
            layers = [
                nn.ReLU(),
                nn.Dropout(dropout),
                nn.Linear(hidden_size, 1)
            ]
        if use_sigmoid:
            layers.append(nn.Sigmoid())  # add the Sigmoid layer only when needed
        self.output = nn.Sequential(*layers)

    def forward(self, X):
        output = self.output(X)
        # When the Sigmoid layer is used, rescale the output to the [1, 5] range.
        if self.sigmoid:
            return 4 * output.squeeze(-1) + 1
        else:
            return output.squeeze(-1)
class BERT2Phase(nn.Module):
    def __init__(self, bert, hidden_size=768, type='cls',
                 num_linear=1, dropout=0.1, use_sigmoid=False):
        super(BERT2Phase, self).__init__()
        self.encoder = bert
        self.type = type
        self.sigmoid = use_sigmoid
        # Two separate regression heads share one BERT encoder.
        self.effectiveness = Effectiveness(num_linear, hidden_size, use_sigmoid, dropout)
        self.creativity = Creativity(num_linear, hidden_size, use_sigmoid, dropout)

    def forward(self, inputs, return_attention=False):
        X = {'input_ids': inputs['input_ids'],
             'token_type_ids': inputs['token_type_ids'],
             'attention_mask': inputs['attention_mask'],
             'output_attentions': return_attention}
        encoded_X = self.encoder(**X)
        if self.type == 'cls':
            e_pred = self.effectiveness(encoded_X.last_hidden_state[:, 0, :])
            c_pred = self.creativity(encoded_X.last_hidden_state[:, 0, :])
        elif self.type == 'pooler':
            e_pred = self.effectiveness(encoded_X.pooler_output)
            c_pred = self.creativity(encoded_X.pooler_output)
        return (c_pred, e_pred) if not return_attention else (c_pred, e_pred, encoded_X.attentions)
def load_model(model_name, pooling_method):
    pooling = pooling_method if pooling_method == 'cls' else 'pooler'
    if model_name == "One-phase Fine-tuned BERT":
        loaded_net = BERTregressor(model, hidden_size=768, num_linear=1, dropout=0.1,
                                   o_type=pooling, t_type='C', use_sigmoid=True)
        filename = f"model/OnePhase_BERT_{pooling_method}.pth"
    elif model_name == "Two-phase Fine-tuned BERT":
        loaded_net = BERT2Phase(model, hidden_size=768, num_linear=1, dropout=0.1,
                                type=pooling, use_sigmoid=True)
        filename = f"model/TwoPhase_BERT_{pooling_method}.pth"
    else:
        raise ValueError(f"Unknown model name: {model_name}")

    # Fetch the fine-tuned weights from the Hub and load them on CPU.
    # (`token` replaces the deprecated `use_auth_token` argument.)
    model_path = hf_hub_download(repo_id=repo_id, filename=filename, token=token)
    loaded_net.load_state_dict(torch.load(model_path, map_location=torch.device('cpu')))
    loaded_net.eval()
    return loaded_net
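
# Minimal usage sketch, assuming a valid HF_TOKEN for the checkpoint repo and
# the Japanese tokenizer dependencies (fugashi + a MeCab dictionary) are
# installed; the sample sentence is purely illustrative.
if __name__ == "__main__":
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("cl-tohoku/bert-base-japanese")
    net = load_model("One-phase Fine-tuned BERT", "cls")

    # The tokenizer returns input_ids, token_type_ids, and attention_mask,
    # exactly the keys the forward pass expects.
    inputs = tokenizer("この広告は創造的だ。", return_tensors="pt")
    with torch.no_grad():
        score = net(inputs)  # creativity score in (1, 5) since use_sigmoid=True
    print(score.item())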