import json
import pickle

import pandas as pd
import streamlit as st
import torch
import torch.nn as nn
import transformers

from model.funcs import (create_model_and_tokenizer, load_model,
                         predict_sentiment)
from model.model import LSTMConcatAttentionEmbed
from preprocessing.preprocessing import data_preprocessing
from preprocessing.rnn_preprocessing import preprocess_single_string

# Load the fitted vectorizer (preprocessing step for the logistic regression model)
with open("vectorizer.pkl", "rb") as f:
    logreg_vectorizer = pickle.load(f)

# Load the trained logistic regression model
with open("logreg_model.pkl", "rb") as f:
    logreg_predictor = pickle.load(f)

# LSTM with concatenation attention and pretrained embeddings
model_concat_embed = LSTMConcatAttentionEmbed()
model_concat_embed.load_state_dict(torch.load("model/model_weights.pt"))

# Token <-> index mappings used by the LSTM preprocessing
with open("model/vocab.json", "r") as f:
    vocab_to_int = json.load(f)
with open("model/int_vocab.json", "r") as f:
    int_to_vocab = json.load(f)

# ruBERT-tiny2: load the BERT encoder with saved fine-tuned weights and its tokenizer
model_class = transformers.AutoModel
tokenizer_class = transformers.AutoTokenizer
pretrained_weights = "cointegrated/rubert-tiny2"
weights_path = "model/best_bert_weights.pth"

model = load_model(model_class, pretrained_weights, weights_path)
tokenizer = tokenizer_class.from_pretrained(pretrained_weights)
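
# Usage note (assumed from the call in the BERT branch below): the loaded encoder and
# tokenizer are passed to predict_sentiment(text, model, tokenizer, "cpu") from model.funcs,
# e.g. predict_sentiment("<review text>", model, tokenizer, "cpu").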

def plot_and_predict(review: str, SEQ_LEN: int, model: nn.Module) -> int:
    """Predict sentiment with the LSTM model: returns 1 (positive) or 0 (negative)."""
    inp = preprocess_single_string(review, SEQ_LEN, vocab_to_int)
    model.eval()
    with torch.inference_mode():
        pred, _ = model(inp.long().unsqueeze(0))
        pred = pred.sigmoid().item()
    return 1 if pred > 0.75 else 0
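
# Example usage (hypothetical review text; SEQ_LEN=25 matches the call in the app below):
#   plot_and_predict(review="<review text>", SEQ_LEN=25, model=model_concat_embed)  # -> 1 or 0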

def preprocess_text_logreg(text):
    # Clean and tokenize the text, then vectorize it with the fitted vectorizer
    clean_text = data_preprocessing(text)
    vectorized_text = logreg_vectorizer.transform([" ".join(clean_text)])
    return vectorized_text

# Prediction function for the logistic regression model
def predict_sentiment_logreg(text):
    # Preprocess the input text, then predict
    processed_text = preprocess_text_logreg(text)
    prediction = logreg_predictor.predict(processed_text)
    return int(prediction[0])  # predict() returns an array; unwrap to a plain int
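
# Example usage (hypothetical input):
#   predict_sentiment_logreg("<review text>")  # -> 1 (positive) or 0 (negative)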

# Validation metrics for the three models
metrics = {
    "Model": ["Logistic Regression", "LSTM + attention", "ruBERT-tiny2"],
    "f1-macro score": [0.94376, 1, 0.94070],
}

col1, col2 = st.columns([1, 3])
df = pd.DataFrame(metrics)
df.set_index("Model", inplace=True)

# Streamlit UI
st.sidebar.title("Model Selection")
model_type = st.sidebar.radio("Select Model Type", ["Classic ML", "LSTM", "BERT"])

st.title("Review Sentiment Prediction")
text_input = st.text_input("Enter your review:")

if st.button("Predict"):
    if model_type == "Classic ML":
        prediction = predict_sentiment_logreg(text_input)
    elif model_type == "LSTM":
        prediction = plot_and_predict(
            review=text_input, SEQ_LEN=25, model=model_concat_embed
        )
    elif model_type == "BERT":
        prediction = predict_sentiment(text_input, model, tokenizer, "cpu")

    if prediction == 1:
        st.write("The review is positive")
    elif prediction == 0:
        st.write("The review is negative")

st.write(df)