Maslov-Artem committed on
Commit
c747562
·
1 Parent(s): cb2adb5

Streamlit adjustment

Browse files
Files changed (2) hide show
  1. pages/review_predictor.py +43 -23
  2. pages/text_generator.py +17 -6
pages/review_predictor.py CHANGED
@@ -7,37 +7,58 @@ import torch
7
  import torch.nn as nn
8
  import transformers
9
 
10
- from model.funcs import (create_model_and_tokenizer, load_model,
11
- predict_sentiment)
12
  from model.model import LSTMConcatAttentionEmbed
13
  from preprocessing.preprocessing import data_preprocessing
14
  from preprocessing.rnn_preprocessing import preprocess_single_string
15
 
16
- # Load preprocessing steps
17
- with open("vectorizer.pkl", "rb") as f:
18
- logreg_vectorizer = pickle.load(f)
19
 
20
- # Load trained model
21
- with open("logreg_model.pkl", "rb") as f:
22
- logreg_predictor = pickle.load(f)
 
23
 
24
- model_concat_embed = LSTMConcatAttentionEmbed()
25
- model_concat_embed.load_state_dict(torch.load("model/model_weights.pt"))
 
26
 
27
- with open("model/vocab.json", "r") as f:
28
- vocab_to_int = json.load(f)
29
 
30
- with open("model/int_vocab.json", "r") as f:
31
- int_to_vocab = json.load(f)
32
 
33
- model_class = transformers.AutoModel
34
- tokenizer_class = transformers.AutoTokenizer
35
- pretrained_weights = "cointegrated/rubert-tiny2"
36
- weights_path = "model/best_bert_weights.pth"
37
- model = load_model(model_class, pretrained_weights, weights_path)
38
- tokenizer = tokenizer_class.from_pretrained(pretrained_weights)
39
 
 
 
 
 
40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  def plot_and_predict(review: str, SEQ_LEN: int, model: nn.Module):
42
  inp = preprocess_single_string(review, SEQ_LEN, vocab_to_int)
43
  model.eval()
@@ -52,12 +73,12 @@ def preprocess_text_logreg(text):
52
  clean_text = data_preprocessing(
53
  text
54
  ) # Assuming data_preprocessing is your preprocessing function
55
- print("Clean text ", clean_text)
56
  vectorized_text = logreg_vectorizer.transform([" ".join(clean_text)])
57
  return vectorized_text
58
 
59
 
60
  # Define function for making predictions
 
61
  def predict_sentiment_logreg(text):
62
  # Preprocess input text
63
  processed_text = preprocess_text_logreg(text)
@@ -68,7 +89,7 @@ def predict_sentiment_logreg(text):
68
 
69
  metrics = {
70
  "Models": ["Logistic Regression", "LSTM + attention", "ruBERTtiny2"],
71
- "f1-macro score": [0.94376, 1, 0.94070],
72
  }
73
 
74
 
@@ -94,7 +115,6 @@ if st.button("Predict"):
94
  )
95
  elif model_type == "BERT":
96
  prediction = predict_sentiment(text_input, model, tokenizer, "cpu")
97
- st.write(prediction)
98
 
99
  if prediction == 1:
100
  st.write("prediction")
 
7
  import torch.nn as nn
8
  import transformers
9
 
10
+ from model.funcs import (create_model_and_tokenizer, execution_time,
11
+ load_model, predict_sentiment)
12
  from model.model import LSTMConcatAttentionEmbed
13
  from preprocessing.preprocessing import data_preprocessing
14
  from preprocessing.rnn_preprocessing import preprocess_single_string
15
 
 
 
 
16
 
17
+ @st.cache_resource
18
+ def load_logreg():
19
+ with open("vectorizer.pkl", "rb") as f:
20
+ logreg_vectorizer = pickle.load(f)
21
 
22
+ with open("logreg_model.pkl", "rb") as f:
23
+ logreg_predictor = pickle.load(f)
24
+ return logreg_vectorizer, logreg_predictor
25
 
 
 
26
 
27
+ logreg_vectorizer, logreg_predictor = load_logreg()
 
28
 
 
 
 
 
 
 
29
 
30
+ @st.cache_resource
31
+ def load_lstm():
32
+ with open("model/vocab.json", "r") as f:
33
+ vocab_to_int = json.load(f)
34
 
35
+ with open("model/int_vocab.json", "r") as f:
36
+ int_to_vocab = json.load(f)
37
+ model_concat_embed = LSTMConcatAttentionEmbed()
38
+ model_concat_embed.load_state_dict(torch.load("model/model_weights.pt"))
39
+
40
+ return vocab_to_int, int_to_vocab, model_concat_embed
41
+
42
+
43
+ vocab_to_int, int_to_vocab, model_concat_embed = load_lstm()
44
+
45
+
46
+ @st.cache_resource
47
+ def load_bert():
48
+ model_class = transformers.AutoModel
49
+ tokenizer_class = transformers.AutoTokenizer
50
+ pretrained_weights = "cointegrated/rubert-tiny2"
51
+ weights_path = "model/best_bert_weights.pth"
52
+ model = load_model(model_class, pretrained_weights, weights_path)
53
+ tokenizer = tokenizer_class.from_pretrained(pretrained_weights)
54
+
55
+ return model, tokenizer
56
+
57
+
58
+ model, tokenizer = load_bert()
59
+
60
+
61
+ @execution_time
62
  def plot_and_predict(review: str, SEQ_LEN: int, model: nn.Module):
63
  inp = preprocess_single_string(review, SEQ_LEN, vocab_to_int)
64
  model.eval()
 
73
  clean_text = data_preprocessing(
74
  text
75
  ) # Assuming data_preprocessing is your preprocessing function
 
76
  vectorized_text = logreg_vectorizer.transform([" ".join(clean_text)])
77
  return vectorized_text
78
 
79
 
80
  # Define function for making predictions
81
+ @execution_time
82
  def predict_sentiment_logreg(text):
83
  # Preprocess input text
84
  processed_text = preprocess_text_logreg(text)
 
89
 
90
  metrics = {
91
  "Models": ["Logistic Regression", "LSTM + attention", "ruBERTtiny2"],
92
+ "f1-macro score": [0.94376, 0.93317, 0.94070],
93
  }
94
 
95
 
 
115
  )
116
  elif model_type == "BERT":
117
  prediction = predict_sentiment(text_input, model, tokenizer, "cpu")
 
118
 
119
  if prediction == 1:
120
  st.write("prediction")
pages/text_generator.py CHANGED
@@ -2,6 +2,8 @@ import streamlit as st
2
  import torch
3
  from transformers import GPT2LMHeadModel, GPT2Tokenizer
4
 
 
 
5
 
6
  @st.cache_data
7
  def load_model():
@@ -13,11 +15,10 @@ def load_model():
13
 
14
 
15
  tokenizer, model = load_model()
16
- promt = st.text_input("Ask a question")
17
- generate = st.button("Generate")
18
- if generate:
19
- if not promt:
20
- st.write("42")
21
  promt = tokenizer.encode(promt, return_tensors="pt")
22
  model.eval()
23
  with torch.no_grad():
@@ -27,6 +28,16 @@ if generate:
27
  num_beams=2,
28
  temperature=1.5,
29
  top_p=0.9,
 
30
  )
31
  out = list(map(tokenizer.decode, out))[0]
32
- st.write(out)
 
 
 
 
 
 
 
 
 
 
2
  import torch
3
  from transformers import GPT2LMHeadModel, GPT2Tokenizer
4
 
5
+ from model.funcs import execution_time
6
+
7
 
8
  @st.cache_data
9
  def load_model():
 
15
 
16
 
17
  tokenizer, model = load_model()
18
+
19
+
20
+ @execution_time
21
+ def generate_text(promt):
 
22
  promt = tokenizer.encode(promt, return_tensors="pt")
23
  model.eval()
24
  with torch.no_grad():
 
28
  num_beams=2,
29
  temperature=1.5,
30
  top_p=0.9,
31
+ max_length=150,
32
  )
33
  out = list(map(tokenizer.decode, out))[0]
34
+ return out
35
+
36
+
37
+ promt = st.text_input("Ask a question")
38
+ generate = st.button("Generate")
39
+ if generate:
40
+ if not promt:
41
+ st.write("42")
42
+ else:
43
+ st.write(generate_text(promt))