Maslov-Artem committed on
Commit
b90441b
·
1 Parent(s): c747562

New weights and streamlit features

Browse files
.gitignore CHANGED
@@ -1,6 +1,17 @@
1
  .venv
2
  healthcare_facilities_reviews.jsonl
3
  *.ipynb
4
- __pycache__/
5
  *.csv
6
- .ipynb_checkoints/
 
 
 
 
 
 
 
 
 
 
 
 
1
  .venv
2
  healthcare_facilities_reviews.jsonl
3
  *.ipynb
4
+ /__pycache__/
5
  *.csv
6
+ /.ipynb_checkoints/
7
+ .DS_Store
8
+ RNN/
9
+ bert/
10
+ cached_lm_GPT2Tokenizer_64_wiki_content.txt
11
+ cached_lm_GPT2Tokenizer_64_wiki_content.txt.lock
12
+ finetuned/
13
+ .gitattributes
14
+ *.txt
15
+ model/.ipynb_checkpoints/
16
+ model/__pycache__/
17
+ preprocessing/__pycache__/
17/config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "sberbank-ai/rugpt3small_based_on_gpt2",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
  "GPT2LMHeadModel"
 
1
  {
2
+ "_name_or_path": "/content/drive/MyDrive/model__weights",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
  "GPT2LMHeadModel"
17/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4a99f27f7efc5a609d3bb2f30d15980d3384ecd47f4b0806c251523071a7648a
3
  size 500941440
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e39686188a07e05ea4860c12df5bb451c630233ccebeee26dc24a4c3219b3b53
3
  size 500941440
app.py CHANGED
@@ -1,3 +1,50 @@
1
  import streamlit as st
2
 
3
- st.title("Sentiment Analysis with Logistic Regression")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
 
3
+ static_toxicity_path = "https://imagizer.imageshack.com/v2/480x360q70/r/924/L4Ditq.jpg"
4
+ animated_toxicity_path = (
5
+ "https://i.kym-cdn.com/photos/images/original/001/264/967/cdc.gif"
6
+ )
7
+ animated_enlighten_path = "https://gifdb.com/images/high/zen-meditation-chakras-illustration-6lujnenasnfmn8dt.gif"
8
+ static_enlighten_path = "https://imagizer.imageshack.com/v2/668x500q70/r/922/bpoy6G.jpg"
9
+
10
+ # Calculate the column widths dynamically
11
+
12
+
13
+ toxicity_html = f"""
14
+ <div class="toxicity-image-container">
15
+ <a href="review_predictor" target="_self" class="toxicity-link">
16
+ <img src="{static_toxicity_path}" class="toxicity-image" />
17
+ </a>
18
+ </div>
19
+ <style>
20
+ /* Define the hover state for column 1 */
21
+ .toxicity-image-container:hover .toxicity-image {{
22
+ content: url("{animated_toxicity_path}");
23
+ transform: scale(1.1); /* Enlarge the image by 10% */
24
+ transition: transform 0.5s ease; /* Add smooth transition */
25
+ }}
26
+ </style>
27
+ """
28
+
29
+ enlighten_html = f"""
30
+ <div class="enlighten-image-container">
31
+ <a href="text_generator" target="_self" class="enlighten-link">
32
+ <img src="{static_enlighten_path}" class="enlighten-image" />
33
+ </a>
34
+ </div>
35
+ <style>
36
+ /* Define the hover state for column 2 */
37
+ .enlighten-image-container:hover .enlighten-image {{
38
+ content: url("{animated_enlighten_path}");
39
+ transform: scale(1.1); /* Enlarge the image by 10% */
40
+ transition: transform 0.5s ease; /* Add smooth transition */
41
+ }}
42
+ </style>
43
+ """
44
+
45
+ # Display HTML code with Streamlit
46
+ st.markdown(toxicity_html, unsafe_allow_html=True)
47
+ st.markdown(enlighten_html, unsafe_allow_html=True)
48
+
49
+
50
+ # Display JavaScript code with Streamlit
enlightened_static.jpg ADDED
model/funcs.py CHANGED
@@ -1,10 +1,27 @@
 
 
 
1
  import matplotlib.pyplot as plt
 
2
  import torch
3
  import torch.nn as nn
4
  from sklearn.metrics import f1_score
5
  from torch.utils.data import Dataset
6
 
7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  def create_model_and_tokenizer(model_class, tokenizer_class, pretrained_weights):
9
  # Создаем объекты для токенизатора и модели
10
  tokenizer = tokenizer_class.from_pretrained(pretrained_weights)
@@ -98,6 +115,7 @@ def train_model(
98
  return train_losses, train_accuracies, val_losses, val_accuracies, val_f1_scores
99
 
100
 
 
101
  def predict_sentiment(text, model, tokenizer, DEVICE):
102
  # Модель должна быть в режиме оценки
103
  model.eval()
 
1
+ import time
2
+ from functools import wraps
3
+
4
  import matplotlib.pyplot as plt
5
+ import streamlit as st
6
  import torch
7
  import torch.nn as nn
8
  from sklearn.metrics import f1_score
9
  from torch.utils.data import Dataset
10
 
11
 
12
+ def execution_time(func):
13
+ @wraps(func)
14
+ def wrapper(*args, **kwargs):
15
+ start_time = time.time()
16
+ result = func(*args, **kwargs)
17
+ end_time = time.time()
18
+ execution_seconds = end_time - start_time
19
+ st.write(f"Model calculating time = {execution_seconds:.5f} seconds")
20
+ return result
21
+
22
+ return wrapper
23
+
24
+
25
  def create_model_and_tokenizer(model_class, tokenizer_class, pretrained_weights):
26
  # Создаем объекты для токенизатора и модели
27
  tokenizer = tokenizer_class.from_pretrained(pretrained_weights)
 
115
  return train_losses, train_accuracies, val_losses, val_accuracies, val_f1_scores
116
 
117
 
118
+ @execution_time
119
  def predict_sentiment(text, model, tokenizer, DEVICE):
120
  # Модель должна быть в режиме оценки
121
  model.eval()
model/model_weights.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:de960bfb6327e0509297628c3cec5bc456e6dc681b29aca9bead6330e941d44e
3
- size 50489371
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38d0d9dfdc648de05fb1bd62dab307a558d045305c5fd4700331a0967ea5e1b5
3
+ size 50647220
preprocessing.py DELETED
@@ -1,30 +0,0 @@
1
- import re
2
- import string
3
-
4
- import nltk
5
- import pymorphy2
6
- from nltk.tokenize import word_tokenize
7
-
8
- nltk.download("punkt")
9
-
10
-
11
- def clean_text(text: str) -> str:
12
- text = text.lower()
13
- text = re.sub(r"\w*(\w)\1{2,}\w*", "", text)
14
- text = re.sub(r"\d+\w*", "", text)
15
- text = re.sub(r"\[.*?\]", "", text)
16
- text = text.translate(str.maketrans("", "", string.punctuation))
17
- return text
18
-
19
-
20
- def lemmize_and_tokenize_text(text: str) -> list[str]:
21
- morph = pymorphy2.MorphAnalyzer()
22
- tokens = word_tokenize(text)
23
- lemmas = [morph.parse(token)[0].normal_form for token in tokens]
24
- return lemmas
25
-
26
-
27
- def data_preprocessing(text: str) -> list[str]:
28
- cleaned_text = clean_text(text)
29
- lemmized_text = lemmize_and_tokenize_text(cleaned_text)
30
- return lemmized_text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
static_toxic.jpg ADDED