Maslov-Artem committed on
Commit
afed7b5
·
1 Parent(s): fe311a6

minor changes

Browse files
Files changed (3) hide show
  1. .gitignore +4 -1
  2. app.py +6 -3
  3. preprocessing.py +2 -3
.gitignore CHANGED
@@ -1,3 +1,6 @@
1
  .venv
2
  healthcare_facilities_reviews.jsonl
3
- nlp_models.ipynb
 
 
 
 
1
  .venv
2
  healthcare_facilities_reviews.jsonl
3
+ *.ipynb
4
+ __pycache__/
5
+ *.csv
6
+ .ipynb_checkpoints/
app.py CHANGED
@@ -38,7 +38,10 @@ def predict_sentiment(text):
38
  st.title("Sentiment Analysis with Logistic Regression")
39
  text_input = st.text_input("Enter your review:")
40
  if st.button("Predict"):
41
- st.write("Knopka")
42
  prediction = predict_sentiment(text_input)
43
- st.write("prediction")
44
- st.write("Predicted Sentiment:", prediction)
 
 
 
 
 
38
  st.title("Sentiment Analysis with Logistic Regression")
39
  text_input = st.text_input("Enter your review:")
40
  if st.button("Predict"):
 
41
  prediction = predict_sentiment(text_input)
42
+ if prediction == 1:
43
+ st.write("prediction")
44
+ st.write("Отзыв положительный")
45
+ elif prediction == 0:
46
+ st.write("prediction")
47
+ st.write("Отзыв отрицательный")
preprocessing.py CHANGED
@@ -1,11 +1,11 @@
1
  import re
2
  import string
3
 
 
4
  import pymorphy2
5
- from nltk.corpus import stopwords
6
  from nltk.tokenize import word_tokenize
7
 
8
- stop_words = set(stopwords.words("russian"))
9
 
10
 
11
  def clean_text(text: str) -> str:
@@ -20,7 +20,6 @@ def clean_text(text: str) -> str:
20
  def lemmize_and_tokenize_text(text: str) -> list[str]:
21
  morph = pymorphy2.MorphAnalyzer()
22
  tokens = word_tokenize(text)
23
- tokens = [token for token in tokens if token not in stop_words]
24
  lemmas = [morph.parse(token)[0].normal_form for token in tokens]
25
  return lemmas
26
 
 
1
  import re
2
  import string
3
 
4
+ import nltk
5
  import pymorphy2
 
6
  from nltk.tokenize import word_tokenize
7
 
8
+ nltk.download("punkt")
9
 
10
 
11
  def clean_text(text: str) -> str:
 
20
  def lemmize_and_tokenize_text(text: str) -> list[str]:
21
  morph = pymorphy2.MorphAnalyzer()
22
  tokens = word_tokenize(text)
 
23
  lemmas = [morph.parse(token)[0].normal_form for token in tokens]
24
  return lemmas
25