max-unfinity committed on
Commit
9beb013
·
1 Parent(s): 5cec537
Files changed (5) hide show
  1. .gitignore +3 -1
  2. app.py +59 -6
  3. chatgpt.py +17 -0
  4. requirements.txt +2 -1
  5. selenium_parser.py +9 -3
.gitignore CHANGED
@@ -1 +1,3 @@
1
- __pycache__
 
 
 
1
+ __pycache__
2
+ *.env
3
+ *.toml
app.py CHANGED
@@ -1,12 +1,25 @@
1
- from init_env import init_env
2
- from selenium_parser import load_driver
3
  import streamlit as st
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
 
6
  @st.cache_data
7
  def init():
8
- # print("init")
9
- init_env()
 
 
10
 
11
 
12
  @st.cache_resource
@@ -16,11 +29,51 @@ def get_driver():
16
 
17
  def run():
18
  driver = get_driver()
19
- driver.get("https://www.booking.com/hotel/th/queen-boutique.ru.html#tab-reviews")
 
 
 
 
 
 
 
 
 
20
  st.write("Page loaded:")
21
  st.image(driver.get_screenshot_as_png(), caption="screenshot")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
 
24
  init()
25
 
26
- st.button("Run", on_click=run)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
+ import os
3
+
4
+ from init_env import init_env
5
+ from selenium_parser import extract_url, load_driver, scrape_page
6
+ from chatgpt import get_models, query
7
+
8
+
9
+ try:
10
+ from dotenv import load_dotenv
11
+ load_dotenv(".env")
12
+ api_key = os.getenv("OPENAI_API_KEY")
13
+ except ImportError:
14
+ api_key = None
15
 
16
 
17
  @st.cache_data
18
  def init():
19
+ if api_key:
20
+ print("LOCAL TEST")
21
+ else:
22
+ init_env()
23
 
24
 
25
  @st.cache_resource
 
29
 
30
  def run():
  # Scrape reviews for the entered URL, display them, then stream a model-written summary.
  # Reads url, extract, page_count, wait_time, api_key and selected_model as module-level
  # names that are assigned by the widget code following the init() call.
  # NOTE(review): selected_model is assigned under `if api_key:` in that widget code, so it
  # looks like it can be undefined here when no key was entered -- confirm.
31
  driver = get_driver()
32
+ url_reviews = extract_url(url) if extract else url
33
+
34
  # On a scraping failure: show the error and a screenshot of the current page, then re-raise.
+ try:
35
+ reviews = scrape_page(driver, url_reviews, page_count=page_count, wait_time=wait_time)
36
+ except Exception as e:
37
+ st.write(e)
38
+ st.write("Page loaded:")
39
+ st.image(driver.get_screenshot_as_png(), caption="screenshot")
40
+ raise e
41
+
42
  st.write("Page loaded:")
43
  st.image(driver.get_screenshot_as_png(), caption="screenshot")
44
+ with st.expander("Reviews"):
45
+ st.json(reviews)
46
+ st.markdown(f"Collected {len(reviews)} reviews")
47
+
48
+ # ChatGPT answer
49
+ stream = query(api_key, reviews, model=selected_model)
50
+
51
+ st.divider()
52
+ st.markdown(f"**{selected_model}:**")
53
+
54
  # Accumulate the streamed deltas into one string; chunks whose delta content is None are
  # not appended. The "▌" suffix acts as a cursor; the last markdown call renders the text
  # without it.
+ response = ""
55
+ message_placeholder = st.empty()
56
+ for chunk in stream:
57
+ text = chunk.choices[0].delta.content
58
+ if text is not None:
59
+ response += text
60
+ message_placeholder.markdown(response + "▌")
61
+ message_placeholder.markdown(response)
62
 
63
 
64
  init()
65
 
66
# Page widgets. The sidebar field is pre-filled with the module-level api_key
# resolved at import time (it may be None).
api_key = st.sidebar.text_input("Enter API key", value=api_key)
url = st.text_input("Enter URL", value="https://www.booking.com/hotel/th/queen-boutique.ru.html#tab-reviews")
extract = st.checkbox("Extract URL", value=True)

# Always bind selected_model: run() reads it as a module-level name, and leaving
# it unassigned when no key is entered makes run() fail with a NameError.
selected_model = None
if api_key:
    models = get_models(api_key)
    selected_model = st.sidebar.selectbox("Select model", models)

# Scraping controls are bound unconditionally because run() reads them too.
st.sidebar.divider()
page_count = st.sidebar.slider("Number of pages", 0, 10, value=5)
wait_time = st.sidebar.number_input("Wait time (sec)", value=4)

if st.button("Run"):
    if selected_model:
        run()
    else:
        # No key entered (or no model selectable for it): explain instead of crashing.
        st.warning("Enter an OpenAI API key in the sidebar and select a model before running.")
chatgpt.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from openai import OpenAI
2
+
3
def get_models(api_key):
    """Return the sorted ids of the models whose id starts with "gpt".

    Args:
        api_key: OpenAI API key used to build the client.

    Returns:
        A list of model id strings in ascending order.
    """
    listing = OpenAI(api_key=api_key).models.list()
    gpt_ids = [entry.id for entry in listing.data if entry.id.startswith("gpt")]
    gpt_ids.sort()
    return gpt_ids
8
+
9
def query(api_key, reviews: list, model: str = "gpt-3.5-turbo"):
    """Ask a chat model for a structured summary of the given reviews.

    The prompt is ``str(reviews)`` followed by a fixed instruction, sent as a
    single user message with streaming enabled.

    Args:
        api_key: OpenAI API key used to build the client.
        reviews: Collected reviews; embedded in the prompt via ``str()``.
        model: Id of the chat model to use.

    Returns:
        Whatever ``chat.completions.create(..., stream=True)`` returns, for
        the caller to iterate over.
    """
    openai_client = OpenAI(api_key=api_key)
    instruction = "\n\nAnalyze and provide a structured summary of both the positive and negative features of the hotel based on the above reviews. Quantify the frequency of each aspect mentioned in the reviews."
    prompt = str(reviews) + instruction
    user_turn = {"role": "user", "content": prompt}
    return openai_client.chat.completions.create(
        model=model,
        messages=[user_turn],
        stream=True,
    )
requirements.txt CHANGED
@@ -1,3 +1,4 @@
 
1
  selenium==4.17.2
2
  beautifulsoup4==4.11.1
3
- streamlit # 1.30.0
 
1
+ streamlit # 1.30.0
2
  selenium==4.17.2
3
  beautifulsoup4==4.11.1
4
+ openai==1.10.0
selenium_parser.py CHANGED
@@ -54,12 +54,11 @@ def parse_review(html):
54
  }
55
 
56
 
57
- def scrape_page(driver: webdriver.Firefox, url: str, page_count: int = 5, wait_time: int = 1):
58
  # url = "https://www.booking.com/hotel/th/queen-boutique.ru.html#tab-reviews"
59
  review_infos = []
60
 
61
  driver.get(url)
62
- print("page loaded")
63
 
64
  for i in range(page_count):
65
  WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, "review_list_new_item_block")))
@@ -83,4 +82,11 @@ def scrape_page(driver: webdriver.Firefox, url: str, page_count: int = 5, wait_t
83
  pagenext = driver.find_element(By.CLASS_NAME, "pagenext")
84
  pagenext.click()
85
 
86
- return review_infos
 
 
 
 
 
 
 
 
54
  }
55
 
56
 
57
+ def scrape_page(driver: webdriver.Firefox, url: str, page_count: int = 5, wait_time: int = 3):
58
  # url = "https://www.booking.com/hotel/th/queen-boutique.ru.html#tab-reviews"
59
  review_infos = []
60
 
61
  driver.get(url)
 
62
 
63
  for i in range(page_count):
64
  WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, "review_list_new_item_block")))
 
82
  pagenext = driver.find_element(By.CLASS_NAME, "pagenext")
83
  pagenext.click()
84
 
85
+ return review_infos
86
+
87
+
88
def extract_url(url: str):
    """Return the reviews-tab URL for a hotel page URL.

    Any query string and any existing fragment are dropped, then the
    ``#tab-reviews`` anchor is appended, so the result carries exactly one
    anchor whether or not the input already had one.

    Args:
        url: Hotel page URL, optionally with a query string and/or fragment.

    Returns:
        The URL without query string or fragment, followed by ``#tab-reviews``.
    """
    # Cut the fragment first: a "?" that appears after "#" belongs to the
    # fragment, not to a query string.
    base = url.split("#", 1)[0].split("?", 1)[0]
    return base + "#tab-reviews"