acecalisto3 committed
Commit fe81e0b · verified · 1 Parent(s): b8139e1

Update app2.py

Files changed (1)
  1. app2.py +52 -23
app2.py CHANGED
@@ -19,7 +19,8 @@ import validators
 from bs4 import BeautifulSoup
 
 # Configure logging
-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+logging.basicConfig(level=logging.INFO,
+                    format='%(asctime)s - %(levelname)s - %(message)s')
 logger = logging.getLogger(__name__)
 
 # Configuration
@@ -37,6 +38,7 @@ history = []
 # Database setup
 Base = declarative_base()
 
+
 class Article(Base):
     __tablename__ = 'articles'
     id = Column(Integer, primary_key=True)
@@ -46,6 +48,7 @@ class Article(Base):
     hash = Column(String(32))
     timestamp = Column(DateTime, default=datetime.datetime.utcnow)
 
+
 async def create_db_engine(db_url):
     try:
         engine = create_engine(db_url)
@@ -54,17 +57,23 @@ async def create_db_engine(db_url):
     except SQLAlchemyError as e:
         logger.error(f"Database error: {e}")
         raise
+
+
 def sanitize_url(url: str) -> str:
     return url if validators.url(url) else None
 
-async def fetch_url_content(url: str, session: aiohttp.ClientSession) -> Tuple[str, str]:
+
+async def fetch_url_content(url: str,
+                            session: aiohttp.ClientSession) -> Tuple[str, str]:
     async with session.get(url) as response:
         content = await response.text()
         soup = BeautifulSoup(content, 'html.parser')
         title = soup.title.string if soup.title else "No Title"
         return title, content
 
-async def save_to_database(session, url: str, title: str, content: str, hash: str):
+
+async def save_to_database(session, url: str, title: str, content: str,
+                           hash: str):
     try:
         article = Article(url=url, title=title, content=content, hash=hash)
         session.add(article)
@@ -73,18 +82,24 @@ async def save_to_database(session, url: str, title: str, content: str, hash: st
         logger.error(f"Database error: {e}")
         await session.rollback()
 
-async def save_to_csv(storage_location: str, url: str, title: str, content: str, timestamp: datetime.datetime):
+
+async def save_to_csv(storage_location: str, url: str, title: str,
+                      content: str, timestamp: datetime.datetime):
     try:
         os.makedirs(os.path.dirname(storage_location), exist_ok=True)
         with open(storage_location, "a", newline='', encoding="utf-8") as csvfile:
             csv_writer = csv.writer(csvfile)
-            csv_writer.writerow([timestamp.strftime("%Y-%m-%d %H:%M:%S"), url, title, content])
+            csv_writer.writerow([
+                timestamp.strftime("%Y-%m-%d %H:%M:%S"), url, title, content
+            ])
     except IOError as e:
         logger.error(f"IOError saving to CSV: {e}")
     except Exception as e:
         logger.error(f"Unexpected error saving to CSV: {e}")
 
-async def monitor_url(url: str, interval: int, storage_location: str, feed_rss: bool, db_session):
+
+async def monitor_url(url: str, interval: int, storage_location: str,
+                      feed_rss: bool, db_session):
     previous_hash = ""
     async with aiohttp.ClientSession() as session:
         while True:
@@ -98,14 +113,19 @@ async def monitor_url(url: str, interval: int, storage_location: str, feed_rss:
 
                 if feed_rss:
                     try:
-                        await save_to_database(db_session, url, title, content, current_hash)
+                        await save_to_database(db_session, url, title,
+                                               content, current_hash)
                     except SQLAlchemyError as e:
-                        logger.error(f"Database error while saving {url}: {e}")
+                        logger.error(
+                            f"Database error while saving {url}: {e}")
 
                 if storage_location:
-                    await save_to_csv(storage_location, url, title, content, timestamp)
+                    await save_to_csv(storage_location, url, title,
+                                      content, timestamp)
 
-                history.append(f"Change detected at {url} on {timestamp.strftime('%Y-%m-%d %H:%M:%S')}")
+                history.append(
+                    f"Change detected at {url} on {timestamp.strftime('%Y-%m-%d %H:%M:%S')}"
+                )
                 logger.info(f"Change detected at {url}")
 
                 change_counts[url] = change_counts.get(url, 0) + 1
@@ -125,23 +145,29 @@ async def monitor_url(url: str, interval: int, storage_location: str, feed_rss:
 
             await asyncio.sleep(interval)
 
-async def start_monitoring(urls: List[str], storage_location: str, feed_rss: bool):
+
+async def start_monitoring(urls: List[str], storage_location: str,
+                           feed_rss: bool):
     global db_session
     for url in urls:
         if url not in monitoring_tasks:
             sanitized_url = sanitize_url(url)
             if sanitized_url:
-                task = asyncio.create_task(monitor_url(sanitized_url, DEFAULT_MONITORING_INTERVAL, storage_location, feed_rss, db_session))
+                task = asyncio.create_task(
+                    monitor_url(sanitized_url, DEFAULT_MONITORING_INTERVAL,
+                                storage_location, feed_rss, db_session))
                 monitoring_tasks[sanitized_url] = task
             else:
                 logger.warning(f"Invalid URL: {url}")
                 history.append(f"Invalid URL: {url}")
     return "Monitoring started"
 
+
 async def cleanup_resources(url: str):
     # Add any cleanup logic here, e.g., closing database connections
     pass
 
+
 def stop_monitoring(url: str):
     if url in monitoring_tasks:
         monitoring_tasks[url].cancel()
@@ -149,11 +175,6 @@ def stop_monitoring(url: str):
         del monitoring_tasks[url]
     return "Monitoring stopped"
 
-async def on_start_click(target_urls_str: str, storage_loc: str, feed_enabled: bool):
-    urls = [url.strip() for url in target_urls_str.split(",")] # Split the input by commas
-    result = await start_monitoring(urls, storage_loc if storage_loc else None, feed_enabled)
-    return result
-
 
 def generate_rss_feed():
     session = Session()
@@ -178,9 +199,11 @@ def generate_rss_feed():
     finally:
         session.close()
 
+
 async def chatbot_response(message: str, history: List[Tuple[str, str]]):
     try:
-        client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1", token=HUGGINGFACE_API_KEY)
+        client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1",
+                                 token=HUGGINGFACE_API_KEY)
         response = await client.text_generation(message, max_new_tokens=100)
 
         history.append((message, response[0]['generated_text']))
@@ -188,9 +211,11 @@ async def chatbot_response(message: str, history: List[Tuple[str, str]]):
         return history, history
     except Exception as e:
         logger.error(f"Chatbot error: {e}")
-        history.append((message, "Error: Could not get a response from the chatbot."))
+        history.append((message,
+                        "Error: Could not get a response from the chatbot."))
         return history, history
 
+
 async def update_db_status(db_status):
     while True:
         try:
@@ -200,10 +225,12 @@ async def update_db_status(db_status):
             await db_status.update(value="Disconnected")
         await asyncio.sleep(60) # Check every minute
 
+
 async def update_feed_content(db_session):
     try:
-        articles = await db_session. query(Article).order_by(Article.timestamp.desc()).limit(20).all()
-        feed = {
+        articles = await db_session.query(Article).order_by(
+            Article.timestamp.desc()).limit(20).all()
+        feed = {
             'title': 'Website Changes Feed',
             'link': 'http://yourwebsite.com/feed',
             'description': 'Feed of changes detected on monitored websites.',
@@ -224,10 +251,11 @@ async def periodic_update_with_error_handling(db_session):
     while True:
         try:
             await asyncio.sleep(300) # Wait for 5 minutes
-            await update_feed_content(db_session) # Pass db_session here
+            await update_feed_content(db_session)
         except Exception as e:
            logger.error(f"Error in periodic update: {e}")
 
+
 async def main():
     global db_session
     try:
@@ -303,4 +331,5 @@ async def main():
 
 
 if __name__ == "__main__":
-    asyncio.run(main())
+    asyncio.run(main())
+
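For reference, a minimal, self-contained sketch of the fetch-hash-compare loop that the reformatted monitor_url implements (fetch a page, hash the body, report when the hash changes, sleep). This is not part of the commit: the URL, interval, and the function name check_for_change are illustrative only; it assumes aiohttp is installed.

# Standalone sketch of the change-detection pattern used by monitor_url.
# check_for_change, the example URL, and the 5-second interval are
# illustrative stand-ins, not code from app2.py.
import asyncio
import hashlib

import aiohttp


async def check_for_change(url: str, previous_hash: str) -> str:
    """Fetch url, hash the body, and report if it differs from previous_hash."""
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as response:
            content = await response.text()
    current_hash = hashlib.md5(content.encode("utf-8")).hexdigest()
    if previous_hash and current_hash != previous_hash:
        print(f"Change detected at {url}")
    return current_hash


async def demo() -> None:
    previous = ""
    for _ in range(3):
        previous = await check_for_change("https://example.com", previous)
        await asyncio.sleep(5)  # stand-in for DEFAULT_MONITORING_INTERVAL


if __name__ == "__main__":
    asyncio.run(demo())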