Kuberwastaken committed on
Commit
6f07235
·
1 Parent(s): 30bc5c5

Reverted to the non-IMSDB version until the IMSDB integration is stable

Browse files
.gitignore CHANGED
@@ -1 +1 @@
1
- treat-scrape
 
1
+ treat-env
__pycache__/script_search_api.cpython-310.pyc DELETED
Binary file (6.02 kB)
 
gradio_app.py CHANGED
@@ -2,11 +2,6 @@ import gradio as gr
2
  from model.analyzer import analyze_content
3
  import asyncio
4
  import time
5
- import httpx
6
- import subprocess
7
- import signal
8
- import atexit
9
-
10
 
11
  custom_css = """
12
  * {
@@ -217,94 +212,30 @@ footer {
217
  100% { transform: scale(1); }
218
  }
219
  """
220
- # Start the API server
221
- def start_api_server():
222
- # Start uvicorn in a subprocess
223
- process = subprocess.Popen(["uvicorn", "script_search_api:app", "--reload"])
224
- return process
225
-
226
- # Stop the API server
227
- def stop_api_server(process):
228
- process.terminate()
229
-
230
- # Register the exit handler
231
- api_process = start_api_server()
232
- atexit.register(stop_api_server, api_process)
233
-
234
- async def fetch_and_analyze_script(movie_name, progress=gr.Progress(track_tqdm=True)):
235
- try:
236
- async with httpx.AsyncClient(timeout=60.0) as client:
237
- # Start the analysis request
238
- progress(0.2, desc="Initiating script search...")
239
- response = await client.get(
240
- f"http://localhost:8000/api/fetch_and_analyze",
241
- params={"movie_name": movie_name}
242
- )
243
-
244
- if response.status_code == 200:
245
- # Start progress polling
246
- while True:
247
- progress_response = await client.get(
248
- f"http://localhost:8000/api/progress",
249
- params={"movie_name": movie_name}
250
- )
251
-
252
- if progress_response.status_code == 200:
253
- progress_data = progress_response.json()
254
- current_progress = progress_data["progress"]
255
- current_status = progress_data.get("status", "Processing...")
256
-
257
- progress(current_progress, desc=current_status)
258
-
259
- if current_progress >= 1.0:
260
- break
261
-
262
- await asyncio.sleep(0.5) # Poll every 500ms
263
-
264
- result = response.json()
265
- triggers = result.get("detected_triggers", [])
266
-
267
- if not triggers or triggers == ["None"]:
268
- formatted_result = "✓ No triggers detected in the content."
269
- else:
270
- trigger_list = "\n".join([f"• {trigger}" for trigger in triggers])
271
- formatted_result = f"⚠ Triggers Detected:\n{trigger_list}"
272
-
273
- return formatted_result
274
- else:
275
- return f"Error: Server returned status code {response.status_code}"
276
-
277
- except httpx.TimeoutError:
278
- return "Error: Request timed out. Please try again."
279
- except Exception as e:
280
- return f"An unexpected error occurred: {str(e)}"
281
-
282
- async def track_progress(movie_name, progress):
283
- async with httpx.AsyncClient() as client:
284
- while True:
285
- response = await client.get(f"http://localhost:8000/api/progress", params={"movie_name": movie_name})
286
- if response.status_code == 200:
287
- progress_data = response.json()
288
- progress(progress_data["progress"], desc="Tracking progress...")
289
- if progress_data["progress"] >= 1.0:
290
- break
291
- await asyncio.sleep(1)
292
 
293
  def analyze_with_loading(text, progress=gr.Progress()):
 
 
 
 
294
  progress(0, desc="Starting analysis...")
295
 
 
296
  for i in range(25):
297
- time.sleep(0.04)
298
  progress((i + 1) / 100, desc="Initializing analysis...")
299
 
 
300
  for i in range(25, 45):
301
  time.sleep(0.03)
302
  progress((i + 1) / 100, desc="Pre-processing content...")
303
 
 
304
  progress(0.45, desc="Analyzing content...")
305
  try:
306
  result = asyncio.run(analyze_content(text))
307
 
 
308
  for i in range(45, 75):
309
  time.sleep(0.03)
310
  progress((i + 1) / 100, desc="Processing results...")
@@ -312,10 +243,12 @@ def analyze_with_loading(text, progress=gr.Progress()):
312
  except Exception as e:
313
  return f"Error during analysis: {str(e)}"
314
 
 
315
  for i in range(75, 100):
316
  time.sleep(0.02)
317
  progress((i + 1) / 100, desc="Finalizing results...")
318
 
 
319
  triggers = result["detected_triggers"]
320
  if triggers == ["None"]:
321
  return "✓ No triggers detected in the content."
@@ -323,7 +256,9 @@ def analyze_with_loading(text, progress=gr.Progress()):
323
  trigger_list = "\n".join([f"• {trigger}" for trigger in triggers])
324
  return f"⚠ Triggers Detected:\n{trigger_list}"
325
 
 
326
  with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as iface:
 
327
  gr.HTML("""
328
  <div class="treat-title">
329
  <h1>TREAT</h1>
@@ -335,6 +270,7 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as iface:
335
  </div>
336
  """)
337
 
 
338
  with gr.Row():
339
  with gr.Column(elem_classes="content-area"):
340
  input_text = gr.Textbox(
@@ -343,21 +279,15 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as iface:
343
  lines=8,
344
  interactive=True
345
  )
346
- with gr.Row():
347
- search_query = gr.Textbox(
348
- label="Search Movie Scripts",
349
- placeholder="Enter movie title...",
350
- lines=1,
351
- interactive=True
352
- )
353
 
 
354
  with gr.Row(elem_classes="center-row"):
355
  analyze_btn = gr.Button(
356
  "✨ Analyze Content",
357
  variant="primary"
358
  )
359
- search_button = gr.Button("🔍 Search and Analyze Script")
360
 
 
361
  with gr.Row():
362
  with gr.Column(elem_classes="results-area"):
363
  output_text = gr.Textbox(
@@ -365,33 +295,22 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as iface:
365
  lines=5,
366
  interactive=False
367
  )
368
- status_text = gr.Markdown(
369
- label="Status",
370
- value=""
371
- )
372
 
 
373
  analyze_btn.click(
374
  fn=analyze_with_loading,
375
  inputs=[input_text],
376
  outputs=[output_text],
377
  api_name="analyze"
378
  )
379
-
380
- search_button.click(
381
- fn=fetch_and_analyze_script,
382
- inputs=[search_query],
383
- outputs=[output_text],
384
- show_progress=True
385
- )
386
 
 
387
  gr.HTML("""
388
  <div class="footer">
389
  <p>Made with <span class="heart">💖</span> by <a href="https://www.linkedin.com/in/kubermehta/" target="_blank">Kuber Mehta</a></p>
390
  </div>
391
  """)
392
 
393
- # Launch the Gradio interface
394
-
395
  if __name__ == "__main__":
396
  iface.launch(
397
  share=False,
 
2
  from model.analyzer import analyze_content
3
  import asyncio
4
  import time
 
 
 
 
 
5
 
6
  custom_css = """
7
  * {
 
212
  100% { transform: scale(1); }
213
  }
214
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
215
 
216
  def analyze_with_loading(text, progress=gr.Progress()):
217
+ """
218
+ Synchronous wrapper for the async analyze_content function with smooth progress updates
219
+ """
220
+ # Initialize progress
221
  progress(0, desc="Starting analysis...")
222
 
223
+ # Initial setup phase - smoother progression
224
  for i in range(25):
225
+ time.sleep(0.04) # Slightly longer sleep for smoother animation
226
  progress((i + 1) / 100, desc="Initializing analysis...")
227
 
228
+ # Pre-processing phase
229
  for i in range(25, 45):
230
  time.sleep(0.03)
231
  progress((i + 1) / 100, desc="Pre-processing content...")
232
 
233
+ # Perform analysis
234
  progress(0.45, desc="Analyzing content...")
235
  try:
236
  result = asyncio.run(analyze_content(text))
237
 
238
+ # Analysis progress simulation
239
  for i in range(45, 75):
240
  time.sleep(0.03)
241
  progress((i + 1) / 100, desc="Processing results...")
 
243
  except Exception as e:
244
  return f"Error during analysis: {str(e)}"
245
 
246
+ # Final processing with smooth progression
247
  for i in range(75, 100):
248
  time.sleep(0.02)
249
  progress((i + 1) / 100, desc="Finalizing results...")
250
 
251
+ # Format the results
252
  triggers = result["detected_triggers"]
253
  if triggers == ["None"]:
254
  return "✓ No triggers detected in the content."
 
256
  trigger_list = "\n".join([f"• {trigger}" for trigger in triggers])
257
  return f"⚠ Triggers Detected:\n{trigger_list}"
258
 
259
+ # Create the Gradio interface
260
  with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as iface:
261
+ # Title section
262
  gr.HTML("""
263
  <div class="treat-title">
264
  <h1>TREAT</h1>
 
270
  </div>
271
  """)
272
 
273
+ # Content input section
274
  with gr.Row():
275
  with gr.Column(elem_classes="content-area"):
276
  input_text = gr.Textbox(
 
279
  lines=8,
280
  interactive=True
281
  )
 
 
 
 
 
 
 
282
 
283
+ # Button section
284
  with gr.Row(elem_classes="center-row"):
285
  analyze_btn = gr.Button(
286
  "✨ Analyze Content",
287
  variant="primary"
288
  )
 
289
 
290
+ # Results section
291
  with gr.Row():
292
  with gr.Column(elem_classes="results-area"):
293
  output_text = gr.Textbox(
 
295
  lines=5,
296
  interactive=False
297
  )
 
 
 
 
298
 
299
+ # Set up the click event
300
  analyze_btn.click(
301
  fn=analyze_with_loading,
302
  inputs=[input_text],
303
  outputs=[output_text],
304
  api_name="analyze"
305
  )
 
 
 
 
 
 
 
306
 
307
+ # Footer section
308
  gr.HTML("""
309
  <div class="footer">
310
  <p>Made with <span class="heart">💖</span> by <a href="https://www.linkedin.com/in/kubermehta/" target="_blank">Kuber Mehta</a></p>
311
  </div>
312
  """)
313
 
 
 
314
  if __name__ == "__main__":
315
  iface.launch(
316
  share=False,
requirements.txt CHANGED
@@ -1,13 +1,8 @@
 
 
1
  torch
2
  gradio
 
3
  accelerate
4
  safetensors
5
- huggingface-hub
6
- fastapi
7
- httpx
8
- beautifulsoup4
9
- bs4
10
- httpx
11
- flask
12
- flask_cors
13
- transformers
 
1
+ flask
2
+ flask_cors
3
  torch
4
  gradio
5
+ transformers
6
  accelerate
7
  safetensors
8
+ huggingface-hub
 
 
 
 
 
 
 
 
script_search_api.py DELETED
@@ -1,212 +0,0 @@
1
- # script_search_api.py
2
-
3
- from fastapi import FastAPI, HTTPException
4
- from fastapi.middleware.cors import CORSMiddleware
5
- import requests
6
- from bs4 import BeautifulSoup
7
- from model.analyzer import analyze_content
8
- import logging
9
- from difflib import get_close_matches
10
- import re
11
- from typing import Dict
12
- from dataclasses import dataclass
13
- from datetime import datetime
14
-
15
- logging.basicConfig(level=logging.INFO)
16
- logger = logging.getLogger(__name__)
17
-
18
- app = FastAPI()
19
-
20
- app.add_middleware(
21
- CORSMiddleware,
22
- allow_origins=["*"],
23
- allow_credentials=True,
24
- allow_methods=["*"],
25
- allow_headers=["*"],
26
- )
27
-
28
- BASE_URL = "https://imsdb.com"
29
- ALL_SCRIPTS_URL = f"{BASE_URL}/all-scripts.html"
30
-
31
- @dataclass
32
- class ProgressInfo:
33
- progress: float
34
- status: str
35
- timestamp: datetime
36
-
37
- progress_tracker: Dict[str, ProgressInfo] = {}
38
-
39
- def update_progress(movie_name: str, progress: float, message: str):
40
- """
41
- Update the progress tracker with current progress and status message.
42
- """
43
- progress_tracker[movie_name] = ProgressInfo(
44
- progress=progress,
45
- status=message,
46
- timestamp=datetime.now()
47
- )
48
- logger.info(f"{message} (Progress: {progress * 100:.0f}%)")
49
-
50
- def find_movie_link(movie_name: str, soup: BeautifulSoup) -> str | None:
51
- """
52
- Find the closest matching movie link from the script database.
53
- """
54
- movie_links = {link.text.strip().lower(): link['href'] for link in soup.find_all('a', href=True)}
55
- close_matches = get_close_matches(movie_name.lower(), movie_links.keys(), n=1, cutoff=0.6)
56
-
57
- if close_matches:
58
- logger.info(f"Close match found: {close_matches[0]}")
59
- return BASE_URL + movie_links[close_matches[0]]
60
-
61
- logger.info("No close match found.")
62
- return None
63
-
64
- def find_script_link(soup: BeautifulSoup, movie_name: str) -> str | None:
65
- """
66
- Find the script download link for a given movie.
67
- """
68
- patterns = [
69
- f'Read "{movie_name}" Script',
70
- f'Read "{movie_name.title()}" Script',
71
- f'Read "{movie_name.upper()}" Script',
72
- f'Read "{movie_name.lower()}" Script'
73
- ]
74
-
75
- for link in soup.find_all('a', href=True):
76
- link_text = link.text.strip()
77
- if any(pattern.lower() in link_text.lower() for pattern in patterns):
78
- return link['href']
79
- elif all(word.lower() in link_text.lower() for word in ["Read", "Script", movie_name]):
80
- return link['href']
81
- return None
82
-
83
- def fetch_script(movie_name: str) -> str | None:
84
- """
85
- Fetch and extract the script content for a given movie.
86
- """
87
- # Initial page load
88
- update_progress(movie_name, 0.1, "Fetching the script database...")
89
- try:
90
- response = requests.get(ALL_SCRIPTS_URL)
91
- response.raise_for_status()
92
- except requests.RequestException as e:
93
- logger.error(f"Failed to load the main page: {str(e)}")
94
- return None
95
-
96
- # Search for movie
97
- update_progress(movie_name, 0.2, "Searching for the movie...")
98
- soup = BeautifulSoup(response.text, 'html.parser')
99
- movie_link = find_movie_link(movie_name, soup)
100
-
101
- if not movie_link:
102
- logger.error(f"Script for '{movie_name}' not found.")
103
- return None
104
-
105
- # Fetch movie page
106
- update_progress(movie_name, 0.3, "Loading movie details...")
107
- try:
108
- response = requests.get(movie_link)
109
- response.raise_for_status()
110
- except requests.RequestException as e:
111
- logger.error(f"Failed to load the movie page: {str(e)}")
112
- return None
113
-
114
- # Find script link
115
- update_progress(movie_name, 0.4, "Locating script download...")
116
- soup = BeautifulSoup(response.text, 'html.parser')
117
- script_link = find_script_link(soup, movie_name)
118
-
119
- if not script_link:
120
- logger.error(f"Unable to find script link for '{movie_name}'.")
121
- return None
122
-
123
- # Fetch script content
124
- script_page_url = BASE_URL + script_link
125
- update_progress(movie_name, 0.5, "Downloading script content...")
126
-
127
- try:
128
- response = requests.get(script_page_url)
129
- response.raise_for_status()
130
- except requests.RequestException as e:
131
- logger.error(f"Failed to load the script: {str(e)}")
132
- return None
133
-
134
- # Extract script text
135
- update_progress(movie_name, 0.6, "Extracting script text...")
136
- soup = BeautifulSoup(response.text, 'html.parser')
137
- script_content = soup.find('pre')
138
-
139
- if script_content:
140
- update_progress(movie_name, 0.7, "Script extracted successfully")
141
- return script_content.get_text()
142
- else:
143
- logger.error("Failed to extract script content.")
144
- return None
145
-
146
- @app.get("/api/fetch_and_analyze")
147
- async def fetch_and_analyze(movie_name: str):
148
- """
149
- Fetch and analyze a movie script, with progress tracking.
150
- """
151
- try:
152
- # Initialize progress
153
- update_progress(movie_name, 0.0, "Starting script search...")
154
-
155
- # Fetch script
156
- script_text = fetch_script(movie_name)
157
- if not script_text:
158
- raise HTTPException(status_code=404, detail="Script not found or error occurred")
159
-
160
- # Analyze content
161
- update_progress(movie_name, 0.8, "Analyzing script content...")
162
- result = await analyze_content(script_text)
163
-
164
- # Finalize
165
- update_progress(movie_name, 1.0, "Analysis complete!")
166
- return result
167
-
168
- except Exception as e:
169
- logger.error(f"Error in fetch_and_analyze: {str(e)}", exc_info=True)
170
- # Clean up progress tracker in case of error
171
- if movie_name in progress_tracker:
172
- del progress_tracker[movie_name]
173
- raise HTTPException(status_code=500, detail=f"Internal Server Error: {str(e)}")
174
-
175
- @app.get("/api/progress")
176
- def get_progress(movie_name: str):
177
- """
178
- Get the current progress and status for a movie analysis.
179
- """
180
- if movie_name not in progress_tracker:
181
- return {
182
- "progress": 0,
183
- "status": "Waiting to start..."
184
- }
185
-
186
- progress_info = progress_tracker[movie_name]
187
-
188
- # Clean up old entries (optional)
189
- current_time = datetime.now()
190
- if (current_time - progress_info.timestamp).total_seconds() > 3600: # 1 hour timeout
191
- del progress_tracker[movie_name]
192
- return {
193
- "progress": 0,
194
- "status": "Session expired. Please try again."
195
- }
196
-
197
- return {
198
- "progress": progress_info.progress,
199
- "status": progress_info.status
200
- }
201
-
202
- @app.on_event("startup")
203
- async def startup_event():
204
- """
205
- Initialize the server and clear any existing progress data.
206
- """
207
- progress_tracker.clear()
208
- logger.info("Server started, progress tracker initialized")
209
-
210
- if __name__ == "__main__":
211
- import uvicorn
212
- uvicorn.run(app, host="0.0.0.0", port=8000)