Kuberwastaken committed
Commit 736f3ff · 1 Parent(s): 6a0dc28

Added Support for searching movies

.gitignore CHANGED
@@ -1 +1 @@
-treat-env
+treat-scrape
__pycache__/script_search_api.cpython-310.pyc ADDED
Binary file (6.02 kB).
 
gradio_app.py CHANGED
@@ -2,6 +2,8 @@ import gradio as gr
 from model.analyzer import analyze_content
 import asyncio
 import time
+import httpx
+import json
 
 custom_css = """
 * {
@@ -213,29 +215,81 @@ footer {
 }
 """
 
+
+async def fetch_and_analyze_script(movie_name, progress=gr.Progress(track_tqdm=True)):
+    try:
+        async with httpx.AsyncClient(timeout=60.0) as client:
+            # Start the analysis request
+            progress(0.2, desc="Initiating script search...")
+            response = await client.get(
+                f"http://localhost:8000/api/fetch_and_analyze",
+                params={"movie_name": movie_name}
+            )
+
+            if response.status_code == 200:
+                # Start progress polling
+                while True:
+                    progress_response = await client.get(
+                        f"http://localhost:8000/api/progress",
+                        params={"movie_name": movie_name}
+                    )
+
+                    if progress_response.status_code == 200:
+                        progress_data = progress_response.json()
+                        current_progress = progress_data["progress"]
+                        current_status = progress_data.get("status", "Processing...")
+
+                        progress(current_progress, desc=current_status)
+
+                        if current_progress >= 1.0:
+                            break
+
+                    await asyncio.sleep(0.5)  # Poll every 500ms
+
+                result = response.json()
+                triggers = result.get("detected_triggers", [])
+
+                if not triggers or triggers == ["None"]:
+                    formatted_result = "✓ No triggers detected in the content."
+                else:
+                    trigger_list = "\n".join([f"• {trigger}" for trigger in triggers])
+                    formatted_result = f"⚠ Triggers Detected:\n{trigger_list}"
+
+                return formatted_result
+            else:
+                return f"Error: Server returned status code {response.status_code}"
+
+    except httpx.TimeoutError:
+        return "Error: Request timed out. Please try again."
+    except Exception as e:
+        return f"An unexpected error occurred: {str(e)}"
+
+async def track_progress(movie_name, progress):
+    async with httpx.AsyncClient() as client:
+        while True:
+            response = await client.get(f"http://localhost:8000/api/progress", params={"movie_name": movie_name})
+            if response.status_code == 200:
+                progress_data = response.json()
+                progress(progress_data["progress"], desc="Tracking progress...")
+                if progress_data["progress"] >= 1.0:
+                    break
+            await asyncio.sleep(1)
+
 def analyze_with_loading(text, progress=gr.Progress()):
-    """
-    Synchronous wrapper for the async analyze_content function with smooth progress updates
-    """
-    # Initialize progress
     progress(0, desc="Starting analysis...")
 
-    # Initial setup phase - smoother progression
     for i in range(25):
-        time.sleep(0.04)  # Slightly longer sleep for smoother animation
+        time.sleep(0.04)
         progress((i + 1) / 100, desc="Initializing analysis...")
 
-    # Pre-processing phase
     for i in range(25, 45):
         time.sleep(0.03)
         progress((i + 1) / 100, desc="Pre-processing content...")
 
-    # Perform analysis
     progress(0.45, desc="Analyzing content...")
     try:
         result = asyncio.run(analyze_content(text))
 
-        # Analysis progress simulation
         for i in range(45, 75):
             time.sleep(0.03)
             progress((i + 1) / 100, desc="Processing results...")
@@ -243,12 +297,10 @@ def analyze_with_loading(text, progress=gr.Progress()):
     except Exception as e:
         return f"Error during analysis: {str(e)}"
 
-    # Final processing with smooth progression
     for i in range(75, 100):
         time.sleep(0.02)
         progress((i + 1) / 100, desc="Finalizing results...")
 
-    # Format the results
     triggers = result["detected_triggers"]
     if triggers == ["None"]:
         return "✓ No triggers detected in the content."
@@ -256,9 +308,7 @@ def analyze_with_loading(text, progress=gr.Progress()):
         trigger_list = "\n".join([f"• {trigger}" for trigger in triggers])
         return f"⚠ Triggers Detected:\n{trigger_list}"
 
-# Create the Gradio interface
 with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as iface:
-    # Title section
     gr.HTML("""
         <div class="treat-title">
             <h1>TREAT</h1>
@@ -270,7 +320,6 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as iface:
         </div>
     """)
 
-    # Content input section
     with gr.Row():
         with gr.Column(elem_classes="content-area"):
             input_text = gr.Textbox(
@@ -279,15 +328,21 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as iface:
                 lines=8,
                 interactive=True
             )
+    with gr.Row():
+        search_query = gr.Textbox(
+            label="Search Movie Scripts",
+            placeholder="Enter movie title...",
+            lines=1,
+            interactive=True
+        )
 
-    # Button section
     with gr.Row(elem_classes="center-row"):
         analyze_btn = gr.Button(
            "✨ Analyze Content",
            variant="primary"
        )
+        search_button = gr.Button("🔍 Search and Analyze Script")
 
-    # Results section
     with gr.Row():
         with gr.Column(elem_classes="results-area"):
             output_text = gr.Textbox(
@@ -295,16 +350,25 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as iface:
                 lines=5,
                 interactive=False
             )
+            status_text = gr.Markdown(
+                label="Status",
+                value=""
+            )
 
-    # Set up the click event
     analyze_btn.click(
         fn=analyze_with_loading,
         inputs=[input_text],
         outputs=[output_text],
         api_name="analyze"
     )
+
+    search_button.click(
+        fn=fetch_and_analyze_script,
+        inputs=[search_query],
+        outputs=[output_text],
+        show_progress=True
+    )
 
-    # Footer section
     gr.HTML("""
         <div class="footer">
             <p>Made with <span class="heart">💖</span> by <a href="https://www.linkedin.com/in/kubermehta/" target="_blank">Kuber Mehta</a></p>
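Note that the search flow added to gradio_app.py talks to a separate FastAPI process over plain HTTP, so the Gradio UI and script_search_api.py have to run side by side. Below is a minimal sketch of exercising that contract from outside Gradio, assuming the API has already been started with "python script_search_api.py" and is listening on port 8000; "Inception" is only an example title:

    import asyncio
    import httpx

    async def smoke_test(movie_name: str = "Inception"):
        # Hit the same two endpoints that the search button's callback drives.
        async with httpx.AsyncClient(timeout=None) as client:
            progress = await client.get(
                "http://localhost:8000/api/progress",
                params={"movie_name": movie_name},
            )
            print(progress.json())  # e.g. {"progress": 0, "status": "Waiting to start..."}

            # This call blocks until scraping and analysis finish, hence no client timeout here.
            result = await client.get(
                "http://localhost:8000/api/fetch_and_analyze",
                params={"movie_name": movie_name},
            )
            print(result.status_code, result.json())

    asyncio.run(smoke_test())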
requirements.txt CHANGED
@@ -1,8 +1,11 @@
-flask
-flask_cors
 torch
 gradio
-transformers
 accelerate
 safetensors
-huggingface-hub
+huggingface-hub
+fastapi
+httpx
+beautifulsoup4
+bs4
+httpx
+json
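As committed, the dependency list carries some redundancy: httpx appears twice, bs4 is only a thin shim that installs beautifulsoup4, and json is part of the Python standard library rather than a pip-installable requirement. A deduplicated equivalent might look as follows (an editorial sketch, not what the commit ships; requests and uvicorn are listed only because script_search_api.py imports them directly):

    torch
    gradio
    accelerate
    safetensors
    huggingface-hub
    fastapi
    uvicorn
    httpx
    requests
    beautifulsoup4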
script_search_api.py ADDED
@@ -0,0 +1,212 @@
+# script_search_api.py
+
+from fastapi import FastAPI, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
+import requests
+from bs4 import BeautifulSoup
+from model.analyzer import analyze_content
+import logging
+from difflib import get_close_matches
+import re
+from typing import Dict
+from dataclasses import dataclass
+from datetime import datetime
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+app = FastAPI()
+
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+BASE_URL = "https://imsdb.com"
+ALL_SCRIPTS_URL = f"{BASE_URL}/all-scripts.html"
+
+@dataclass
+class ProgressInfo:
+    progress: float
+    status: str
+    timestamp: datetime
+
+progress_tracker: Dict[str, ProgressInfo] = {}
+
+def update_progress(movie_name: str, progress: float, message: str):
+    """
+    Update the progress tracker with current progress and status message.
+    """
+    progress_tracker[movie_name] = ProgressInfo(
+        progress=progress,
+        status=message,
+        timestamp=datetime.now()
+    )
+    logger.info(f"{message} (Progress: {progress * 100:.0f}%)")
+
+def find_movie_link(movie_name: str, soup: BeautifulSoup) -> str | None:
+    """
+    Find the closest matching movie link from the script database.
+    """
+    movie_links = {link.text.strip().lower(): link['href'] for link in soup.find_all('a', href=True)}
+    close_matches = get_close_matches(movie_name.lower(), movie_links.keys(), n=1, cutoff=0.6)
+
+    if close_matches:
+        logger.info(f"Close match found: {close_matches[0]}")
+        return BASE_URL + movie_links[close_matches[0]]
+
+    logger.info("No close match found.")
+    return None
+
+def find_script_link(soup: BeautifulSoup, movie_name: str) -> str | None:
+    """
+    Find the script download link for a given movie.
+    """
+    patterns = [
+        f'Read "{movie_name}" Script',
+        f'Read "{movie_name.title()}" Script',
+        f'Read "{movie_name.upper()}" Script',
+        f'Read "{movie_name.lower()}" Script'
+    ]
+
+    for link in soup.find_all('a', href=True):
+        link_text = link.text.strip()
+        if any(pattern.lower() in link_text.lower() for pattern in patterns):
+            return link['href']
+        elif all(word.lower() in link_text.lower() for word in ["Read", "Script", movie_name]):
+            return link['href']
+    return None
+
+def fetch_script(movie_name: str) -> str | None:
+    """
+    Fetch and extract the script content for a given movie.
+    """
+    # Initial page load
+    update_progress(movie_name, 0.1, "Fetching the script database...")
+    try:
+        response = requests.get(ALL_SCRIPTS_URL)
+        response.raise_for_status()
+    except requests.RequestException as e:
+        logger.error(f"Failed to load the main page: {str(e)}")
+        return None
+
+    # Search for movie
+    update_progress(movie_name, 0.2, "Searching for the movie...")
+    soup = BeautifulSoup(response.text, 'html.parser')
+    movie_link = find_movie_link(movie_name, soup)
+
+    if not movie_link:
+        logger.error(f"Script for '{movie_name}' not found.")
+        return None
+
+    # Fetch movie page
+    update_progress(movie_name, 0.3, "Loading movie details...")
+    try:
+        response = requests.get(movie_link)
+        response.raise_for_status()
+    except requests.RequestException as e:
+        logger.error(f"Failed to load the movie page: {str(e)}")
+        return None
+
+    # Find script link
+    update_progress(movie_name, 0.4, "Locating script download...")
+    soup = BeautifulSoup(response.text, 'html.parser')
+    script_link = find_script_link(soup, movie_name)
+
+    if not script_link:
+        logger.error(f"Unable to find script link for '{movie_name}'.")
+        return None
+
+    # Fetch script content
+    script_page_url = BASE_URL + script_link
+    update_progress(movie_name, 0.5, "Downloading script content...")
+
+    try:
+        response = requests.get(script_page_url)
+        response.raise_for_status()
+    except requests.RequestException as e:
+        logger.error(f"Failed to load the script: {str(e)}")
+        return None
+
+    # Extract script text
+    update_progress(movie_name, 0.6, "Extracting script text...")
+    soup = BeautifulSoup(response.text, 'html.parser')
+    script_content = soup.find('pre')
+
+    if script_content:
+        update_progress(movie_name, 0.7, "Script extracted successfully")
+        return script_content.get_text()
+    else:
+        logger.error("Failed to extract script content.")
+        return None
+
+@app.get("/api/fetch_and_analyze")
+async def fetch_and_analyze(movie_name: str):
+    """
+    Fetch and analyze a movie script, with progress tracking.
+    """
+    try:
+        # Initialize progress
+        update_progress(movie_name, 0.0, "Starting script search...")
+
+        # Fetch script
+        script_text = fetch_script(movie_name)
+        if not script_text:
+            raise HTTPException(status_code=404, detail="Script not found or error occurred")
+
+        # Analyze content
+        update_progress(movie_name, 0.8, "Analyzing script content...")
+        result = await analyze_content(script_text)
+
+        # Finalize
+        update_progress(movie_name, 1.0, "Analysis complete!")
+        return result
+
+    except Exception as e:
+        logger.error(f"Error in fetch_and_analyze: {str(e)}", exc_info=True)
+        # Clean up progress tracker in case of error
+        if movie_name in progress_tracker:
+            del progress_tracker[movie_name]
+        raise HTTPException(status_code=500, detail=f"Internal Server Error: {str(e)}")
+
+@app.get("/api/progress")
+def get_progress(movie_name: str):
+    """
+    Get the current progress and status for a movie analysis.
+    """
+    if movie_name not in progress_tracker:
+        return {
+            "progress": 0,
+            "status": "Waiting to start..."
+        }
+
+    progress_info = progress_tracker[movie_name]
+
+    # Clean up old entries (optional)
+    current_time = datetime.now()
+    if (current_time - progress_info.timestamp).total_seconds() > 3600:  # 1 hour timeout
+        del progress_tracker[movie_name]
+        return {
+            "progress": 0,
+            "status": "Session expired. Please try again."
+        }
+
+    return {
+        "progress": progress_info.progress,
+        "status": progress_info.status
+    }
+
+@app.on_event("startup")
+async def startup_event():
+    """
+    Initialize the server and clear any existing progress data.
+    """
+    progress_tracker.clear()
+    logger.info("Server started, progress tracker initialized")
+
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=8000)
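A note on the fuzzy title lookup: find_movie_link feeds every link text from the IMSDb all-scripts page into difflib.get_close_matches with n=1 and cutoff=0.6, so a slightly misspelled query still resolves to a script while an unrelated query falls through to the 404 path. A small standalone illustration, with a made-up candidate list standing in for the scraped link texts:

    from difflib import get_close_matches

    candidate_titles = ["interstellar", "inception", "the dark knight"]

    # A near-miss spelling still clears the 0.6 similarity cutoff...
    print(get_close_matches("interstelar", candidate_titles, n=1, cutoff=0.6))   # ['interstellar']

    # ...while an unrelated title matches nothing, which the API reports as "Script not found".
    print(get_close_matches("batman begins", candidate_titles, n=1, cutoff=0.6)) # []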