awacke1 committed on
Commit 9cf3556 · verified · 1 Parent(s): 2e601c1

Update app.py

Files changed (1)
  1. app.py +327 -180
app.py CHANGED
@@ -1,213 +1,360 @@
  import streamlit as st
  import streamlit.components.v1 as components
  import asyncio
  import edge_tts
- import os
- import base64
- import json
- from datetime import datetime
- from typing import Optional, Dict, List
- import glob
 
- # Configure page
  st.set_page_config(
-     page_title="Research Assistant",
      page_icon="🔬",
      layout="wide",
-     initial_sidebar_state="expanded"
  )
 
- # Initialize session state
- if 'current_query' not in st.session_state:
-     st.session_state.current_query = ''
- if 'last_response' not in st.session_state:
-     st.session_state.last_response = ''
- if 'audio_queue' not in st.session_state:
-     st.session_state.audio_queue = []
- if 'mermaid_history' not in st.session_state:
-     st.session_state.mermaid_history = []
-
- # Custom CSS
- st.markdown("""
- <style>
- .main { background-color: #f5f5f5; }
- .stMarkdown { font-family: 'Inter', sans-serif; }
- .diagram-container {
-     background: white;
-     padding: 20px;
-     border-radius: 10px;
-     box-shadow: 0 2px 4px rgba(0,0,0,0.1);
- }
- .results-container {
-     margin-top: 20px;
-     padding: 15px;
-     background: white;
-     border-radius: 10px;
- }
- .audio-player {
-     width: 100%;
-     margin: 10px 0;
  }
- </style>
- """, unsafe_allow_html=True)
-
- def generate_mermaid_html(mermaid_code: str, height: int = 400) -> str:
-     """Generate responsive Mermaid diagram HTML with click handling."""
-     return f"""
-     <div class="diagram-container">
-         <script src="https://cdn.jsdelivr.net/npm/mermaid/dist/mermaid.min.js"></script>
-         <div class="mermaid" style="height: {height}px;">
-             {mermaid_code}
          </div>
-         <script>
-             mermaid.initialize({{
-                 startOnLoad: true,
-                 securityLevel: 'loose',
-                 theme: 'default'
-             }});
-
-             document.addEventListener('click', (e) => {{
-                 if (e.target.tagName === 'g' && e.target.classList.contains('node')) {{
-                     const nodeId = e.target.id;
-                     window.parent.postMessage({{
-                         type: 'node_clicked',
-                         nodeId: nodeId,
-                         isStreamlitMessage: true
-                     }}, '*');
-                 }}
-             }});
-         </script>
-     </div>
-     """
-
- async def generate_speech(text: str, voice: str = "en-US-AriaNeural") -> Optional[str]:
-     """Generate speech using Edge TTS."""
      if not text.strip():
          return None
 
      timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-     output_file = f"speech_{timestamp}.mp3"
 
-     communicate = edge_tts.Communicate(text, voice)
      await communicate.save(output_file)
 
      return output_file
 
- def process_arxiv_search(query: str) -> Dict:
-     """Process Arxiv search with your existing research code."""
-     # Integrate your Arxiv search code here
-     # This is a placeholder that simulates a response
-     return {
-         "title": "Sample Research Paper",
-         "abstract": "This is a sample abstract...",
-         "authors": ["Author 1", "Author 2"],
-         "url": "https://arxiv.org/abs/..."
      }
 
- def create_audio_player(file_path: str) -> str:
-     """Create an HTML audio player with download button."""
-     with open(file_path, "rb") as audio_file:
-         audio_bytes = audio_file.read()
-         audio_b64 = base64.b64encode(audio_bytes).decode()
-
-     return f"""
-     <div class="audio-player">
-         <audio controls style="width: 100%">
-             <source src="data:audio/mp3;base64,{audio_b64}" type="audio/mp3">
-             Your browser does not support the audio element.
-         </audio>
-         <a href="data:audio/mp3;base64,{audio_b64}"
-            download="{os.path.basename(file_path)}"
-            style="margin-top: 5px; display: inline-block;">
-             Download Audio
-         </a>
-     </div>
-     """
-
- def handle_node_click(node_id: str):
-     """Handle Mermaid diagram node clicks."""
-     # Convert node ID to search query
-     query = node_id.replace('_', ' ')
-
-     # Perform search
-     results = process_arxiv_search(query)
-
-     # Generate speech from results
-     asyncio.run(generate_speech(results['abstract']))
-
-     # Update session state
-     st.session_state.current_query = query
-     st.session_state.last_response = results
-
- # Main Mermaid diagram definition
- RESEARCH_DIAGRAM = """
- graph TD
-     A[Literature Review] --> B[Data Analysis]
-     B --> C[Results]
-     C --> D[Conclusions]
-
-     click A callback "Research Methodology"
-     click B callback "Statistical Analysis"
-     click C callback "Research Findings"
-     click D callback "Research Impact"
-
-     style A fill:#f9f,stroke:#333,stroke-width:4px
-     style B fill:#bbf,stroke:#333,stroke-width:4px
-     style C fill:#bfb,stroke:#333,stroke-width:4px
-     style D fill:#fbb,stroke:#333,stroke-width:4px
- """
 
- def main():
-     st.title("📚 Research Assistant")
 
-     # Sidebar configuration
-     st.sidebar.header("Configuration")
-     voice_option = st.sidebar.selectbox(
-         "Select Voice",
-         ["en-US-AriaNeural", "en-US-GuyNeural", "en-GB-SoniaNeural"]
-     )
 
-     # Main layout
-     col1, col2 = st.columns([2, 3])
 
      with col1:
-         st.subheader("Research Map")
-         components.html(
-             generate_mermaid_html(RESEARCH_DIAGRAM),
-             height=500,
-             scrolling=True
-         )
-
-         st.markdown("### Recent Searches")
-         for query in st.session_state.mermaid_history[-5:]:
-             st.info(query)
 
      with col2:
-         st.subheader("Research Results")
 
-         # Manual search option
-         search_query = st.text_input("Enter search query:")
-         if st.button("Search"):
-             handle_node_click(search_query)
 
-         # Display current results
-         if st.session_state.last_response:
-             with st.container():
-                 st.markdown("#### Latest Results")
-                 st.json(st.session_state.last_response)
 
-         # Audio playback
-         audio_files = glob.glob("speech_*.mp3")
-         if audio_files:
-             latest_audio = max(audio_files, key=os.path.getctime)
-             st.markdown(create_audio_player(latest_audio), unsafe_allow_html=True)
-
-         # Cleanup old audio files
-         for file in glob.glob("speech_*.mp3")[:-5]:  # Keep only last 5 files
-             try:
-                 os.remove(file)
-             except:
-                 pass
-
- if __name__ == "__main__":
-     main()
 
  import streamlit as st
+ import anthropic, openai, base64, cv2, glob, json, math, os, pytz, random, re, requests, time, zipfile
+ import plotly.graph_objects as go
  import streamlit.components.v1 as components
+ from datetime import datetime
+ from audio_recorder_streamlit import audio_recorder
+ from bs4 import BeautifulSoup
+ from collections import defaultdict, deque
+ from dotenv import load_dotenv
+ from gradio_client import Client
+ from huggingface_hub import InferenceClient
+ from io import BytesIO
+ from PIL import Image
+ from PyPDF2 import PdfReader
+ from urllib.parse import quote
+ from xml.etree import ElementTree as ET
+ from openai import OpenAI
+ import extra_streamlit_components as stx
  import asyncio
  import edge_tts
 
+ # 1. App Configuration
+ Site_Name = '🔬 Research Assistant Pro'
  st.set_page_config(
+     page_title=Site_Name,
      page_icon="🔬",
      layout="wide",
+     initial_sidebar_state="auto",
+     menu_items={
+         'Get Help': 'https://huggingface.co/awacke1',
+         'Report a bug': 'https://huggingface.co/spaces/awacke1',
+         'About': Site_Name
+     }
  )
+ load_dotenv()
 
+ # 2. API and Client Setup
+ openai_api_key = os.getenv('OPENAI_API_KEY', st.secrets.get('OPENAI_API_KEY', ''))
+ anthropic_key = os.getenv('ANTHROPIC_API_KEY', st.secrets.get('ANTHROPIC_API_KEY', ''))
+ hf_key = os.getenv('HF_KEY', st.secrets.get('HF_KEY', ''))
+
+ openai_client = OpenAI(api_key=openai_api_key)
+ claude_client = anthropic.Anthropic(api_key=anthropic_key)
+
+ # 3. Session State Management
+ if 'chat_history' not in st.session_state:
+     st.session_state.chat_history = []
+ if 'current_audio' not in st.session_state:
+     st.session_state.current_audio = None
+ if 'autoplay_audio' not in st.session_state:
+     st.session_state.autoplay_audio = True
+ if 'last_search' not in st.session_state:
+     st.session_state.last_search = None
+ if 'file_content' not in st.session_state:
+     st.session_state.file_content = None
+ if 'current_file' not in st.session_state:
+     st.session_state.current_file = None
+
+ # 4. Utility Functions
+ def get_download_link(file_path):
+     """Generate download link for any file type"""
+     with open(file_path, "rb") as file:
+         contents = file.read()
+     b64 = base64.b64encode(contents).decode()
+     file_name = os.path.basename(file_path)
+     file_type = file_name.split('.')[-1]
+     mime_types = {
+         'md': 'text/markdown',
+         'mp3': 'audio/mpeg',
+         'mp4': 'video/mp4',
+         'pdf': 'application/pdf',
+         'txt': 'text/plain'
      }
+     mime_type = mime_types.get(file_type, 'application/octet-stream')
+     return f'<a href="data:{mime_type};base64,{b64}" download="{file_name}">⬇️ Download {file_name}</a>'
+
+ def generate_filename(content, file_type="md"):
+     """Generate unique filename with timestamp"""
+     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+     safe_content = re.sub(r'[^\w\s-]', '', content[:50])
+     return f"{timestamp}_{safe_content}.{file_type}"
+
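Note: a minimal usage sketch for the two helpers above, assuming both are in scope; the call site itself is hypothetical and not part of this commit. generate_filename produces a timestamped name, and get_download_link wraps the saved file in a base64 data-URI anchor.

    import streamlit as st

    # e.g. "20240101_120000_arxiv search notes.md"
    fname = generate_filename("arxiv search notes", file_type="md")
    with open(fname, "w") as f:
        f.write("# Notes\n")
    st.markdown(get_download_link(fname), unsafe_allow_html=True)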
+ def get_autoplay_audio_html(audio_path, width="100%"):
+     """Create HTML for autoplaying audio with controls"""
+     try:
+         with open(audio_path, "rb") as audio_file:
+             audio_bytes = audio_file.read()
+             audio_b64 = base64.b64encode(audio_bytes).decode()
+         return f'''
+             <audio controls autoplay style="width: {width};">
+                 <source src="data:audio/mpeg;base64,{audio_b64}" type="audio/mpeg">
+                 Your browser does not support the audio element.
+             </audio>
+             <div style="margin-top: 5px;">
+                 <a href="data:audio/mpeg;base64,{audio_b64}"
+                    download="{os.path.basename(audio_path)}"
+                    style="text-decoration: none;">
+                     ⬇️ Download Audio
+                 </a>
+             </div>
+         '''
+     except Exception as e:
+         return f"Error loading audio: {str(e)}"
+
+ def get_video_html(video_path, width="100%"):
+     """Create HTML for autoplaying video with controls"""
+     video_url = f"data:video/mp4;base64,{base64.b64encode(open(video_path, 'rb').read()).decode()}"
+     return f'''
+         <video width="{width}" controls autoplay muted loop>
+             <source src="{video_url}" type="video/mp4">
+             Your browser does not support the video tag.
+         </video>
+     '''
+
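Note: both media helpers above inline the entire file as a base64 data URI, which grows the rendered page by roughly a third on top of the file size itself; that is fine for short TTS clips but heavy for video. The underlying pattern, as a standalone standard-library sketch (helper name hypothetical):

    import base64

    def data_uri(path: str, mime: str) -> str:
        # base64 inflates the payload by ~33%, so keep embedded media small
        with open(path, "rb") as f:
            return f"data:{mime};base64,{base64.b64encode(f.read()).decode()}"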
+ # 5. Voice Recognition Component
+ def create_voice_component():
+     """Create voice recognition component with visual feedback"""
+     return components.html(
+         """
+         <div style="padding: 20px; border-radius: 10px; background: #f0f2f6;">
+             <button id="startBtn" class="streamlit-button">Start Voice Search</button>
+             <p id="status">Click to start speaking</p>
+             <div id="result"></div>
+             <script>
+                 if ('webkitSpeechRecognition' in window) {
+                     const recognition = new webkitSpeechRecognition();
+                     recognition.continuous = false;
+                     recognition.interimResults = true;
+
+                     const startBtn = document.getElementById('startBtn');
+                     const status = document.getElementById('status');
+                     const result = document.getElementById('result');
+
+                     startBtn.onclick = () => {
+                         recognition.start();
+                         status.textContent = 'Listening...';
+                     };
+
+                     recognition.onresult = (event) => {
+                         const transcript = Array.from(event.results)
+                             .map(result => result[0].transcript)
+                             .join('');
+                         result.textContent = transcript;
+
+                         if (event.results[0].isFinal) {
+                             window.parent.postMessage({
+                                 type: 'voice_search',
+                                 query: transcript
+                             }, '*');
+                         }
+                     };
+
+                     recognition.onend = () => {
+                         status.textContent = 'Click to start speaking';
+                     };
+                 }
+             </script>
          </div>
+         """,
+         height=200
+     )
+
+ # 6. Audio Processing Functions
+ async def generate_audio(text, voice="en-US-AriaNeural", rate="+0%", pitch="+0Hz"):
+     """Generate audio using Edge TTS with automatic playback"""
      if not text.strip():
          return None
 
      timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+     output_file = f"response_{timestamp}.mp3"
 
+     communicate = edge_tts.Communicate(text, voice, rate=rate, pitch=pitch)
      await communicate.save(output_file)
 
      return output_file
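Note: generate_audio is a coroutine, so synchronous Streamlit code has to drive it explicitly, as the Search button later does with asyncio.run. A minimal standalone call, with illustrative rate/pitch values in the signed-string format edge-tts expects:

    audio_path = asyncio.run(
        generate_audio("Results summary", voice="en-US-AriaNeural",
                       rate="+10%", pitch="-5Hz")
    )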
 
+ def render_audio_result(audio_file, title="Generated Audio"):
+     """Render audio result with autoplay in Streamlit"""
+     if audio_file and os.path.exists(audio_file):
+         st.markdown(f"### {title}")
+         st.markdown(get_autoplay_audio_html(audio_file), unsafe_allow_html=True)
+
+ # 7. Search and Process Functions
+ def perform_arxiv_search(query, response_type="summary"):
+     """Perform Arxiv search with voice response"""
+     client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
+
+     # Get search results
+     refs = client.predict(
+         query,
+         20,
+         "Semantic Search",
+         "mistralai/Mixtral-8x7B-Instruct-v0.1",
+         api_name="/update_with_rag_md"
+     )[0]
+
+     # Get AI interpretation
+     summary = client.predict(
+         query,
+         "mistralai/Mixtral-8x7B-Instruct-v0.1",
+         True,
+         api_name="/ask_llm"
+     )
+
+     response_text = summary if response_type == "summary" else refs
+     return response_text, refs
+
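Note: perform_arxiv_search issues two gradio_client calls against the same hosted Space: a retrieval pass that returns markdown references, then an LLM pass that returns a prose summary. A hypothetical call site (sketch):

    summary, refs = perform_arxiv_search("mixture of experts")
    st.markdown(summary)  # conversational answer, also fed to TTS below
    st.markdown(refs)     # full reference list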
+ async def process_voice_search_with_autoplay(query):
+     """Process voice search with automatic audio playback"""
+     summary, full_results = perform_arxiv_search(query)
+
+     audio_file = await generate_audio(summary)
+
+     st.session_state.current_audio = audio_file
+     st.session_state.last_search = {
+         'query': query,
+         'summary': summary,
+         'full_results': full_results,
+         'audio': audio_file,
+         'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S")
      }
+
+     if audio_file:
+         render_audio_result(audio_file, "Search Results")
+
+     return audio_file
 
+ def display_search_results_with_audio():
+     """Display search results with autoplaying audio"""
+     if st.session_state.last_search:
+         st.subheader("Latest Results")
+         st.markdown(st.session_state.last_search['summary'])
+
+         with st.expander("View Full Results"):
+             st.markdown(st.session_state.last_search['full_results'])
+
+         if st.session_state.current_audio:
+             render_audio_result(st.session_state.current_audio, "Audio Summary")
 
+ # 8. UI Components
+ def render_search_interface():
+     """Render main search interface"""
+     st.header("🔍 Voice Search")
 
+     create_voice_component()
+
+     col1, col2 = st.columns([3, 1])
+     with col1:
+         query = st.text_input("Or type your query:")
+     with col2:
+         if st.button("🔍 Search"):
+             asyncio.run(process_voice_search_with_autoplay(query))
+
+     display_search_results_with_audio()
+
+ def display_search_history():
+     """Display search history with audio playback"""
+     st.header("Search History")
+     if st.session_state.chat_history:
+         for idx, entry in enumerate(reversed(st.session_state.chat_history)):
+             with st.expander(
+                 f"🔍 {entry['timestamp']} - {entry['query'][:50]}...",
+                 expanded=False
+             ):
+                 st.markdown(entry['summary'])
+                 if 'audio' in entry and entry['audio']:
+                     render_audio_result(entry['audio'], "Recorded Response")
+
+ def render_settings():
+     """Render settings interface"""
+     st.sidebar.title("⚙️ Settings")
+
+     voice_options = [
+         "en-US-AriaNeural",
+         "en-US-GuyNeural",
+         "en-GB-SoniaNeural",
+         "en-AU-NatashaNeural"
+     ]
+
+     settings = {
+         'voice': st.sidebar.selectbox("Select Voice", voice_options),
+         'autoplay': st.sidebar.checkbox("Autoplay Responses", value=True),
+         'rate': st.sidebar.slider("Speech Rate", -50, 50, 0, 5),
+         'pitch': st.sidebar.slider("Pitch", -50, 50, 0, 5)
+     }
+
+     return settings
+
+ def display_file_manager():
290
+ """Display file manager in sidebar"""
291
+ st.sidebar.title("📁 File Manager")
292
 
293
+ all_files = []
294
+ for ext in ['.md', '.mp3', '.mp4']:
295
+ all_files.extend(glob.glob(f"*{ext}"))
296
+ all_files.sort(key=os.path.getmtime, reverse=True)
297
 
298
+ col1, col2 = st.sidebar.columns(2)
299
  with col1:
300
+ if st.button("🗑 Delete All"):
301
+ for file in all_files:
302
+ os.remove(file)
303
+ st.rerun()
 
 
 
 
 
 
304
 
305
  with col2:
306
+ if st.button("⬇️ Download All"):
307
+ zip_name = f"archive_{datetime.now().strftime('%Y%m%d_%H%M%S')}.zip"
308
+ with zipfile.ZipFile(zip_name, 'w') as zipf:
309
+ for file in all_files:
310
+ zipf.write(file)
311
+ st.sidebar.markdown(get_download_link(zip_name), unsafe_allow_html=True)
312
+
313
+ for file in all_files:
314
+ with st.sidebar.expander(f"📄 {os.path.basename(file)}", expanded=False):
315
+ st.write(f"Last modified: {datetime.fromtimestamp(os.path.getmtime(file)).strftime('%Y-%m-%d %H:%M:%S')}")
316
+ col1, col2 = st.columns(2)
317
+ with col1:
318
+ st.markdown(get_download_link(file), unsafe_allow_html=True)
319
+ with col2:
320
+ if st.button("🗑 Delete", key=f"del_{file}"):
321
+ os.remove(file)
322
+ st.rerun()
323
+
324
+ # 9. Main Application
325
+ def main():
326
+ st.title("🔬 Research Assistant Pro")
327
+
328
+ settings = render_settings()
329
+ display_file_manager()
330
+
331
+ tabs = st.tabs(["🎤 Voice Search", "📚 History", "🎵 Media", "⚙️ Settings"])
332
+
333
+ with tabs[0]:
334
+ render_search_interface()
335
+
336
+ with tabs[1]:
337
+ display_search_history()
338
 
339
+ with tabs[2]:
340
+ st.header("Media Gallery")
341
+ media_tabs = st.tabs(["🎵 Audio", "🎥 Video", "📷 Images"])
 
342
 
343
+ with media_tabs[0]:
344
+ audio_files = glob.glob("*.mp3")
345
+ if audio_files:
346
+ for audio_file in audio_files:
347
+ st.markdown(get_autoplay_audio_html(audio_file), unsafe_allow_html=True)
348
+ else:
349
+ st.write("No audio files found")
350
+
351
+ with media_tabs[1]:
352
+ video_files = glob.glob("*.mp4")
353
+ if video_files:
354
+ for video_file in video_files:
355
+ st.markdown(get_video_html(video_file), unsafe_allow_html=True)
356
+ else:
357
+ st.write("No video files found")
358
 
359
+ with media_tabs[2]:
360
+ image_files = glob.glob("*.png") + glob.