# Hugging Face Spaces header ("Running on CPU Upgrade") — page chrome captured
# during extraction; preserved as a comment so the file parses as Python.
# Standard library
import asyncio
import base64
import glob
import json
import math
import os
import random
import re
import time
import zipfile
from collections import defaultdict, deque
from datetime import datetime
from io import BytesIO
from urllib.parse import quote
from xml.etree import ElementTree as ET

# Third-party
import anthropic
import cv2
import edge_tts
import extra_streamlit_components as stx
import openai
import plotly.graph_objects as go
import pytz
import requests
import streamlit as st
import streamlit.components.v1 as components
from audio_recorder_streamlit import audio_recorder
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from gradio_client import Client
from huggingface_hub import InferenceClient
from openai import OpenAI
from PIL import Image
from PyPDF2 import PdfReader
# 1. App Configuration
Site_Name = '🔬 Research Assistant Pro'
st.set_page_config(
    page_title=Site_Name,
    page_icon="🔬",
    layout="wide",
    initial_sidebar_state="auto",
    menu_items={
        'Get Help': 'https://huggingface.co/awacke1',
        'Report a bug': 'https://huggingface.co/spaces/awacke1',
        'About': Site_Name
    }
)
load_dotenv()

# 2. API and Client Setup
def _get_secret(name):
    """Return a credential by name: environment first, then st.secrets.

    The original passed st.secrets.get(...) as os.getenv's default, which is
    evaluated eagerly — st.secrets raises when no secrets.toml exists, so the
    whole app crashed at import time on a bare local checkout. This wrapper
    degrades to '' instead of crashing.
    """
    value = os.getenv(name, '')
    if value:
        return value
    try:
        return st.secrets.get(name, '')
    except Exception:
        # No secrets store configured — fall back to empty credential.
        return ''

openai_api_key = _get_secret('OPENAI_API_KEY')
anthropic_key = _get_secret('ANTHROPIC_API_KEY')
hf_key = _get_secret('HF_KEY')
openai_client = OpenAI(api_key=openai_api_key)
claude_client = anthropic.Anthropic(api_key=anthropic_key)
# 3. Session State Management
# Seed every key the app reads so later code can assume presence; existing
# values survive Streamlit reruns untouched.
_SESSION_DEFAULTS = {
    'chat_history': [],
    'current_audio': None,
    'autoplay_audio': True,
    'last_search': None,
    'file_content': None,
    'current_file': None,
}
for _key, _default in _SESSION_DEFAULTS.items():
    if _key not in st.session_state:
        st.session_state[_key] = _default
# 4. Utility Functions
def get_download_link(file_path):
    """Return an HTML anchor embedding *file_path* as a base64 data-URI download."""
    # Known extensions get a proper MIME type; anything else is generic binary.
    _MIME_BY_EXT = {
        'md': 'text/markdown',
        'mp3': 'audio/mpeg',
        'mp4': 'video/mp4',
        'pdf': 'application/pdf',
        'txt': 'text/plain'
    }
    with open(file_path, "rb") as fh:
        payload = base64.b64encode(fh.read()).decode()
    name = os.path.basename(file_path)
    mime = _MIME_BY_EXT.get(name.split('.')[-1], 'application/octet-stream')
    return f'<a href="data:{mime};base64,{payload}" download="{name}">⬇️ Download {name}</a>'
def generate_filename(content, file_type="md"):
    """Generate a unique, filesystem-safe filename derived from *content*.

    The name is prefixed with a second-resolution timestamp; the first 50
    characters of *content* are sanitized. Fix: the original only stripped
    punctuation, so spaces and newlines from the content leaked into the
    filename — whitespace runs are now collapsed to single underscores.
    """
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    safe_content = re.sub(r'[^\w\s-]', '', content[:50])
    # Collapse all whitespace (including newlines) to underscores and trim.
    safe_content = re.sub(r'\s+', '_', safe_content).strip('_')
    return f"{timestamp}_{safe_content}.{file_type}"
def get_autoplay_audio_html(audio_path, width="100%"):
    """Build an autoplaying <audio> player plus a download link for *audio_path*.

    On any failure (missing file, unreadable data) an error string is returned
    instead of raising, so callers can render it directly.
    """
    try:
        with open(audio_path, "rb") as fh:
            encoded = base64.b64encode(fh.read()).decode()
        download_name = os.path.basename(audio_path)
        return f'''
        <audio controls autoplay style="width: {width};">
            <source src="data:audio/mpeg;base64,{encoded}" type="audio/mpeg">
            Your browser does not support the audio element.
        </audio>
        <div style="margin-top: 5px;">
            <a href="data:audio/mpeg;base64,{encoded}"
               download="{download_name}"
               style="text-decoration: none;">
                ⬇️ Download Audio
            </a>
        </div>
        '''
    except Exception as exc:
        return f"Error loading audio: {str(exc)}"
def get_video_html(video_path, width="100%"):
    """Build an autoplaying, muted, looping <video> element for *video_path*.

    Fixes: the original leaked the file handle via open(...).read() and,
    unlike its audio sibling, raised on a missing file. Now the file is read
    with a context manager and read failures return an error string so
    callers can render it directly.
    """
    try:
        with open(video_path, 'rb') as fh:
            video_url = f"data:video/mp4;base64,{base64.b64encode(fh.read()).decode()}"
    except Exception as e:
        return f"Error loading video: {str(e)}"
    return f'''
    <video width="{width}" controls autoplay muted loop>
        <source src="{video_url}" type="video/mp4">
        Your browser does not support the video tag.
    </video>
    '''
# 5. Voice Recognition Component
def create_voice_component():
    """Embed a browser speech-recognition widget (webkitSpeechRecognition).

    Interim transcripts are shown inline; final transcripts are posted to the
    parent frame as a {type: 'voice_search', query: ...} message.
    """
    widget_html = """
    <div style="padding: 20px; border-radius: 10px; background: #f0f2f6;">
        <button id="startBtn" class="streamlit-button">Start Voice Search</button>
        <p id="status">Click to start speaking</p>
        <div id="result"></div>
        <script>
            if ('webkitSpeechRecognition' in window) {
                const recognition = new webkitSpeechRecognition();
                recognition.continuous = false;
                recognition.interimResults = true;
                const startBtn = document.getElementById('startBtn');
                const status = document.getElementById('status');
                const result = document.getElementById('result');
                startBtn.onclick = () => {
                    recognition.start();
                    status.textContent = 'Listening...';
                };
                recognition.onresult = (event) => {
                    const transcript = Array.from(event.results)
                        .map(result => result[0].transcript)
                        .join('');
                    result.textContent = transcript;
                    if (event.results[0].isFinal) {
                        window.parent.postMessage({
                            type: 'voice_search',
                            query: transcript
                        }, '*');
                    }
                };
                recognition.onend = () => {
                    status.textContent = 'Click to start speaking';
                };
            }
        </script>
    </div>
    """
    return components.html(widget_html, height=200)
# 6. Audio Processing Functions
async def generate_audio(text, voice="en-US-AriaNeural", rate="+0%", pitch="+0Hz"):
    """Synthesize *text* to an mp3 via Edge TTS and return the filename.

    Returns None for blank input. The output name embeds a second-resolution
    timestamp (response_YYYYMMDD_HHMMSS.mp3).
    """
    if not text.strip():
        return None
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    out_path = f"response_{stamp}.mp3"
    tts = edge_tts.Communicate(text, voice, rate=rate, pitch=pitch)
    await tts.save(out_path)
    return out_path
def render_audio_result(audio_file, title="Generated Audio"):
    """Show *title* and an autoplaying player for *audio_file*, if it exists."""
    # Nothing to render when the file is missing or the path is falsy.
    if not (audio_file and os.path.exists(audio_file)):
        return
    st.markdown(f"### {title}")
    st.markdown(get_autoplay_audio_html(audio_file), unsafe_allow_html=True)
# 7. Search and Process Functions
def perform_arxiv_search(query, response_type="summary"):
    """Query the Arxiv RAG space and return (response_text, refs).

    *refs* is the raw semantic-search markdown; *response_text* is the LLM
    summary when response_type == "summary", otherwise the refs themselves.
    """
    model = "mistralai/Mixtral-8x7B-Instruct-v0.1"
    rag = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
    # First element of the returned tuple is the reference markdown.
    refs = rag.predict(
        query,
        20,
        "Semantic Search",
        model,
        api_name="/update_with_rag_md"
    )[0]
    # Separate call for the LLM's interpretation of the same query.
    summary = rag.predict(
        query,
        model,
        True,
        api_name="/ask_llm"
    )
    if response_type == "summary":
        return summary, refs
    return refs, refs
async def process_voice_search_with_autoplay(query):
    """Run an Arxiv search for *query*, voice the summary, and record it.

    Stores the result both as last_search (for the results pane) and in
    chat_history. Fix: the original never appended to chat_history, so the
    History tab was permanently empty even though it read that list.
    Returns the generated audio filename (or None).
    """
    summary, full_results = perform_arxiv_search(query)
    audio_file = await generate_audio(summary)
    st.session_state.current_audio = audio_file
    entry = {
        'query': query,
        'summary': summary,
        'full_results': full_results,
        'audio': audio_file,
        'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    }
    st.session_state.last_search = entry
    # Record the search so display_search_history has something to show.
    st.session_state.chat_history.append(entry)
    if audio_file:
        render_audio_result(audio_file, "Search Results")
    return audio_file
def display_search_results_with_audio():
    """Render the latest search: summary, expandable full results, and audio."""
    last = st.session_state.last_search
    # Nothing searched yet — render nothing.
    if not last:
        return
    st.subheader("Latest Results")
    st.markdown(last['summary'])
    with st.expander("View Full Results"):
        st.markdown(last['full_results'])
    if st.session_state.current_audio:
        render_audio_result(st.session_state.current_audio, "Audio Summary")
# 8. UI Components
def render_search_interface():
    """Render the main search tab: voice widget, text fallback, and results.

    Fix: the original launched a search even when the query box was empty,
    producing a meaningless remote call; an empty query now shows a warning.
    """
    st.header("🔍 Voice Search")
    create_voice_component()
    col1, col2 = st.columns([3, 1])
    with col1:
        query = st.text_input("Or type your query:")
    with col2:
        if st.button("🔍 Search"):
            if query.strip():
                asyncio.run(process_voice_search_with_autoplay(query))
            else:
                st.warning("Please enter a search query first.")
    display_search_results_with_audio()
def display_search_history():
    """List past searches (newest first) with summaries and recorded audio."""
    st.header("Search History")
    history = st.session_state.chat_history
    if not history:
        return
    for entry in reversed(history):
        label = f"🔍 {entry['timestamp']} - {entry['query'][:50]}..."
        with st.expander(label, expanded=False):
            st.markdown(entry['summary'])
            # Only entries that produced audio get a player.
            if entry.get('audio'):
                render_audio_result(entry['audio'], "Recorded Response")
def render_settings():
    """Draw the sidebar settings widgets and return their values as a dict.

    Keys: 'voice' (TTS voice name), 'autoplay' (bool), 'rate' and 'pitch'
    (percent offsets in [-50, 50], step 5).
    """
    st.sidebar.title("⚙️ Settings")
    voices = (
        "en-US-AriaNeural",
        "en-US-GuyNeural",
        "en-GB-SoniaNeural",
        "en-AU-NatashaNeural",
    )
    voice = st.sidebar.selectbox("Select Voice", voices)
    autoplay = st.sidebar.checkbox("Autoplay Responses", value=True)
    rate = st.sidebar.slider("Speech Rate", -50, 50, 0, 5)
    pitch = st.sidebar.slider("Pitch", -50, 50, 0, 5)
    return {'voice': voice, 'autoplay': autoplay, 'rate': rate, 'pitch': pitch}
def display_file_manager():
    """Sidebar file manager: list, download, and delete generated files.

    Fixes: deletions now tolerate files that vanished between Streamlit
    reruns (the script reruns on every click, so the glob snapshot can be
    stale), and "Download All" no longer emits an empty zip when there is
    nothing to archive.
    """
    st.sidebar.title("📁 File Manager")
    all_files = []
    for ext in ['.md', '.mp3', '.mp4']:
        all_files.extend(glob.glob(f"*{ext}"))
    # Drop paths removed mid-scan so getmtime below cannot raise.
    all_files = [f for f in all_files if os.path.exists(f)]
    all_files.sort(key=os.path.getmtime, reverse=True)
    col1, col2 = st.sidebar.columns(2)
    with col1:
        if st.button("🗑 Delete All"):
            for file in all_files:
                if os.path.exists(file):  # stale-rerun guard
                    os.remove(file)
            st.rerun()
    with col2:
        if st.button("⬇️ Download All"):
            if all_files:
                zip_name = f"archive_{datetime.now().strftime('%Y%m%d_%H%M%S')}.zip"
                with zipfile.ZipFile(zip_name, 'w') as zipf:
                    for file in all_files:
                        zipf.write(file)
                st.sidebar.markdown(get_download_link(zip_name), unsafe_allow_html=True)
            else:
                st.sidebar.write("No files to archive")
    for file in all_files:
        with st.sidebar.expander(f"📄 {os.path.basename(file)}", expanded=False):
            st.write(f"Last modified: {datetime.fromtimestamp(os.path.getmtime(file)).strftime('%Y-%m-%d %H:%M:%S')}")
            col1, col2 = st.columns(2)
            with col1:
                st.markdown(get_download_link(file), unsafe_allow_html=True)
            with col2:
                if st.button("🗑 Delete", key=f"del_{file}"):
                    if os.path.exists(file):  # stale-rerun guard
                        os.remove(file)
                    st.rerun()
# 9. Main Application
def main():
    """Top-level page: wires together the sidebar, the four tabs, and the
    media gallery. Widget values in the Settings tab are collected locally;
    rendering them is what matters here."""
    st.title("🔬 Research Assistant Pro")
    render_settings()
    display_file_manager()
    tab_search, tab_history, tab_media, tab_settings = st.tabs(
        ["🎤 Voice Search", "📚 History", "🎵 Media", "⚙️ Settings"]
    )
    with tab_search:
        render_search_interface()
    with tab_history:
        display_search_history()
    with tab_media:
        st.header("Media Gallery")
        audio_tab, video_tab, image_tab = st.tabs(["🎵 Audio", "🎥 Video", "📷 Images"])
        with audio_tab:
            mp3_paths = glob.glob("*.mp3")
            if not mp3_paths:
                st.write("No audio files found")
            else:
                for path in mp3_paths:
                    st.markdown(get_autoplay_audio_html(path), unsafe_allow_html=True)
        with video_tab:
            mp4_paths = glob.glob("*.mp4")
            if not mp4_paths:
                st.write("No video files found")
            else:
                for path in mp4_paths:
                    st.markdown(get_video_html(path), unsafe_allow_html=True)
        with image_tab:
            image_paths = glob.glob("*.png") + glob.glob("*.jpg") + glob.glob("*.jpeg")
            if not image_paths:
                st.write("No image files found")
            else:
                # Tile the gallery three images per row.
                columns = st.columns(3)
                for idx, path in enumerate(image_paths):
                    with columns[idx % 3]:
                        st.image(Image.open(path), use_column_width=True)
                        st.markdown(get_download_link(path), unsafe_allow_html=True)
    with tab_settings:
        st.header("Advanced Settings")
        st.subheader("Audio Settings")
        audio_settings = {
            'quality': st.select_slider(
                "Audio Quality",
                options=["Low", "Medium", "High"],
                value="Medium"
            ),
            'save_history': st.checkbox(
                "Save Audio History",
                value=True,
                help="Save generated audio files in history"
            ),
            'max_duration': st.slider(
                "Max Audio Duration (seconds)",
                min_value=30,
                max_value=300,
                value=120,
                step=30
            )
        }
        st.subheader("Search Settings")
        search_settings = {
            'max_results': st.slider(
                "Max Search Results",
                min_value=5,
                max_value=50,
                value=20
            ),
            'include_citations': st.checkbox(
                "Include Citations",
                value=True
            ),
            'auto_summarize': st.checkbox(
                "Auto-Summarize Results",
                value=True
            )
        }
        st.subheader("File Management")
        file_settings = {
            'auto_cleanup': st.checkbox(
                "Auto-cleanup Old Files",
                value=False,
                help="Automatically remove files older than the specified duration"
            )
        }
        # The retention control only appears when cleanup is enabled.
        if file_settings['auto_cleanup']:
            file_settings['cleanup_days'] = st.number_input(
                "Days to keep files",
                min_value=1,
                max_value=30,
                value=7
            )
# 10. Custom CSS Styling
# Injected once at import time; styles the page background, buttons, and
# the audio-player / file-manager panels.
_CUSTOM_CSS = """
    <style>
    .main {
        background: linear-gradient(135deg, #f5f7fa 0%, #e8edf5 100%);
    }
    .stButton>button {
        background-color: #4CAF50;
        color: white;
        padding: 0.5rem 1rem;
        border-radius: 5px;
        border: none;
        transition: background-color 0.3s;
    }
    .stButton>button:hover {
        background-color: #45a049;
    }
    .audio-player {
        margin: 1rem 0;
        padding: 1rem;
        border-radius: 10px;
        background: white;
        box-shadow: 0 2px 4px rgba(0,0,0,0.1);
    }
    .file-manager {
        padding: 1rem;
        background: white;
        border-radius: 10px;
        margin: 1rem 0;
    }
    </style>
"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)

if __name__ == "__main__":
    main()