# Standard library
import asyncio
import base64
import os
import re
from collections import defaultdict
from datetime import datetime

# Third-party
import anthropic
import edge_tts
import openai
import streamlit as st
from dotenv import load_dotenv
from gradio_client import Client

# 🎯 1. Core Configuration & Setup
# st.set_page_config must be the first Streamlit command executed by the script.
# The trophy/bike emoji below were mis-encoded (mojibake) in the original
# literals and have been restored to the intended characters.
st.set_page_config(
    page_title="🚲BikeAI🏆 Claude/GPT Research",
    page_icon="🚲🏆",
    layout="wide",
    initial_sidebar_state="auto",
    menu_items={
        'Get Help': 'https://huggingface.co/awacke1',
        'Report a bug': 'https://huggingface.co/spaces/awacke1',
        'About': "🚲BikeAI🏆 Claude/GPT Research AI",
    },
)
st.markdown(""" """, unsafe_allow_html=True)

# 🔑 2. API Setup & Clients
# Keys are read from the environment (optionally populated from a .env file)
# and then overridden by Streamlit secrets when those are defined.
load_dotenv()
openai_api_key = os.getenv('OPENAI_API_KEY', "")
# NOTE(review): the environment variable is 'ANTHROPIC_API_KEY_3' while the
# secrets key below is 'ANTHROPIC_API_KEY' -- confirm the mismatch is intended.
anthropic_key = os.getenv('ANTHROPIC_API_KEY_3', "")
if 'OPENAI_API_KEY' in st.secrets:
    openai_api_key = st.secrets['OPENAI_API_KEY']
if 'ANTHROPIC_API_KEY' in st.secrets:
    anthropic_key = st.secrets["ANTHROPIC_API_KEY"]

openai.api_key = openai_api_key
claude_client = anthropic.Anthropic(api_key=anthropic_key)
openai_client = openai  # Using OpenAI directly

# 📝 3. Session State Management
# Seed each key only when it is absent so values survive Streamlit reruns.
# The dict literal is rebuilt on every run, so the list defaults are fresh
# objects and are never shared between sessions.
_SESSION_DEFAULTS = {
    'transcript_history': [],
    'chat_history': [],
    'openai_model': "gpt-4",  # Update as needed
    'messages': [],
    'viewing_prefix': None,
    'should_rerun': False,
    'old_val': None,
}
for _state_key, _state_default in _SESSION_DEFAULTS.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default

# 🧠 4.
# High-Information Content Extraction

# Common English function words that carry little information on their own.
_STOP_WORDS = frozenset({
    'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of',
    'with', 'by', 'from', 'up', 'about', 'into', 'over', 'after', 'is', 'are',
    'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'do', 'does',
    'did', 'will', 'would', 'should', 'could', 'might', 'must', 'shall',
    'can', 'may', 'this', 'that', 'these', 'those', 'i', 'you', 'he', 'she',
    'it', 'we', 'they', 'what', 'which', 'who', 'when', 'where', 'why', 'how',
    'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some',
    'such', 'than', 'too', 'very', 'just', 'there',
})

# Multi-word phrases that are kept intact instead of being split into words.
_KEY_PHRASES = (
    'artificial intelligence', 'machine learning', 'deep learning',
    'neural network', 'personal assistant', 'natural language',
    'computer vision', 'data science', 'reinforcement learning',
    'knowledge graph', 'semantic search', 'time series',
    'large language model', 'transformer model', 'attention mechanism',
    'autonomous system', 'edge computing', 'quantum computing',
    'blockchain technology', 'cognitive science', 'human computer',
    'decision making', 'arxiv search', 'research paper', 'scientific study',
    'empirical analysis',
)

# Maximum number of terms returned by get_high_info_terms.
_MAX_TERMS = 5

# Short filler words dropped when building filename snippets.
_FILENAME_STOP_WORDS = frozenset({
    'the', 'and', 'for', 'with', 'this', 'that', 'from', 'just', 'very',
    'then', 'been', 'only', 'also', 'about',
})


def get_high_info_terms(text: str) -> list:
    """Extract high-information terms from text, including key phrases.

    Known key phrases are detected case-insensitively and reported first (in
    lowercase), followed by the remaining individual words that are longer
    than three characters, are not stop words, are not purely numeric and
    contain at least one letter.

    Args:
        text: Free-form input text.

    Returns:
        Up to five unique lowercase terms, in first-seen order.
    """
    lower_text = text.lower()
    preserved_phrases = [phrase for phrase in _KEY_PHRASES if phrase in lower_text]

    # Remove every matched phrase regardless of its casing; a case-sensitive
    # removal would leave "Machine Learning" in the text and report its words
    # a second time as individual terms. A space is substituted so the
    # neighbouring words are not glued together.
    for phrase in preserved_phrases:
        text = re.sub(re.escape(phrase), ' ', text, flags=re.IGNORECASE)

    words = re.findall(r'\b\w+(?:-\w+)*\b', text)
    high_info_words = [
        word.lower()
        for word in words
        if len(word) > 3
        and word.lower() not in _STOP_WORDS
        and not word.isdigit()
        and any(c.isalpha() for c in word)
    ]

    # dict.fromkeys de-duplicates while preserving insertion order.
    unique_terms = list(dict.fromkeys(preserved_phrases + high_info_words))
    return unique_terms[:_MAX_TERMS]


def clean_text_for_filename(text: str) -> str:
    """Remove punctuation and short filler words, return a compact string.

    Args:
        text: Text to condense.

    Returns:
        Lowercase words longer than three characters (minus filler words)
        joined by underscores and truncated to 200 characters.
    """
    text = re.sub(r'[^\w\s-]', '', text.lower())
    filtered = [
        word for word in text.split()
        if len(word) > 3 and word not in _FILENAME_STOP_WORDS
    ]
    return '_'.join(filtered)[:200]


# 📁 5. File Operations
def generate_filename(prompt, response, file_type="md"):
    """Generate a filename from meaningful terms of the prompt and response.

    The name is a ``yymm_HHMM_`` timestamp prefix, the high-information terms
    of the combined text, and a cleaned snippet of the first 100 characters
    of the prompt and of the response. The part after the prefix is trimmed
    to at most 150 characters.

    Args:
        prompt: Prompt text.
        response: Response text.
        file_type: File extension without the leading dot.

    Returns:
        The generated filename (no directory component).
    """
    prefix = datetime.now().strftime("%y%m_%H%M") + "_"
    combined = (prompt + " " + response).strip()
    info_terms = get_high_info_terms(combined)

    # Include a short snippet from prompt and response.
    snippet = (prompt[:100] + " " + response[:100]).strip()
    snippet_cleaned = clean_text_for_filename(snippet)

    # Skip empty parts so the name never gains a stray trailing underscore.
    name_parts = [part for part in info_terms + [snippet_cleaned] if part]
    full_name = '_'.join(name_parts)[:150]

    return f"{prefix}{full_name}.{file_type}"


def create_file(prompt, response, file_type="md"):
    """Create file with an intelligent naming scheme.

    Writes the prompt, a blank line and the response as UTF-8 text to a file
    in the current working directory.

    Args:
        prompt: Prompt text.
        response: Response text.
        file_type: File extension without the leading dot.

    Returns:
        The name of the file that was written.
    """
    filename = generate_filename(prompt.strip(), response.strip(), file_type)
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(prompt + "\n\n" + response)
    return filename


def get_download_link(file):
    """Generate an HTML download link for a file.

    The file content is embedded in the link as a base64 ``data:`` URI. The
    original returned only the label text and left the encoded content
    unused, so nothing could be downloaded.

    Args:
        file: Path of the file to offer for download.

    Returns:
        An ``<a>`` element as a string; render it with
        ``st.markdown(..., unsafe_allow_html=True)``.
    """
    # Local import keeps this block self-contained.
    from html import escape

    with open(file, "rb") as f:
        b64 = base64.b64encode(f.read()).decode()
    # Escape the name: it is interpolated into both an attribute and text.
    name = escape(os.path.basename(file), quote=True)
    return (
        f'<a href="data:application/octet-stream;base64,{b64}" '
        f'download="{name}">📂 Download {name}</a>'
    )


# 🔊 6.
Audio Processing def clean_for_speech(text: str) -> str: """Clean text for speech synthesis""" text = text.replace("\n", " ") text = text.replace("", " ") text = text.replace("#", "") text = re.sub(r"\(https?:\/\/[^\)]+\)", "", text) text = re.sub(r"\s+", " ", text).strip() return text async def edge_tts_generate_audio(text, voice="en-US-AriaNeural", rate=0, pitch=0, out_fn="temp.mp3"): """Generate audio using Edge TTS (async)""" text = clean_for_speech(text) if not text.strip(): return None rate_str = f"{rate:+d}%" pitch_str = f"{pitch:+d}Hz" communicate = edge_tts.Communicate(text, voice, rate=rate_str, pitch=pitch_str) await communicate.save(out_fn) return out_fn def speak_with_edge_tts(text, voice="en-US-AriaNeural", rate=0, pitch=0, out_fn="temp.mp3"): """Wrapper for Edge TTS generation (sync)""" return asyncio.run(edge_tts_generate_audio(text, voice, rate, pitch, out_fn)) def play_and_download_audio(file_path): """Play and provide a download link for audio""" if file_path and os.path.exists(file_path): st.audio(file_path) dl_link = f'Download {os.path.basename(file_path)}' st.markdown(dl_link, unsafe_allow_html=True) def auto_play_audio(file_path): """Embeds an