import streamlit as st
import anthropic
import openai
import base64
import os
import re
import asyncio
from datetime import datetime
from gradio_client import Client
from collections import defaultdict
import edge_tts
# 1. Core Configuration & Setup
# NOTE(review): this fragment does not parse as shown. The first four lines
# look like a keyword argument and dict entries of a call whose opening line
# is not visible here (presumably st.set_page_config(..., menu_items={...})
# -- confirm). The last line is the tail of a call that passes a
# triple-quoted string with unsafe_allow_html=True (presumably st.markdown
# -- confirm). Restore the missing opening lines before running.
page_title="π²BikeAIπ Claude/GPT Research",
'Get Help': '',
'Report a bug': '',
'About': "π²BikeAIπ Claude/GPT Research AI"
""", unsafe_allow_html=True)
# 2. API Setup & Clients
from dotenv import load_dotenv

# Load variables from a local .env file (if any) before reading the
# environment; without this call the import above has no effect.
load_dotenv()

# Environment values are the fallback; Streamlit secrets override them below.
# NOTE(review): the environment variable is 'ANTHROPIC_API_KEY_3' while the
# secret is 'ANTHROPIC_API_KEY' -- confirm the differing names are intended.
openai_api_key = os.getenv('OPENAI_API_KEY', "")
anthropic_key = os.getenv('ANTHROPIC_API_KEY_3', "")
if 'OPENAI_API_KEY' in st.secrets:
    openai_api_key = st.secrets['OPENAI_API_KEY']
if 'ANTHROPIC_API_KEY' in st.secrets:
    anthropic_key = st.secrets["ANTHROPIC_API_KEY"]

openai.api_key = openai_api_key
claude_client = anthropic.Anthropic(api_key=anthropic_key)
openai_client = openai  # Using OpenAI directly
# 3. Session State Management
# Seed each session key with its default only when it is not already present,
# so existing values are left untouched. The mapping is rebuilt every time
# this code runs, so the list defaults are always fresh objects.
_session_defaults = {
    'transcript_history': [],
    'chat_history': [],
    'openai_model': "gpt-4",  # Update as needed
    'messages': [],
    'viewing_prefix': None,
    'should_rerun': False,
    'old_val': None,
}
for _key, _default in _session_defaults.items():
    if _key not in st.session_state:
        st.session_state[_key] = _default
# 4. High-Information Content Extraction
def get_high_info_terms(text: str, max_terms: int = 5) -> list:
    """Extract high-information terms from text, including key phrases.

    Known multi-word key phrases are detected first (case-insensitively) and
    removed from the text so their component words are not counted a second
    time. Remaining words are kept when they are longer than three
    characters, are not stop words, and contain at least one letter.

    Args:
        text: Free-form text to analyse. Empty input yields an empty list.
        max_terms: Maximum number of terms to return.

    Returns:
        Lower-cased, de-duplicated terms: matched key phrases first (in the
        order they are listed below), then single words in order of first
        appearance, truncated to ``max_terms`` entries.
    """
    if not text:
        return []

    stop_words = {
        'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with',
        'by', 'from', 'up', 'about', 'into', 'over', 'after', 'is', 'are', 'was', 'were',
        'be', 'been', 'being', 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would',
        'should', 'could', 'might', 'must', 'shall', 'can', 'may', 'this', 'that', 'these',
        'those', 'i', 'you', 'he', 'she', 'it', 'we', 'they', 'what', 'which', 'who',
        'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most',
        'other', 'some', 'such', 'than', 'too', 'very', 'just', 'there',
    }
    key_phrases = [
        'artificial intelligence', 'machine learning', 'deep learning', 'neural network',
        'personal assistant', 'natural language', 'computer vision', 'data science',
        'reinforcement learning', 'knowledge graph', 'semantic search', 'time series',
        'large language model', 'transformer model', 'attention mechanism',
        'autonomous system', 'edge computing', 'quantum computing', 'blockchain technology',
        'cognitive science', 'human computer', 'decision making', 'arxiv search',
        'research paper', 'scientific study', 'empirical analysis',
    ]

    # Identify key phrases. Both detection and removal work on the
    # lower-cased text: removing from the original-case text would miss
    # "Machine Learning" and let its words reappear as separate terms.
    lower_text = text.lower()
    remaining = lower_text
    preserved_phrases = []
    for phrase in key_phrases:
        if phrase in lower_text:
            preserved_phrases.append(phrase)
            remaining = remaining.replace(phrase, '')

    # Extract individual words (hyphenated compounds stay together).
    words = re.findall(r'\b\w+(?:-\w+)*\b', remaining)
    high_info_words = [
        word for word in words
        if len(word) > 3
        and word not in stop_words
        # Requiring a letter also rejects purely numeric tokens.
        and any(c.isalpha() for c in word)
    ]

    # dict.fromkeys de-duplicates while preserving first-seen order.
    unique_terms = list(dict.fromkeys(preserved_phrases + high_info_words))
    return unique_terms[:max_terms]
def clean_text_for_filename(text: str) -> str:
    """Remove punctuation and short filler words, return a compact string.

    The text is lower-cased, every character that is not a word character,
    whitespace, or a hyphen is dropped, and the remaining words are kept only
    when they are longer than three characters and not in the filler list.

    Args:
        text: Text to condense.

    Returns:
        The kept words joined by underscores, cut to at most 200 characters.
    """
    stop_short = {
        'the', 'and', 'for', 'with', 'this', 'that', 'from',
        'just', 'very', 'then', 'been', 'only', 'also', 'about',
    }
    normalized = re.sub(r'[^\w\s-]', '', text.lower())
    kept = [w for w in normalized.split() if len(w) > 3 and w not in stop_short]
    return '_'.join(kept)[:200]
# 5. File Operations
def generate_filename(prompt, response, file_type="md"):
    """Generate filename with meaningful terms and short dense clips from prompt & response.

    The name is a timestamp prefix, then the high-information terms of the
    combined text, then a cleaned snippet of the first 100 characters of the
    prompt and of the response. The part after the prefix is capped at 150
    characters.

    Args:
        prompt: The prompt text.
        response: The response text.
        file_type: File extension without the leading dot.

    Returns:
        A filename of the form ``<yymm_HHMM>_<terms and snippet>.<file_type>``.
    """
    # Two-digit year and month, then hour and minute of the current time.
    prefix = datetime.now().strftime("%y%m_%H%M") + "_"
    combined = (prompt + " " + response).strip()
    info_terms = get_high_info_terms(combined)

    # Include a short snippet from prompt and response
    snippet = (prompt[:100] + " " + response[:100]).strip()
    snippet_cleaned = clean_text_for_filename(snippet)

    # Combine info terms and snippet, skipping empty parts so the name does
    # not pick up stray underscores.
    name_parts = [part for part in info_terms + [snippet_cleaned] if part]
    # Trim to ~150 chars (slicing is a no-op for shorter names).
    full_name = '_'.join(name_parts)[:150]

    return f"{prefix}{full_name}.{file_type}"
def create_file(prompt, response, file_type="md"):
    """Create file with an intelligent naming scheme.

    The filename is derived from the stripped prompt and response via
    ``generate_filename``; the file content is the prompt and response as
    given, separated by a blank line.

    Args:
        prompt: The prompt text.
        response: The response text.
        file_type: File extension without the leading dot.

    Returns:
        The name of the file that was written (UTF-8 encoded).
    """
    filename = generate_filename(prompt.strip(), response.strip(), file_type)
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(prompt + "\n\n" + response)
    return filename
def get_download_link(file):
    """Generate download link for file.

    The file content is embedded in the link as a base64 data URI, so the
    returned markup is self-contained.

    Args:
        file: Path of the file to offer for download.

    Returns:
        An HTML ``<a>`` element whose ``download`` attribute and label use
        the file's base name.
    """
    from html import escape  # local import keeps this block self-contained

    with open(file, "rb") as f:
        b64 = base64.b64encode(f.read()).decode()
    # Escape the name: it is interpolated into an attribute and into text.
    name = escape(os.path.basename(file), quote=True)
    return (
        f'<a href="data:application/octet-stream;base64,{b64}" '
        f'download="{name}">📂 Download {name}</a>'
    )
# 6. Audio Processing
def clean_for_speech(text: str) -> str:
    """Clean text for speech synthesis.

    Newlines and ``</s>`` markers become spaces, ``#`` characters are
    removed, parenthesised http(s) URLs are dropped, and runs of whitespace
    collapse to a single space.

    Args:
        text: Raw text, possibly containing Markdown.

    Returns:
        The cleaned text with surrounding whitespace stripped.
    """
    text = text.replace("\n", " ")
    # Replacing the empty string would insert a space between every
    # character; the marker to remove is the end-of-sequence token.
    text = text.replace("</s>", " ")
    text = text.replace("#", "")
    text = re.sub(r"\(https?:\/\/[^\)]+\)", "", text)
    text = re.sub(r"\s+", " ", text).strip()
    return text
async def edge_tts_generate_audio(text, voice="en-US-AriaNeural", rate=0, pitch=0, out_fn="temp.mp3"):
    """Generate audio using Edge TTS (async).

    Args:
        text: Text to speak; it is passed through ``clean_for_speech`` first.
        voice: Edge TTS voice name.
        rate: Speaking-rate offset, sent as a signed percentage.
        pitch: Pitch offset, sent as a signed Hz value.
        out_fn: Path of the audio file to write.

    Returns:
        ``out_fn`` once the audio has been saved, or ``None`` when there is
        nothing to speak after cleaning.
    """
    text = clean_for_speech(text)
    if not text.strip():
        return None
    # The "+d" format needs integers; int() keeps float inputs from raising.
    rate_str = f"{int(rate):+d}%"
    pitch_str = f"{int(pitch):+d}Hz"
    communicate = edge_tts.Communicate(text, voice, rate=rate_str, pitch=pitch_str)
    # Without this await no audio is written and callers get a path to a
    # file that does not exist.
    await communicate.save(out_fn)
    return out_fn
def speak_with_edge_tts(text, voice="en-US-AriaNeural", rate=0, pitch=0, out_fn="temp.mp3"):
    """Wrapper for Edge TTS generation (sync).

    Runs ``edge_tts_generate_audio`` to completion with ``asyncio.run`` and
    returns its result; all arguments are forwarded unchanged.
    """
    return asyncio.run(edge_tts_generate_audio(text, voice, rate, pitch, out_fn))
def play_and_download_audio(file_path):
    """Play and provide a download link for audio.

    Does nothing when ``file_path`` is empty or the file does not exist.

    Args:
        file_path: Path of the audio file to play and offer for download.
    """
    if not (file_path and os.path.exists(file_path)):
        return

    from html import escape  # local import keeps this block self-contained

    st.audio(file_path)
    with open(file_path, "rb") as f:
        b64 = base64.b64encode(f.read()).decode()
    # Escape the name: it is interpolated into an attribute and into text.
    name = escape(os.path.basename(file_path), quote=True)
    dl_link = f'<a href="data:audio/mpeg;base64,{b64}" download="{name}">Download {name}</a>'
    st.markdown(dl_link, unsafe_allow_html=True)
def auto_play_audio(file_path):
"""Embeds an