PDF-text-to-speech-Per-Page

Running

App Files Files Community

PDF-text-to-speech-Per-Page / app.py

awacke1

Create app.py

b5b6b67 verified 30 days ago

raw

history blame contribute delete

5.88 kB

	import streamlit as st
	import base64
	from gtts import gTTS
	from io import BytesIO
	import os
	from PyPDF2 import PdfReader
	import threading
	import time
	import hashlib
	from datetime import datetime
	import json

	class AudioProcessor:
	    """Convert text to speech with gTTS and cache the result on disk.

	    Each synthesized clip is stored in ``cache_dir`` under a name derived
	    from the MD5 of ``"{text}:{lang}"``. A ``metadata.json`` file in the same
	    directory records which keys have been cached, so that a newly
	    constructed processor can reuse clips written by an earlier one.

	    NOTE(review): cache files keep the ``.wav`` suffix used by the original
	    code although gTTS is documented to emit MP3 data -- confirm before
	    renaming, since that would orphan existing cache files.
	    """

	    def __init__(self, cache_dir="audio_cache"):
	        """Create the cache directory if needed and load persisted metadata.

	        Args:
	            cache_dir: Directory holding cached audio and ``metadata.json``.
	        """
	        self.cache_dir = cache_dir
	        os.makedirs(self.cache_dir, exist_ok=True)
	        # create_audio() is called from worker threads, so updates to the
	        # metadata dict and its JSON file are serialized.
	        self._lock = threading.Lock()
	        self.metadata = self._load_metadata()

	    def _metadata_path(self):
	        """Return the path of the JSON file that persists ``self.metadata``."""
	        return os.path.join(self.cache_dir, "metadata.json")

	    def _load_metadata(self):
	        """Return the persisted metadata dict, or ``{}`` if none is usable."""
	        try:
	            with open(self._metadata_path(), encoding="utf-8") as fh:
	                loaded = json.load(fh)
	        except (OSError, ValueError):
	            # Missing, unreadable or corrupt metadata only costs cache hits;
	            # it must not prevent the processor from being constructed.
	            return {}
	        return loaded if isinstance(loaded, dict) else {}

	    def _save_metadata(self):
	        """Write ``self.metadata`` to disk. The caller must hold ``self._lock``."""
	        with open(self._metadata_path(), "w", encoding="utf-8") as fh:
	            json.dump(self.metadata, fh)

	    def create_audio(self, text, lang='en'):
	        """Return spoken audio for *text* as a ``BytesIO`` positioned at 0.

	        A cached clip is returned when the key is known in the metadata and
	        its file is still present; otherwise the audio is synthesized with
	        gTTS, written to the cache, and recorded in the persisted metadata.

	        Args:
	            text: Text to speak.
	            lang: Language code passed through to gTTS.

	        Returns:
	            A ``BytesIO`` holding the audio bytes.
	        """
	        cache_key = hashlib.md5(f"{text}:{lang}".encode()).hexdigest()
	        cache_path = os.path.join(self.cache_dir, f"{cache_key}.wav")

	        if cache_key in self.metadata:
	            try:
	                with open(cache_path, 'rb') as fh:
	                    return BytesIO(fh.read())
	            except FileNotFoundError:
	                # The cache file was removed behind our back; regenerate it.
	                pass

	        sound_file = BytesIO()
	        gTTS(text, lang=lang).write_to_fp(sound_file)

	        with open(cache_path, 'wb') as fh:
	            fh.write(sound_file.getvalue())

	        with self._lock:
	            self.metadata[cache_key] = {
	                'timestamp': datetime.now().isoformat(),
	                'text_length': len(text),
	                'lang': lang,
	            }
	            # Persist right away: without this a new processor instance
	            # starts with empty metadata and never gets a cache hit.
	            self._save_metadata()

	        sound_file.seek(0)
	        return sound_file

	def get_download_link(bin_data, filename, size_mb=None):
	    """Build an HTML snippet offering *bin_data* as a downloadable file.

	    The bytes are embedded as a base64 ``data:`` URI in an anchor whose
	    ``download`` attribute and visible label are both *filename*.

	    Args:
	        bin_data: Raw bytes to embed.
	        filename: Name used for the download and shown in the link.
	        size_mb: Optional size in megabytes, rendered with one decimal.
	            A falsy value (``None`` or ``0``) leaves the size line empty.

	    Returns:
	        The HTML markup as a string.
	    """
	    encoded = base64.b64encode(bin_data).decode()
	    if size_mb:
	        size_label = f"({size_mb:.1f} MB)"
	    else:
	        size_label = ""
	    return f'''
	<div class="download-container">
	<a href="data:application/octet-stream;base64,{encoded}"
	download="{filename}" class="download-link">📥 {filename}</a>
	<div class="file-info">{size_label}</div>
	</div>
	'''

	def process_pdf(pdf_file, max_pages, language, audio_processor):
	    """Extract text from a PDF and start speech synthesis for each page.

	    Text extraction is synchronous. Audio for each page is produced on its
	    own background thread and stored in the returned ``audios`` dict under
	    the zero-based page index once it is ready, so the dict may still be
	    filling up when this function returns.

	    Every page index is eventually published: pages with no extractable
	    text, and pages whose synthesis raises, get an empty ``BytesIO`` so that
	    a caller polling ``audios`` cannot wait forever.

	    Args:
	        pdf_file: Path or file-like object accepted by ``PdfReader``.
	        max_pages: Maximum number of leading pages to process.
	        language: Language code forwarded to ``create_audio``.
	        audio_processor: Object exposing ``create_audio(text, lang)``.

	    Returns:
	        ``(texts, audios, total_pages)``: the list of page texts, the dict
	        mapping page index to audio buffer, and the number of pages handled.
	    """
	    import logging  # local import: the module does not import logging itself

	    reader = PdfReader(pdf_file)
	    total_pages = min(len(reader.pages), max_pages)
	    texts, audios = [], {}

	    def synthesize(page_index, page_text):
	        """Create audio for one page and publish it under its own index."""
	        try:
	            audio = audio_processor.create_audio(page_text, language)
	        except Exception:
	            # Thread boundary: log and publish a placeholder instead of
	            # dying silently and leaving the page index missing forever.
	            logging.getLogger(__name__).exception(
	                "Audio synthesis failed for page %d", page_index + 1
	            )
	            audio = BytesIO()
	        audios[page_index] = audio

	    for i in range(total_pages):
	        text = reader.pages[i].extract_text() or ""
	        texts.append(text)

	        if not text.strip():
	            # Nothing to speak on this page; skip the TTS call entirely.
	            audios[i] = BytesIO()
	            continue

	        # Pass the index and text as thread arguments so each worker sees
	        # its own values; a closure over the loop variables could observe a
	        # later iteration's page by the time the thread runs.
	        threading.Thread(target=synthesize, args=(i, text)).start()

	    return texts, audios, total_pages

	# NOTE(review): gTTS is documented to emit MP3 data, so audio is served and
	# offered for download as MP3 rather than WAV -- confirm against gTTS docs.
	_AUDIO_MIME = "audio/mpeg"
	_AUDIO_EXT = "mp3"
	# Upper bound, in seconds, on how long the UI waits for one page's audio.
	_AUDIO_WAIT_SECONDS = 120.0


	def _wait_for_audio(audios, page_index, timeout=_AUDIO_WAIT_SECONDS, poll_interval=0.1):
	    """Poll *audios* until *page_index* appears; return ``None`` on timeout."""
	    deadline = time.monotonic() + timeout
	    while page_index not in audios:
	        if time.monotonic() >= deadline:
	            return None
	        time.sleep(poll_interval)
	    return audios[page_index]


	def main():
	    """Render the Streamlit page: PDF-to-audio, free-text-to-audio, cache control.

	    For an uploaded PDF, each processed page is shown in an expander with its
	    text and an audio player, and a download link is added to the sidebar.
	    Pages whose audio is empty or does not arrive within the wait limit get a
	    warning instead of a player. A text area offers the same conversion for
	    arbitrary text, and a sidebar button empties the on-disk audio cache.
	    """
	    st.set_page_config(page_title="📚 PDF to Audio 🎧", page_icon="🎉", layout="wide")

	    # Apply styling
	    st.markdown("""
	<style>
	.download-link {
	color: #1E90FF;
	text-decoration: none;
	padding: 8px 12px;
	margin: 5px;
	border: 1px solid #1E90FF;
	border-radius: 5px;
	display: inline-block;
	transition: all 0.3s ease;
	}
	.download-link:hover {
	background-color: #1E90FF;
	color: white;
	}
	.file-info {
	font-size: 0.8em;
	color: gray;
	margin-top: 4px;
	}
	</style>
	""", unsafe_allow_html=True)

	    # Initialize processor
	    audio_processor = AudioProcessor()

	    # Sidebar settings
	    st.sidebar.title("📥 Downloads & Settings")
	    language_names = {'en': 'English', 'es': 'Spanish', 'fr': 'French',
	                      'de': 'German', 'it': 'Italian'}
	    language = st.sidebar.selectbox(
	        "Select Language",
	        options=['en', 'es', 'fr', 'de', 'it'],
	        format_func=lambda code: language_names[code]
	    )

	    # Main interface
	    st.markdown("<h1>📚 PDF to Audio Converter 🎧</h1>", unsafe_allow_html=True)

	    col1, col2 = st.columns(2)
	    with col1:
	        uploaded_file = st.file_uploader("Choose a PDF file", "pdf")
	    with col2:
	        max_pages = st.slider('Select pages to process', min_value=1, max_value=100, value=10)

	    if uploaded_file:
	        progress_bar = st.progress(0)
	        status = st.empty()

	        with st.spinner('Processing PDF...'):
	            texts, audios, total_pages = process_pdf(uploaded_file, max_pages, language, audio_processor)

	            for i, text in enumerate(texts):
	                with st.expander(f"Page {i+1}", expanded=i==0):
	                    st.markdown(text)

	                    # Bounded wait: a worker that never publishes its page
	                    # must not freeze the whole app.
	                    audio = _wait_for_audio(audios, i)
	                    audio_bytes = audio.getvalue() if audio is not None else b""

	                    if audio_bytes:
	                        st.audio(audio_bytes, format=_AUDIO_MIME)

	                        # Add download link
	                        size_mb = len(audio_bytes) / (1024 * 1024)
	                        st.sidebar.markdown(
	                            get_download_link(audio_bytes, f'page_{i+1}.{_AUDIO_EXT}', size_mb),
	                            unsafe_allow_html=True
	                        )
	                    else:
	                        st.warning(f"No audio available for page {i+1}.")

	                progress_bar.progress((i + 1) / total_pages)
	                status.text(f"Processing page {i+1}/{total_pages}")

	        st.success(f"✅ Successfully processed {total_pages} pages!")

	    # Text to Audio section
	    st.markdown("### ✍️ Text to Audio")
	    prompt = st.text_area("Enter text to convert to audio", height=200)

	    # Skip whitespace-only input: there is nothing to speak.
	    if prompt and prompt.strip():
	        with st.spinner('Converting text to audio...'):
	            sound_file = audio_processor.create_audio(prompt, language)
	            custom_bytes = sound_file.getvalue()
	            st.audio(custom_bytes, format=_AUDIO_MIME)

	            size_mb = len(custom_bytes) / (1024 * 1024)
	            st.sidebar.markdown("### 🎵 Custom Audio")
	            st.sidebar.markdown(
	                get_download_link(custom_bytes, f'custom_text.{_AUDIO_EXT}', size_mb),
	                unsafe_allow_html=True
	            )

	    # Cache management
	    if st.sidebar.button("Clear Cache"):
	        for entry in os.listdir(audio_processor.cache_dir):
	            entry_path = os.path.join(audio_processor.cache_dir, entry)
	            # os.remove() cannot delete directories; only remove plain files.
	            if os.path.isfile(entry_path):
	                os.remove(entry_path)
	        # Drop the in-memory index too so it matches the now-empty directory.
	        audio_processor.metadata.clear()
	        st.sidebar.success("Cache cleared successfully!")

	# Script entry point: call main() only when this module is run directly,
	# not when it is imported.
	if __name__ == "__main__":
	main()