Spaces:
Runtime error
Runtime error
import json | |
import logging | |
import datetime | |
import time | |
import requests | |
import pytz | |
import unittest | |
import sqlite3 # Import sqlite3 for database handling | |
import gradio as gr | |
from deep_translator import GoogleTranslator | |
from deep_translator.exceptions import NotValidLength, RequestError | |
from utils import process_json_files, flatten_text_with_line_breaks, calculate_tanach_statistics, build_word_index | |
# Configure root logging at DEBUG level with "time - level - message" records.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.DEBUG,
)

# Load the Tanach text once at import time (process_json_files(1, 39)) and
# build the word index from it. Elsewhere in this file WORD_INDEX is used as a
# mapping whose numeric keys are compared against seconds-since-midnight and
# whose values carry "book_id", "chapter_id" and "verse_id".
TANACH_DATA = process_json_files(1, 39)
WORD_INDEX = build_word_index(TANACH_DATA)
# --- Database Setup --- | |
# Use a connection function to ensure each thread gets its own connection | |
def get_db_connection():
    """Open a new connection to the ``translation_cache.db`` SQLite file.

    Rows fetched through the returned connection are ``sqlite3.Row`` objects,
    so columns can be read by name as well as by position. The connection is
    returned open; closing it is left to the caller.
    """
    connection = sqlite3.connect('translation_cache.db')
    connection.row_factory = sqlite3.Row
    return connection
# Create the translation cache table if it doesn't exist.
# A sqlite3 connection used as a context manager only commits or rolls back
# the transaction; it does NOT close the connection. Close it explicitly so
# the start-up connection is not left open for the life of the process.
conn = get_db_connection()
try:
    cursor = conn.cursor()
    cursor.execute('''
CREATE TABLE IF NOT EXISTS translations (
book_id INTEGER,
chapter_id INTEGER,
english_text TEXT,
PRIMARY KEY (book_id, chapter_id)
)
''')
    conn.commit()
finally:
    conn.close()
# --- Utility Functions --- | |
def get_current_word_data(client_time_str):
    """Look up the word-index entry closest to a time of day.

    ``client_time_str`` is parsed as ``HH:MM:SS`` and converted to seconds
    since midnight; the ``WORD_INDEX`` key numerically nearest to that value
    is then selected.

    Returns:
        ``(WORD_INDEX[key], key)`` for the nearest key, or ``(None, None)``
        if any step raises (the error is logged).
    """
    try:
        parsed = datetime.datetime.strptime(client_time_str, "%H:%M:%S")
        seconds_since_midnight = (
            parsed.hour * 3600 + parsed.minute * 60 + parsed.second
        )

        def distance(key):
            return abs(key - seconds_since_midnight)

        # Nearest key wins; min() keeps the first one encountered on a tie.
        nearest_key = min(WORD_INDEX.keys(), key=distance)
        return WORD_INDEX[nearest_key], nearest_key
    except Exception as e:
        logging.error(f"Error processing client time: {e}")
        return None, None
def get_formatted_verse(book_id, chapter_id, verse_id, highlight_word=True):
    """Return the whole chapter containing the verse as ``<br>``-joined text.

    The chapter at ``TANACH_DATA[book_id]["text"][chapter_id]`` is flattened
    with ``flatten_text_with_line_breaks`` and the resulting pieces are joined
    with ``<br>``. ``verse_id`` and ``highlight_word`` are accepted but not
    used here; this function applies no highlighting itself.
    """
    flattened_lines = flatten_text_with_line_breaks(
        TANACH_DATA[book_id]["text"][chapter_id]
    )
    return '<br>'.join(flattened_lines)
def _split_into_chunks(text, max_length):
    """Split ``text`` into chunks of at most ``max_length`` characters.

    Chunks are cut between lines, so joining the chunks with a newline
    reproduces ``text``. Only a single line that is itself longer than
    ``max_length`` is cut mid-line (rejoining then adds a line break at the
    cut).
    """
    chunks = []
    current_lines = []
    current_length = 0
    for line in text.split('\n'):
        # An empty line still has to occupy a slot, hence the [''] fallback.
        pieces = [
            line[start:start + max_length]
            for start in range(0, len(line), max_length)
        ] or ['']
        for piece in pieces:
            # +1 accounts for the newline that joins this piece to the chunk.
            added = len(piece) + (1 if current_lines else 0)
            if current_lines and current_length + added > max_length:
                chunks.append('\n'.join(current_lines))
                current_lines = []
                current_length = 0
                added = len(piece)
            current_lines.append(piece)
            current_length += added
    if current_lines:
        chunks.append('\n'.join(current_lines))
    return chunks


def translate_chapter(hebrew_chapter, book_id, chapter_id):
    """Translate a Hebrew chapter to English, caching the result in SQLite.

    Args:
        hebrew_chapter: Chapter text as one string, one verse per line.
        book_id: Book identifier; part of the cache key.
        chapter_id: Chapter identifier; part of the cache key.

    Returns:
        The English text as a list of lines. On a translator
        ``RequestError`` a single-element list holding an "unavailable"
        message is returned instead and nothing is cached.
    """
    max_length = 2000  # Chunk size, kept under the translation service's length limit

    # Each call opens its own connection. The sqlite3 context manager would
    # only commit/rollback, never close, so close explicitly in ``finally``.
    conn = get_db_connection()
    try:
        cursor = conn.cursor()
        # Serve from the cache when this chapter was translated before.
        cursor.execute(
            "SELECT english_text FROM translations WHERE book_id=? AND chapter_id=?",
            (book_id, chapter_id)
        )
        cached = cursor.fetchone()
        if cached is not None:
            return cached['english_text'].split('\n')

        try:
            translator = GoogleTranslator(source='iw', target='en')
            # Chunk on line boundaries and rejoin with newlines so a verse is
            # never cut in half and lines on either side of a chunk boundary
            # are not fused together.
            translated_parts = [
                # Whitespace-only chunks have nothing to translate; pass them
                # through so the line count is preserved.
                translator.translate(chunk) if chunk.strip() else chunk
                for chunk in _split_into_chunks(hebrew_chapter, max_length)
            ]
            translated_text = '\n'.join(translated_parts)

            # OR REPLACE: another request may have cached the same chapter
            # while this one was translating; a plain INSERT would then fail
            # on the primary key.
            cursor.execute(
                "INSERT OR REPLACE INTO translations (book_id, chapter_id, english_text) VALUES (?, ?, ?)",
                (book_id, chapter_id, translated_text)
            )
            conn.commit()
            return translated_text.split('\n')
        except RequestError as e:
            logging.warning(f"Translation failed: Request Error - {e}")
            return ["Translation unavailable: Request Error"]
    finally:
        conn.close()
def _highlight_verse(lines, verse_id):
    """Return a copy of ``lines`` with the 1-based ``verse_id`` line wrapped in a highlight span.

    If ``verse_id`` does not address an existing line, the copy is returned
    unchanged instead of raising.
    """
    highlighted = list(lines)
    index = verse_id - 1
    if 0 <= index < len(highlighted):
        highlighted[index] = f"<span class='highlight'>{highlighted[index]}</span>"
    return highlighted


def update_tanach_display(client_time_str, timezone):
    """Build the verse header, Hebrew chapter and English chapter for a time of day.

    Args:
        client_time_str: Wall-clock time as ``HH:MM:SS``.
        timezone: Name of a timezone known to pytz; only validated here.

    Returns:
        A ``(verse_info, hebrew_html, english_html)`` tuple of strings. On
        invalid input or a failed index lookup the first element is an
        ``"Error: ..."`` message and the other two are empty strings.
    """
    try:
        # Validation only. ``client_time_str`` is already a wall-clock time:
        # attaching a tzinfo with replace() never changed the formatted value,
        # and looking up tz.utcoffset() on a naive datetime can raise pytz's
        # ambiguous/non-existent time errors around DST transitions, which
        # were then misreported as an invalid timezone.
        pytz.timezone(timezone)
        datetime.datetime.strptime(client_time_str, "%H:%M:%S")
    except Exception as e:
        # UI handler boundary: report the problem instead of raising.
        logging.error(f"Error adjusting client time based on timezone: {e}")
        return "Error: Invalid Timezone", "", ""

    word_data, word_position = get_current_word_data(client_time_str)
    if word_data is None:
        logging.error(f"Word position {word_position} not found in index.")
        return "Error: Word not found", "", ""

    book_id = word_data["book_id"]
    chapter_id = word_data["chapter_id"]
    verse_id = word_data["verse_id"]
    logging.debug(f"Book ID: {book_id}, Chapter ID: {chapter_id}, Verse ID: {verse_id}")

    # Format verse information (chapter_id is shown 1-based).
    verse_info = f"""
**{TANACH_DATA[book_id]['title']}**
Chapter {chapter_id + 1}, Verse {verse_id}
"""

    # Flatten the chapter once and use the same lines for display and for
    # translation.
    hebrew_chapter = list(
        flatten_text_with_line_breaks(TANACH_DATA[book_id]["text"][chapter_id])
    )
    english_chapter = translate_chapter('\n'.join(hebrew_chapter), book_id, chapter_id)

    # Highlight the current verse on both sides. The English list can be
    # shorter than the Hebrew one (e.g. the one-line "unavailable" fallback),
    # so the helper skips highlighting rather than raising IndexError.
    hebrew_verse = "<br>".join(_highlight_verse(hebrew_chapter, verse_id))
    english_verse = "<br>".join(_highlight_verse(english_chapter, verse_id))
    return verse_info, hebrew_verse, english_verse
# --- Gradio Interface --- | |
with gr.Blocks(css="""
.container {
display: flex;
flex-direction: column;
align-items: center;
font-family: 'Times New Roman', serif;
}
/* Add this highlight class styling */
.highlight {
background-color: #FFFF00; /* Yellow highlight */
padding: 2px 5px;
border-radius: 5px;
}
#verse-info {
margin-bottom: 20px;
text-align: center;
}
#verses {
display: flex;
flex-direction: row;
justify-content: center;
align-items: flex-start;
gap: 50px;
}
#hebrew-verse {
font-size: 18px;
line-height: 1.5;
margin-bottom: 20px;
text-align: right;
direction: rtl;
}
#english-verse {
font-size: 18px;
line-height: 1.5;
margin-bottom: 20px;
}
""") as iface:
    # Timezone selector, defaulting to UTC.
    with gr.Row():
        timezone_input = gr.Dropdown(
            label="Select Your Timezone",
            choices=list(pytz.common_timezones),
            value="UTC",
        )

    # Button that triggers a refresh for the current time.
    with gr.Row():
        advance_button = gr.Button("Advance to Current Time")

    # Header showing book title, chapter and verse.
    with gr.Row():
        verse_info_output = gr.Markdown(elem_id="verse-info", label="Verse Information")

    # Hebrew and English chapters share one row; the "#verses" CSS rule above
    # lays that row out as a flex container.
    with gr.Row(elem_id="verses"):
        hebrew_verse_output = gr.HTML(elem_id="hebrew-verse", label="Hebrew Verse")
        english_verse_output = gr.HTML(elem_id="english-verse", label="English Translation")

    # On click, take the current time in the selected timezone and refresh all
    # three outputs from it.
    advance_button.click(
        fn=lambda tz: update_tanach_display(
            datetime.datetime.now(pytz.timezone(tz)).strftime("%H:%M:%S"),
            tz,
        ),
        inputs=[timezone_input],
        outputs=[verse_info_output, hebrew_verse_output, english_verse_output],
    )
class TestWordIndex(unittest.TestCase):
    """Checks which verse the first and last seconds of the day resolve to."""

    def test_word_index_boundaries(self):
        # Each case pairs a clock time with the fields expected from
        # get_current_word_data; fields are checked in book/chapter/verse order.
        boundary_cases = (
            ("00:00:00", {"book_id": 1, "chapter_id": 0, "verse_id": 1}),
            ("23:59:59", {"book_id": 39, "chapter_id": 35, "verse_id": 23}),
        )
        for clock_time, expected in boundary_cases:
            word_data, _ = get_current_word_data(clock_time)
            for field, value in expected.items():
                self.assertEqual(word_data[field], value)
if __name__ == '__main__':
    # Run the word-index boundary tests first. Their result is only printed;
    # the app is launched afterwards regardless of the outcome.
    loader = unittest.TestLoader()
    runner = unittest.TextTestRunner()
    runner.run(loader.loadTestsFromTestCase(TestWordIndex))

    iface.launch(share=True)