# tanach_clock / app.py
# bartman081523
# translation cache for app version
# d34824e
import json
import logging
import datetime
import time
import requests
import pytz
import unittest
import sqlite3 # Import sqlite3 for database handling
import gradio as gr
from deep_translator import GoogleTranslator
from deep_translator.exceptions import NotValidLength, RequestError
from utils import process_json_files, flatten_text_with_line_breaks, calculate_tanach_statistics, build_word_index
# Configure logging for the whole process: DEBUG and above, with a timestamp
# and level name in front of every message.
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(levelname)s - %(message)s',
)

# Load the Tanach text once, at import time.
# NOTE(review): the arguments 1 and 39 are presumably the first and last book
# numbers -- confirm against utils.process_json_files.
TANACH_DATA = process_json_files(1, 39)

# Lookup table built from the loaded text; get_current_word_data() compares
# its keys against a number of seconds since midnight.
WORD_INDEX = build_word_index(TANACH_DATA)
# --- Database Setup ---
# Use a connection function to ensure each thread gets its own connection
def get_db_connection(db_path='translation_cache.db'):
    """Open a new SQLite connection to the translation cache.

    A fresh connection is created on every call, so each caller (and
    therefore each thread) works with its own connection object.

    Args:
        db_path: Location of the SQLite database (a file path or
            ``':memory:'``). Defaults to ``'translation_cache.db'``, the
            location this function has always used.

    Returns:
        A ``sqlite3.Connection`` whose rows are ``sqlite3.Row`` objects, so
        columns can be read by name. The caller is responsible for closing
        it; using the connection in a ``with`` statement only commits or
        rolls back, it does not close.
    """
    conn = sqlite3.connect(db_path)
    conn.row_factory = sqlite3.Row  # allows row['column_name'] access
    return conn
# Create the translation-cache table if it doesn't exist.
# sqlite3's connection context manager only commits (or rolls back) the
# transaction -- it does NOT close the connection -- so the connection is
# closed explicitly once the schema is in place.
conn = get_db_connection()
try:
    with conn:  # commits on success, rolls back if the statement fails
        cursor = conn.cursor()
        cursor.execute('''
CREATE TABLE IF NOT EXISTS translations (
book_id INTEGER,
chapter_id INTEGER,
english_text TEXT,
PRIMARY KEY (book_id, chapter_id)
)
''')
finally:
    conn.close()
# --- Utility Functions ---
def get_current_word_data(client_time_str):
    """Look up the word-index entry closest to a time of day.

    Args:
        client_time_str: Time of day formatted as ``HH:MM:SS``.

    Returns:
        A ``(entry, key)`` tuple: the ``WORD_INDEX`` value whose key is
        numerically closest to the number of seconds elapsed since midnight,
        together with that key. ``(None, None)`` is returned (and the error
        logged) if the time cannot be parsed or the lookup fails.
    """
    try:
        parsed = datetime.datetime.strptime(client_time_str, "%H:%M:%S")
        seconds_since_midnight = (
            parsed.hour * 3600 + parsed.minute * 60 + parsed.second
        )

        def distance(key):
            # How far an index key lies from the requested second of the day.
            return abs(key - seconds_since_midnight)

        nearest_key = min(WORD_INDEX.keys(), key=distance)
        return WORD_INDEX[nearest_key], nearest_key
    except Exception as e:
        logging.error(f"Error processing client time: {e}")
        return None, None
def get_formatted_verse(book_id, chapter_id, verse_id, highlight_word=True):
    """Return the chapter that contains a verse as ``<br>``-separated HTML.

    Despite the name, the whole chapter is returned, one flattened line per
    ``<br>``-separated segment, and no highlighting is applied here.

    Args:
        book_id: Key of the book in ``TANACH_DATA``.
        chapter_id: Index of the chapter within the book's ``"text"``.
        verse_id: Accepted for interface compatibility; currently unused.
        highlight_word: Accepted for interface compatibility; currently unused.

    Returns:
        The flattened chapter lines joined with ``<br>``.
    """
    book = TANACH_DATA[book_id]
    lines = flatten_text_with_line_breaks(book["text"][chapter_id])
    return '<br>'.join(lines)
def _chunk_on_line_breaks(text, max_length):
    """Group the lines of ``text`` into newline-joined chunks of at most ``max_length`` chars.

    Chunks always end on a line boundary, so ``'\\n'.join(chunks)`` reproduces
    ``text``. A single line longer than ``max_length`` becomes a chunk of its own.
    """
    chunks = []
    pending = []     # lines collected for the chunk being built
    pending_len = 0  # len('\n'.join(pending))
    for line in text.split('\n'):
        grown_len = pending_len + 1 + len(line) if pending else len(line)
        if pending and grown_len > max_length:
            chunks.append('\n'.join(pending))
            pending, grown_len = [], len(line)
        pending.append(line)
        pending_len = grown_len
    chunks.append('\n'.join(pending))
    return chunks


def _translate_block(translator, block, max_length):
    """Translate one chunk, hard-splitting it only if it exceeds ``max_length``."""
    if not block.strip():
        return block  # nothing to translate; avoid a pointless request
    pieces = [block[i:i + max_length] for i in range(0, len(block), max_length)]
    # translate() may return None; treat that as an empty translation.
    return "".join(translator.translate(piece) or "" for piece in pieces)


def translate_chapter(hebrew_chapter, book_id, chapter_id):
    """Translate a Hebrew chapter to English, caching the result in the database.

    Args:
        hebrew_chapter: The chapter text, one verse per line (``'\\n'``-separated).
        book_id: Book key used for the cache row.
        chapter_id: Chapter index used for the cache row.

    Returns:
        The English text as a list of lines. If the translation request
        fails with ``RequestError``, a one-element list containing an error
        message is returned and nothing is cached.
    """
    # A fresh connection per call keeps this safe to use from any thread.
    conn = get_db_connection()
    try:
        cursor = conn.cursor()

        # Serve from the cache when this chapter was translated before.
        cursor.execute(
            "SELECT english_text FROM translations WHERE book_id=? AND chapter_id=?",
            (book_id, chapter_id)
        )
        cached = cursor.fetchone()
        if cached is not None:
            return cached['english_text'].split('\n')

        try:
            translator = GoogleTranslator(source='iw', target='en')
            max_length = 2000  # conservative per-request size

            # Chunk on line boundaries and re-join with '\n' so that verses
            # are never cut mid-word and every translated line still lines up
            # with its Hebrew verse.
            translated_text = '\n'.join(
                _translate_block(translator, chunk, max_length)
                for chunk in _chunk_on_line_breaks(hebrew_chapter, max_length)
            )

            # Store the translation; `with conn` commits (or rolls back on error).
            with conn:
                cursor.execute(
                    "INSERT INTO translations (book_id, chapter_id, english_text) VALUES (?, ?, ?)",
                    (book_id, chapter_id, translated_text)
                )
            return translated_text.split('\n')
        except RequestError as e:
            logging.warning(f"Translation failed: Request Error - {e}")
            return ["Translation unavailable: Request Error"]
    finally:
        # The sqlite3 context manager never closes the connection; do it here
        # so connections are not leaked on every call.
        conn.close()
def _highlight_line(lines, index):
    """Wrap ``lines[index]`` in the highlight span, in place, if ``index`` is in range."""
    if 0 <= index < len(lines):
        lines[index] = f"<span class='highlight'>{lines[index]}</span>"


def update_tanach_display(client_time_str, timezone):
    """Build the verse header, Hebrew chapter and English chapter for a clock time.

    Args:
        client_time_str: Wall-clock time in the selected timezone, ``HH:MM:SS``.
        timezone: A timezone name known to ``pytz``.

    Returns:
        A ``(verse_info, hebrew_html, english_html)`` tuple. ``verse_info`` is
        Markdown; the other two are ``<br>``-separated HTML with the current
        verse wrapped in ``<span class='highlight'>``. On invalid input the
        tuple is ``("Error: ...", "", "")``.
    """
    try:
        # Validate both inputs. The time string is already expressed in the
        # selected zone, so no shift is applied: the previous code attached an
        # offset with replace(tzinfo=...), which never changed HH:MM:SS, and
        # computing that offset via tz.utcoffset(naive_now) could raise
        # Ambiguous/NonExistentTimeError around DST transitions.
        pytz.timezone(timezone)
        client_time_obj = datetime.datetime.strptime(client_time_str, "%H:%M:%S")
        client_time_str = client_time_obj.strftime("%H:%M:%S")
    except Exception as e:
        logging.error(f"Error adjusting client time based on timezone: {e}")
        return "Error: Invalid Timezone", "", ""

    word_data, word_position = get_current_word_data(client_time_str)
    if word_data is None:
        logging.error(f"Word position {word_position} not found in index.")
        return "Error: Word not found", "", ""

    book_id = word_data["book_id"]
    chapter_id = word_data["chapter_id"]
    verse_id = word_data["verse_id"]
    logging.debug(f"Book ID: {book_id}, Chapter ID: {chapter_id}, Verse ID: {verse_id}")

    # Format verse information (chapter_id is zero-based, hence the +1).
    verse_info = (
        f"\n**{TANACH_DATA[book_id]['title']}**\n"
        f"Chapter {chapter_id + 1}, Verse {verse_id}\n"
    )

    # Hebrew chapter, one entry per line.
    hebrew_verse = get_formatted_verse(book_id, chapter_id, verse_id, highlight_word=False)
    hebrew_verses = hebrew_verse.split("<br>")

    # Translate the entire chapter (cached after the first request).
    hebrew_chapter = flatten_text_with_line_breaks(TANACH_DATA[book_id]["text"][chapter_id])
    english_chapter = translate_chapter('\n'.join(hebrew_chapter), book_id, chapter_id)

    # Highlight the current verse in both texts. The English side may have
    # fewer lines than the Hebrew (e.g. the one-line "Translation unavailable"
    # fallback), so out-of-range indices are skipped instead of raising.
    _highlight_line(hebrew_verses, verse_id - 1)
    _highlight_line(english_chapter, verse_id - 1)

    # Join the verses back with <br> for display.
    hebrew_verse = "<br>".join(hebrew_verses)
    english_verse = "<br>".join(english_chapter)
    return verse_info, hebrew_verse, english_verse
# --- Gradio Interface ---
with gr.Blocks(css="""
.container {
display: flex;
flex-direction: column;
align-items: center;
font-family: 'Times New Roman', serif;
}
/* Add this highlight class styling */
.highlight {
background-color: #FFFF00; /* Yellow highlight */
padding: 2px 5px;
border-radius: 5px;
}
#verse-info {
margin-bottom: 20px;
text-align: center;
}
#verses {
display: flex;
flex-direction: row;
justify-content: center;
align-items: flex-start;
gap: 50px;
}
#hebrew-verse {
font-size: 18px;
line-height: 1.5;
margin-bottom: 20px;
text-align: right;
direction: rtl;
}
#english-verse {
font-size: 18px;
line-height: 1.5;
margin-bottom: 20px;
}
""") as iface:
    # Row 1: timezone selector, defaulting to UTC.
    with gr.Row():
        timezone_input = gr.Dropdown(
            label="Select Your Timezone",
            choices=list(pytz.common_timezones),
            value="UTC",
        )

    # Row 2: the button that refreshes the display.
    with gr.Row():
        advance_button = gr.Button("Advance to Current Time")

    # Row 3: book / chapter / verse header.
    with gr.Row():
        verse_info_output = gr.Markdown(label="Verse Information", elem_id="verse-info")

    # Row 4: Hebrew and English chapters; the "verses" id ties this row to
    # the #verses flex rule in the CSS above.
    with gr.Row(elem_id="verses"):
        hebrew_verse_output = gr.HTML(label="Hebrew Verse", elem_id="hebrew-verse")
        english_verse_output = gr.HTML(label="English Translation", elem_id="english-verse")

    # On click, take the current wall-clock time in the selected timezone and
    # render the verse that corresponds to it.
    advance_button.click(
        fn=lambda tz: update_tanach_display(datetime.datetime.now(pytz.timezone(tz)).strftime("%H:%M:%S"), tz),
        inputs=[timezone_input],
        outputs=[verse_info_output, hebrew_verse_output, english_verse_output],
    )
class TestWordIndex(unittest.TestCase):
    """Checks which verse the first and the last second of the day map to."""

    def _assert_location(self, time_str, book_id, chapter_id, verse_id):
        """Assert that ``time_str`` resolves to the given book/chapter/verse."""
        word_data, _ = get_current_word_data(time_str)
        self.assertEqual(word_data["book_id"], book_id)
        self.assertEqual(word_data["chapter_id"], chapter_id)
        self.assertEqual(word_data["verse_id"], verse_id)

    def test_word_index_boundaries(self):
        # Midnight: book 1, chapter index 0, verse 1.
        self._assert_location("00:00:00", 1, 0, 1)
        # Last second of the day: book 39, chapter index 35, verse 23.
        self._assert_location("23:59:59", 39, 35, 23)
if __name__ == '__main__':
    # Run the word-index boundary tests first. Their outcome is only printed
    # by the runner; the app is launched afterwards either way.
    loader = unittest.TestLoader()
    boundary_tests = loader.loadTestsFromTestCase(TestWordIndex)
    runner = unittest.TextTestRunner()
    runner.run(boundary_tests)

    # Start the Gradio app with sharing enabled.
    iface.launch(share=True)