Upload 3 files
- app/webui/app.py +277 -244
- app/webui/patch.py +33 -1
- app/webui/process.py +213 -213
app/webui/app.py
CHANGED
@@ -1,245 +1,278 @@
+import sys
+import os
+
+# Add the project root to the Python path
+project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..'))
+sys.path.insert(0, project_root)
+
+import re
+import gradio as gr
+from glob import glob
+from app.webui.process import model_load, diff_texts, translator, translator_sec
+from llama_index.core import SimpleDirectoryReader
+
+def huanik(
+    endpoint: str,
+    model: str,
+    api_key: str,
+    choice: str,
+    endpoint2: str,
+    model2: str,
+    api_key2: str,
+    source_lang: str,
+    target_lang: str,
+    source_text: str,
+    country: str,
+    max_tokens: int,
+    context_window: int,
+    num_output: int,
+    rpm: int,
+):
+
+    if not source_text or source_lang == target_lang:
+        raise gr.Error("Please check that the content or options are entered correctly.")
+
+    try:
+        model_load(endpoint, model, api_key, context_window, num_output, rpm)
+    except Exception as e:
+        raise gr.Error(f"An unexpected error occurred: {e}")
+
+    source_text = re.sub(r'(?m)^\s*$\n?', '', source_text)
+
+    if choice:
+        init_translation, reflect_translation, final_translation = translator_sec(
+            endpoint2=endpoint2,
+            model2=model2,
+            api_key2=api_key2,
+            context_window=context_window,
+            num_output=num_output,
+            source_lang=source_lang,
+            target_lang=target_lang,
+            source_text=source_text,
+            country=country,
+            max_tokens=max_tokens,
+        )
+    else:
+        init_translation, reflect_translation, final_translation = translator(
+            source_lang=source_lang,
+            target_lang=target_lang,
+            source_text=source_text,
+            country=country,
+            max_tokens=max_tokens,
+        )
+
+    final_diff = gr.HighlightedText(
+        diff_texts(init_translation, final_translation),
+        label="Diff translation",
+        combine_adjacent=True,
+        show_legend=True,
+        visible=True,
+        color_map={"removed": "red", "added": "green"})
+
+    return init_translation, reflect_translation, final_translation, final_diff
+
+def update_model(endpoint):
+    endpoint_model_map = {
+        "Groq": "llama3-70b-8192",
+        "OpenAI": "gpt-4o",
+        "Cohere": "command-r",
+        "TogetherAI": "Qwen/Qwen2-72B-Instruct",
+        "Ollama": "llama3",
+        "Huggingface": "mistralai/Mistral-7B-Instruct-v0.3",
+    }
+    return gr.update(value=endpoint_model_map[endpoint])
+
+def read_doc(file):
+    docs = SimpleDirectoryReader(input_files=[file]).load_data()
+    texts = ""
+    for doc in docs:
+        texts += doc.text
+    texts = re.sub(r'(?m)^\s*$\n?', '', texts)
+    return texts
+
+def enable_sec(choice):
+    if choice:
+        return gr.update(visible=True), gr.update(visible=True), gr.update(visible=True)
+    else:
+        return gr.update(visible=False), gr.update(visible=False), gr.update(visible=False)
+
+def update_menu(visible):
+    return not visible, gr.update(visible=not visible)
+
+def export_txt(strings):
+    os.makedirs("outputs", exist_ok=True)
+    base_count = len(glob(os.path.join("outputs", "*.txt")))
+    file_path = os.path.join("outputs", f"{base_count:06d}.txt")
+    with open(file_path, "w", encoding="utf-8") as f:
+        f.write(strings)
+    return gr.update(value=file_path, visible=True)
+
+def switch(source_lang, source_text, target_lang, output_final):
+    if output_final:
+        return gr.update(value=target_lang), gr.update(value=output_final), gr.update(value=source_lang), gr.update(value=source_text)
+    else:
+        return gr.update(value=target_lang), gr.update(value=source_text), gr.update(value=source_lang), gr.update(value="")
+
+TITLE = """
+<div style="display: inline-flex;">
+    <div style="margin-left: 6px; font-size: 32px; color: #6366f1"><b>Translation Agent</b> WebUI</div>
+</div>
+"""
+
+CSS = """
+h1 {
+    text-align: center;
+    display: block;
+    height: 10vh;
+    align-content: center;
+}
+footer {
+    visibility: hidden;
+}
+.menu_btn {
+    width: 48px;
+    height: 48px;
+    max-width: 48px;
+    min-width: 48px;
+    padding: 0px;
+    background-color: transparent;
+    border: none;
+    cursor: pointer;
+    position: relative;
+    box-shadow: none;
+}
+.menu_btn::before,
+.menu_btn::after {
+    content: '';
+    position: absolute;
+    width: 30px;
+    height: 3px;
+    background-color: #4f46e5;
+    transition: transform 0.3s ease;
+}
+.menu_btn::before {
+    top: 12px;
+    box-shadow: 0 8px 0 #6366f1;
+}
+.menu_btn::after {
+    bottom: 16px;
+}
+.menu_btn.active::before {
+    transform: translateY(8px) rotate(45deg);
+    box-shadow: none;
+}
+.menu_btn.active::after {
+    transform: translateY(-8px) rotate(-45deg);
+}
+.lang {
+    max-width: 100px;
+    min-width: 100px;
+}
+"""
+
+JS = """
+function () {
+    const menuBtn = document.getElementById('menu');
+    menuBtn.classList.toggle('active');
+}
+"""
+
+with gr.Blocks(theme="soft", css=CSS, fill_height=True) as demo:
+    with gr.Row():
+        visible = gr.State(value=True)
+        menuBtn = gr.Button(value="", elem_classes="menu_btn", elem_id="menu", size="sm")
+        gr.HTML(TITLE)
+    with gr.Row():
+        with gr.Column(scale=1) as menubar:
+            endpoint = gr.Dropdown(
+                label="Endpoint",
+                choices=["Groq", "OpenAI", "Cohere", "TogetherAI", "Ollama", "Huggingface"],
+                value="OpenAI",
+            )
+            choice = gr.Checkbox(label="Second Endpoint", info="Add second endpoint for reflection")
+            model = gr.Textbox(label="Model", value="gpt-4o")
+            api_key = gr.Textbox(label="API_KEY", type="password")
+            endpoint2 = gr.Dropdown(
+                label="Endpoint 2",
+                choices=["Groq", "OpenAI", "Cohere", "TogetherAI", "Ollama", "Huggingface"],
+                value="OpenAI",
+                visible=False,
+            )
+            model2 = gr.Textbox(label="Model 2", value="gpt-4o", visible=False)
+            api_key2 = gr.Textbox(label="API_KEY 2", type="password", visible=False)
+            with gr.Row():
+                source_lang = gr.Textbox(
+                    label="Source Lang",
+                    value="English",
+                    elem_classes="lang",
+                )
+                target_lang = gr.Textbox(
+                    label="Target Lang",
+                    value="Spanish",
+                    elem_classes="lang",
+                )
+            switchBtn = gr.Button(value="🔄️")
+            country = gr.Textbox(label="Country", value="Argentina", max_lines=1)
+            with gr.Accordion("Advanced Options", open=False):
+                max_tokens = gr.Slider(
+                    label="Max Tokens Per Chunk",
+                    minimum=512,
+                    maximum=2046,
+                    value=1000,
+                    step=8,
+                )
+                context_window = gr.Slider(
+                    label="Context Window",
+                    minimum=512,
+                    maximum=8192,
+                    value=4096,
+                    step=8,
+                )
+                num_output = gr.Slider(
+                    label="Output Num",
+                    minimum=256,
+                    maximum=8192,
+                    value=512,
+                    step=8,
+                )
+                rpm = gr.Slider(
+                    label="Requests Per Minute",
+                    minimum=1,
+                    maximum=1000,
+                    value=60,
+                    step=1,
+                )
+        with gr.Column(scale=4):
+            source_text = gr.Textbox(
+                label="Source Text",
+                value="How we live is so different from how we ought to live that he who studies "
+                      "what ought to be done rather than what is done will learn the way to his downfall "
+                      "rather than to his preservation.",
+                lines=12,
+            )
+            with gr.Tab("Final"):
+                output_final = gr.Textbox(label="Final Translation", lines=12, show_copy_button=True)
+            with gr.Tab("Initial"):
+                output_init = gr.Textbox(label="Init Translation", lines=12, show_copy_button=True)
+            with gr.Tab("Reflection"):
+                output_reflect = gr.Textbox(label="Reflection", lines=12, show_copy_button=True)
+            with gr.Tab("Diff"):
+                output_diff = gr.HighlightedText(visible=False)
+    with gr.Row():
+        submit = gr.Button(value="Translate")
+        upload = gr.UploadButton(label="Upload", file_types=["text"])
+        export = gr.DownloadButton(visible=False)
+        clear = gr.ClearButton([source_text, output_init, output_reflect, output_final])
+
+    switchBtn.click(fn=switch, inputs=[source_lang, source_text, target_lang, output_final], outputs=[source_lang, source_text, target_lang, output_final])
+    menuBtn.click(fn=update_menu, inputs=visible, outputs=[visible, menubar], js=JS)
+    endpoint.change(fn=update_model, inputs=[endpoint], outputs=[model])
+    choice.select(fn=enable_sec, inputs=[choice], outputs=[endpoint2, model2, api_key2])
+    endpoint2.change(fn=update_model, inputs=[endpoint2], outputs=[model2])
+    submit.click(fn=huanik, inputs=[endpoint, model, api_key, choice, endpoint2, model2, api_key2, source_lang, target_lang, source_text, country, max_tokens, context_window, num_output, rpm], outputs=[output_init, output_reflect, output_final, output_diff])
+    upload.upload(fn=read_doc, inputs=upload, outputs=source_text)
+    output_final.change(fn=export_txt, inputs=output_final, outputs=[export])
+
+if __name__ == "__main__":
+    demo.queue(api_open=False).launch(show_api=False, share=False)
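
The least obvious wiring above is the menu button: `update_menu` flips a `gr.State` flag server-side and shows or hides the sidebar column, while the `js=JS` hook toggles the hamburger icon class purely in the browser. A minimal self-contained sketch of that pattern (component names here are illustrative, not part of the commit):

import gradio as gr

def toggle(visible):
    # Flip the stored flag and show/hide the sidebar to match.
    return not visible, gr.update(visible=not visible)

with gr.Blocks() as sketch:
    shown = gr.State(value=True)
    menu = gr.Button("menu", elem_id="menu")
    with gr.Column() as sidebar:
        gr.Markdown("sidebar content")
    # fn runs on the server; the js hook runs client-side for the icon animation.
    menu.click(fn=toggle, inputs=shown, outputs=[shown, sidebar],
               js="() => document.getElementById('menu').classList.toggle('active')")

if __name__ == "__main__":
    sketch.launch()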
app/webui/patch.py
CHANGED
@@ -1,5 +1,8 @@
 # a monkey patch to use llama-index completion
 import os
+import time
+from functools import wraps
+from threading import Lock
 from typing import Union
 import src.translation_agent.utils as utils
 
@@ -13,15 +16,16 @@ from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
 from llama_index.core import Settings
 from llama_index.core.llms import ChatMessage
 
+RPM = 60
 
 # Add your LLMs here
-
 def model_load(
     endpoint: str,
     model: str,
     api_key: str = None,
     context_window: int = 4096,
     num_output: int = 512,
+    rpm: int = RPM,
 ):
     if endpoint == "Groq":
         llm = Groq(
@@ -53,6 +57,10 @@ def model_load(
             token=api_key if api_key else os.getenv("HF_TOKEN"),
             task="text-generation",
         )
+
+    global RPM
+    RPM = rpm
+
     Settings.llm = llm
     # maximum input size to the LLM
     Settings.context_window = context_window
@@ -60,7 +68,29 @@ def model_load(
     # number of tokens reserved for text generation.
     Settings.num_output = num_output
 
+def rate_limit(get_max_per_minute):
+    def decorator(func):
+        lock = Lock()
+        last_called = [0.0]
+
+        @wraps(func)
+        def wrapper(*args, **kwargs):
+            with lock:
+                max_per_minute = get_max_per_minute()
+                min_interval = 60.0 / max_per_minute
+                elapsed = time.time() - last_called[0]
+                left_to_wait = min_interval - elapsed
 
+                if left_to_wait > 0:
+                    time.sleep(left_to_wait)
+
+                ret = func(*args, **kwargs)
+                last_called[0] = time.time()
+                return ret
+        return wrapper
+    return decorator
+
+@rate_limit(lambda: RPM)
 def get_completion(
     prompt: str,
     system_message: str = "You are a helpful assistant.",
@@ -84,6 +114,7 @@ def get_completion(
         If json_mode is True, returns the complete API response as a dictionary.
        If json_mode is False, returns the generated text as a string.
    """
+    print(time.localtime())
     llm = Settings.llm
     if llm.class_name() == "HuggingFaceInferenceAPI":
         llm.system_prompt = system_message
@@ -91,6 +122,7 @@ def get_completion(
             ChatMessage(
                 role="user", content=prompt),
         ]
+
         response = llm.chat(
             messages=messages,
             temperature=temperature,
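
The new `rate_limit` decorator reads the limit through a callable (`lambda: RPM`) at call time, so `model_load` can retune the global `RPM` after `get_completion` has already been wrapped. A quick timing sketch of the decorator's behavior (the `ping` function is hypothetical; assumes the repo's dependencies are installed so `app.webui.patch` imports cleanly):

import time
import app.webui.patch as patch

patch.RPM = 120  # 120 requests/minute -> at least 0.5 s between calls

@patch.rate_limit(lambda: patch.RPM)
def ping(i):
    # Hypothetical stand-in for a completion call.
    return i

start = time.time()
for i in range(3):
    ping(i)
# The first call goes through immediately (last_called starts at 0.0);
# the next two each wait about 0.5 s, so this prints roughly 1.0s.
print(f"{time.time() - start:.2f}s")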
app/webui/process.py
CHANGED
@@ -1,213 +1,213 @@
 import gradio as gr
 from simplemma import simple_tokenizer
 from difflib import Differ
 from icecream import ic
 from app.webui.patch import model_load, num_tokens_in_string, one_chunk_initial_translation, one_chunk_reflect_on_translation, one_chunk_improve_translation
 from app.webui.patch import calculate_chunk_size, multichunk_initial_translation, multichunk_reflect_on_translation, multichunk_improve_translation
 
 from llama_index.core.node_parser import SentenceSplitter
 
 def tokenize(text):
     # Use simplemma to tokenize the text
     words = simple_tokenizer(text)
     # Check if the text contains spaces
     if ' ' in text:
         # Create a list of words and spaces
         tokens = []
         for word in words:
             tokens.append(word)
             if not word.startswith("'") and not word.endswith("'"):  # Avoid adding space after punctuation
                 tokens.append(' ')  # Add space after each word
         return tokens[:-1]  # Remove the last space
     else:
         return words
 
 def diff_texts(text1, text2):
     tokens1 = tokenize(text1)
     tokens2 = tokenize(text2)
 
     d = Differ()
     diff_result = list(d.compare(tokens1, tokens2))
 
     highlighted_text = []
     for token in diff_result:
         word = token[2:]
         category = None
         if token[0] == '+':
             category = 'added'
         elif token[0] == '-':
             category = 'removed'
         elif token[0] == '?':
             continue  # Ignore the hints line
 
         highlighted_text.append((word, category))
 
     return highlighted_text
 
 # modified from src.translation_agent.utils.translate
 def translator(
-    source_lang,
-    target_lang,
-    source_text,
-    country,
-    max_tokens=1000,
+    source_lang: str,
+    target_lang: str,
+    source_text: str,
+    country: str,
+    max_tokens: int = 1000,
 ):
 
     """Translate the source_text from source_lang to target_lang."""
     num_tokens_in_text = num_tokens_in_string(source_text)
 
     ic(num_tokens_in_text)
 
     if num_tokens_in_text < max_tokens:
         ic("Translating text as single chunk")
 
         # Note: use yield from B() if put yield in function B()
         init_translation = one_chunk_initial_translation(
             source_lang, target_lang, source_text
         )
 
         reflection = one_chunk_reflect_on_translation(
             source_lang, target_lang, source_text, init_translation, country
         )
 
         final_translation = one_chunk_improve_translation(
             source_lang, target_lang, source_text, init_translation, reflection
         )
 
         return init_translation, reflection, final_translation
 
     else:
         ic("Translating text as multiple chunks")
 
         token_size = calculate_chunk_size(
             token_count=num_tokens_in_text, token_limit=max_tokens
         )
 
         ic(token_size)
 
         # using sentence splitter
         text_parser = SentenceSplitter(
             chunk_size=token_size,
         )
 
         source_text_chunks = text_parser.split_text(source_text)
 
         translation_1_chunks = multichunk_initial_translation(
             source_lang, target_lang, source_text_chunks
         )
 
         init_translation = "".join(translation_1_chunks)
 
         reflection_chunks = multichunk_reflect_on_translation(
             source_lang,
             target_lang,
             source_text_chunks,
             translation_1_chunks,
             country,
         )
 
         reflection = "".join(reflection_chunks)
 
         translation_2_chunks = multichunk_improve_translation(
             source_lang,
             target_lang,
             source_text_chunks,
             translation_1_chunks,
             reflection_chunks,
         )
 
         final_translation = "".join(translation_2_chunks)
 
         return init_translation, reflection, final_translation
 
 
 def translator_sec(
-    endpoint2,
-    model2,
-    api_key2,
-    context_window,
-    num_output,
-    source_lang,
-    target_lang,
-    source_text,
-    country,
-    max_tokens=1000,
+    endpoint2: str,
+    model2: str,
+    api_key2: str,
+    context_window: int,
+    num_output: int,
+    source_lang: str,
+    target_lang: str,
+    source_text: str,
+    country: str,
+    max_tokens: int = 1000,
 ):
 
     """Translate the source_text from source_lang to target_lang."""
     num_tokens_in_text = num_tokens_in_string(source_text)
 
     ic(num_tokens_in_text)
 
     if num_tokens_in_text < max_tokens:
         ic("Translating text as single chunk")
 
         # Note: use yield from B() if put yield in function B()
         init_translation = one_chunk_initial_translation(
             source_lang, target_lang, source_text
         )
 
         try:
             model_load(endpoint2, model2, api_key2, context_window, num_output)
         except Exception as e:
             raise gr.Error(f"An unexpected error occurred: {e}")
 
         reflection = one_chunk_reflect_on_translation(
             source_lang, target_lang, source_text, init_translation, country
         )
 
         final_translation = one_chunk_improve_translation(
             source_lang, target_lang, source_text, init_translation, reflection
         )
 
         return init_translation, reflection, final_translation
 
     else:
         ic("Translating text as multiple chunks")
 
         token_size = calculate_chunk_size(
             token_count=num_tokens_in_text, token_limit=max_tokens
         )
 
         ic(token_size)
 
         # using sentence splitter
         text_parser = SentenceSplitter(
             chunk_size=token_size,
         )
 
         source_text_chunks = text_parser.split_text(source_text)
 
         translation_1_chunks = multichunk_initial_translation(
             source_lang, target_lang, source_text_chunks
         )
 
         init_translation = "".join(translation_1_chunks)
 
         try:
             model_load(endpoint2, model2, api_key2, context_window, num_output)
         except Exception as e:
             raise gr.Error(f"An unexpected error occurred: {e}")
 
         reflection_chunks = multichunk_reflect_on_translation(
             source_lang,
             target_lang,
             source_text_chunks,
             translation_1_chunks,
             country,
         )
 
         reflection = "".join(reflection_chunks)
 
         translation_2_chunks = multichunk_improve_translation(
             source_lang,
             target_lang,
             source_text_chunks,
             translation_1_chunks,
             reflection_chunks,
         )
 
         final_translation = "".join(translation_2_chunks)
 
         return init_translation, reflection, final_translation
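
`diff_texts` returns `(token, category)` pairs where category is `'added'`, `'removed'`, or `None`, which is the list format `gr.HighlightedText` accepts; that is how the Diff tab renders its red/green view. An illustrative run (assumes `simplemma` and the other dependencies are installed; exact tokens and ordering depend on the tokenizer and difflib):

from app.webui.process import diff_texts

pairs = diff_texts("the quick brown fox", "the fast brown fox")
print(pairs)
# Roughly: [('the', None), (' ', None), ('quick', 'removed'), ('fast', 'added'),
#           (' ', None), ('brown', None), (' ', None), ('fox', None)]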