Spaces:

vilarin
/

Translation-Agent-WebUI

Running

App Files Files Community

vilarin committed on Jun 29, 2024

Commit

3bf46ba

verified ·

1 Parent(s): 703602c

Upload 12 files

Browse files

Files changed (8) hide show

app/webui/README.md +13 -3
app/webui/__pycache__/app.cpython-310.pyc +0 -0
app/webui/__pycache__/patch.cpython-310.pyc +0 -0
app/webui/__pycache__/process.cpython-310.pyc +0 -0
app/webui/app.py +46 -9
app/webui/patch.py +0 -3
app/webui/process.py +88 -0
app/webui/requirements.txt +1 -1

app/webui/README.md CHANGED Viewed

@@ -3,6 +3,10 @@
 This repository contains a Gradio web UI for a translation agent that utilizes various language models for translation.
 **Features:**
 - **Tokenized Text:**  Displays translated text with tokenization, highlighting differences between original and translated words.
@@ -21,6 +25,7 @@ Llama Index supported, easily extendable
 **Getting Started**
 1. **Install Dependencies:**
     **Linux(Using Python Venv)**
     ```bash
         git clone https://github.com/andrewyng/translation-agent.git
@@ -63,9 +68,11 @@ Llama Index supported, easily extendable
 **Usage:**
 1. Select your desired translation API from the Endpoint dropdown menu.
-2. If using Hugging Face API, enter your `HF_TOKEN` in the `api_key` textbox.
-3. Input the source text or upload your document file.
-4. Submit and get translation, the UI will display the translated text with tokenization and highlight differences.
 **Customization:**
@@ -79,3 +86,6 @@ Contributions are welcome! Feel free to open issues or submit pull requests.
 This project is licensed under the MIT License.

 This repository contains a Gradio web UI for a translation agent that utilizes various language models for translation.
+### Preview
+![webui](image.png)
 **Features:**
 - **Tokenized Text:**  Displays translated text with tokenization, highlighting differences between original and translated words.
 **Getting Started**
 1. **Install Dependencies:**
     **Linux(Using Python Venv)**
     ```bash
         git clone https://github.com/andrewyng/translation-agent.git
 **Usage:**
 1. Select your desired translation API from the Endpoint dropdown menu.
+2. Input the source language, target language, and country (optional).
+3. If using Hugging Face API, enter your `HF_TOKEN` in the `api_key` textbox.
+4. Input the source text or upload your document file.
+5. Submit to get the translation; the UI will display the translated text with tokenization and highlight the differences.
+6. Optionally enable Second Endpoint to add another endpoint that uses a different LLM for reflection.
 **Customization:**
 This project is licensed under the MIT License.
+**DEMO:**
+[Huggingface Demo](https://huggingface.co/spaces/vilarin/Translation-Agent-WebUI)

app/webui/__pycache__/app.cpython-310.pyc CHANGED Viewed

Binary files a/app/webui/__pycache__/app.cpython-310.pyc and b/app/webui/__pycache__/app.cpython-310.pyc differ

app/webui/__pycache__/patch.cpython-310.pyc CHANGED Viewed

Binary files a/app/webui/__pycache__/patch.cpython-310.pyc and b/app/webui/__pycache__/patch.cpython-310.pyc differ

app/webui/__pycache__/process.cpython-310.pyc CHANGED Viewed

Binary files a/app/webui/__pycache__/process.cpython-310.pyc and b/app/webui/__pycache__/process.cpython-310.pyc differ

app/webui/app.py CHANGED Viewed

@@ -7,13 +7,17 @@ sys.path.insert(0, project_root)
 import re
 import gradio as gr
-from app.webui.process import model_load, diff_texts, translator
 from llama_index.core import SimpleDirectoryReader
 def huanik(
     endpoint,
     model,
     api_key,
     source_lang,
     target_lang,
     source_text,
@@ -33,13 +37,28 @@ def huanik(
     source_text =  re.sub(r'\n+', '\n', source_text)
-    init_translation, reflect_translation, final_translation = translator(
-        source_lang=source_lang,
-        target_lang=target_lang,
-        source_text=source_text,
-        country=country,
-        max_tokens=max_tokens,
-    )
     final_diff = gr.HighlightedText(
         diff_texts(init_translation, final_translation),
@@ -66,6 +85,13 @@ def read_doc(file):
     docs = SimpleDirectoryReader(input_files=[file]).load_data()
     return docs[0].text
 TITLE = """
 <h1><a href="https://github.com/andrewyng/translation-agent">Translation-Agent</a> webUI</h1>
 """
@@ -91,8 +117,17 @@ with gr.Blocks(theme="soft", css=CSS, fill_height=True) as demo:
                 choices=["Groq","OpenAI","Cohere","TogetherAI","Ollama","Huggingface"],
                 value="OpenAI",
             )
             model = gr.Textbox(label="Model", value="gpt-4o", )
             api_key = gr.Textbox(label="API_KEY", type="password", )
             source_lang = gr.Textbox(
                 label="Source Lang",
                 value="English",
@@ -146,7 +181,9 @@ with gr.Blocks(theme="soft", css=CSS, fill_height=True) as demo:
         clear = gr.ClearButton([source_text, output_init, output_reflect, output_final])
     endpoint.change(fn=update_model, inputs=[endpoint], outputs=[model])
-    submit.click(fn=huanik, inputs=[endpoint, model, api_key, source_lang, target_lang, source_text, country, max_tokens, context_window, num_output], outputs=[output_init, output_reflect, output_final, output_diff])
     upload.upload(fn=read_doc, inputs = upload, outputs = source_text)
 if __name__ == "__main__":

 import re
 import gradio as gr
+from app.webui.process import model_load, diff_texts, translator, translator_sec
 from llama_index.core import SimpleDirectoryReader
 def huanik(
     endpoint,
     model,
     api_key,
+    choice,
+    endpoint2,
+    model2,
+    api_key2,
     source_lang,
     target_lang,
     source_text,
     source_text =  re.sub(r'\n+', '\n', source_text)
+    if choice:
+        init_translation, reflect_translation, final_translation = translator_sec(
+            endpoint2=endpoint2,
+            model2=model2,
+            api_key2=api_key2,
+            context_window=context_window,
+            num_output=num_output,
+            source_lang=source_lang,
+            target_lang=target_lang,
+            source_text=source_text,
+            country=country,
+            max_tokens=max_tokens,
+        )
+    else:
+        init_translation, reflect_translation, final_translation = translator(
+            source_lang=source_lang,
+            target_lang=target_lang,
+            source_text=source_text,
+            country=country,
+            max_tokens=max_tokens,
+        )
     final_diff = gr.HighlightedText(
         diff_texts(init_translation, final_translation),
     docs = SimpleDirectoryReader(input_files=[file]).load_data()
     return docs[0].text
+def enable_sec(choice):
+    """Return three visibility updates driven by the "Second Endpoint" checkbox.
+
+    Each of the three returned ``gr.update`` objects carries ``visible=True``
+    when ``choice`` is truthy and ``visible=False`` otherwise. The caller wires
+    them to the endpoint2, model2 and api_key2 components.
+    """
+    shown = bool(choice)
+    # One update per output component, all sharing the same visibility flag.
+    return tuple(gr.update(visible=shown) for _ in range(3))
 TITLE = """
 <h1><a href="https://github.com/andrewyng/translation-agent">Translation-Agent</a> webUI</h1>
 """
                 choices=["Groq","OpenAI","Cohere","TogetherAI","Ollama","Huggingface"],
                 value="OpenAI",
             )
+            choice = gr.Checkbox(label="Second Endpoint", info="Add second endpoint for reflection")
             model = gr.Textbox(label="Model", value="gpt-4o", )
             api_key = gr.Textbox(label="API_KEY", type="password", )
+            endpoint2 = gr.Dropdown(
+                label="Endpoint 2",
+                choices=["Groq","OpenAI","Cohere","TogetherAI","Ollama","Huggingface"],
+                value="OpenAI",
+                visible=False,
+            )
+            model2 = gr.Textbox(label="Model 2", value="gpt-4o", visible=False, )
+            api_key2 = gr.Textbox(label="API_KEY 2", type="password", visible=False,)
             source_lang = gr.Textbox(
                 label="Source Lang",
                 value="English",
         clear = gr.ClearButton([source_text, output_init, output_reflect, output_final])
     endpoint.change(fn=update_model, inputs=[endpoint], outputs=[model])
+    choice.select(fn=enable_sec, inputs=[choice], outputs=[endpoint2, model2, api_key2])
+    endpoint2.change(fn=update_model, inputs=[endpoint2], outputs=[model2])
+    submit.click(fn=huanik, inputs=[endpoint, model, api_key, choice, endpoint2, model2, api_key2, source_lang, target_lang, source_text, country, max_tokens, context_window, num_output], outputs=[output_init, output_reflect, output_final, output_diff])
     upload.upload(fn=read_doc, inputs = upload, outputs = source_text)
 if __name__ == "__main__":

app/webui/patch.py CHANGED Viewed

@@ -94,7 +94,6 @@ def get_completion(
             response = llm.chat(
                 messages=messages,
                 temperature=temperature,
-                top_p=1,
             )
             return response.message.content
         else:
@@ -108,7 +107,6 @@ def get_completion(
             if json_mode:
                 response = llm.chat(
                     temperature=temperature,
-                    top_p=1,
                     response_format={"type": "json_object"},
                     messages=messages,
                 )
@@ -116,7 +114,6 @@ def get_completion(
             else:
                 response = llm.chat(
                     temperature=temperature,
-                    top_p=1,
                     messages=messages,
                 )
                 return response.message.content

             response = llm.chat(
                 messages=messages,
                 temperature=temperature,
             )
             return response.message.content
         else:
             if json_mode:
                 response = llm.chat(
                     temperature=temperature,
                     response_format={"type": "json_object"},
                     messages=messages,
                 )
             else:
                 response = llm.chat(
                     temperature=temperature,
                     messages=messages,
                 )
                 return response.message.content

app/webui/process.py CHANGED Viewed

@@ -56,6 +56,7 @@ def translator(
         country,
         max_tokens=1000,
 ):
     """Translate the source_text from source_lang to target_lang."""
     num_tokens_in_text = num_tokens_in_string(source_text)
@@ -125,4 +126,91 @@ def translator(
         return init_translation, reflection, final_translation

         country,
         max_tokens=1000,
 ):
     """Translate the source_text from source_lang to target_lang."""
     num_tokens_in_text = num_tokens_in_string(source_text)
         return init_translation, reflection, final_translation
+def translator_sec(
+        endpoint2,
+        model2,
+        api_key2,
+        context_window,
+        num_output,
+        source_lang,
+        target_lang,
+        source_text,
+        country,
+        max_tokens=1000,
+):
+    """Translate source_text, switching to a second endpoint for reflection.
+
+    The initial translation runs before ``model_load`` is called, so it
+    presumably uses whichever model was configured beforehand (confirm against
+    the caller). ``model_load`` is then invoked with the second endpoint's
+    settings, and the reflection and improvement steps run after it in both
+    the single-chunk and the multi-chunk path.
+
+    Args:
+        endpoint2, model2, api_key2: Second-endpoint settings, forwarded to
+            ``model_load``.
+        context_window, num_output: Also forwarded to ``model_load``.
+        source_lang, target_lang: Language names passed to every step.
+        source_text: Text to translate.
+        country: Passed to the reflection step.
+        max_tokens: Texts with fewer tokens than this are handled as a single
+            chunk; otherwise it is the ``token_limit`` used to size chunks.
+
+    Returns:
+        A ``(init_translation, reflection, final_translation)`` tuple.
+
+    Raises:
+        gr.Error: If ``model_load`` fails for the second endpoint.
+    """
+
+    def _switch_to_second_model():
+        # Load the second endpoint; surface any failure as a UI-visible error.
+        try:
+            model_load(endpoint2, model2, api_key2, context_window, num_output)
+        except Exception as e:
+            raise gr.Error(f"An unexpected error occurred: {e}") from e
+
+    num_tokens_in_text = num_tokens_in_string(source_text)
+    ic(num_tokens_in_text)
+    if num_tokens_in_text < max_tokens:
+        ic("Translating text as single chunk")
+        #Note: use yield from B() if put yield in function B()
+        init_translation = one_chunk_initial_translation(
+            source_lang, target_lang, source_text
+        )
+        # Switch models BEFORE reflecting, matching the multi-chunk path below;
+        # previously the reflection here still ran before the switch.
+        _switch_to_second_model()
+        reflection = one_chunk_reflect_on_translation(
+            source_lang, target_lang, source_text, init_translation, country
+        )
+        final_translation = one_chunk_improve_translation(
+            source_lang, target_lang, source_text, init_translation, reflection
+        )
+        return init_translation, reflection, final_translation
+    else:
+        ic("Translating text as multiple chunks")
+        token_size = calculate_chunk_size(
+            token_count=num_tokens_in_text, token_limit=max_tokens
+        )
+        ic(token_size)
+        #using sentence splitter
+        text_parser = SentenceSplitter(
+           chunk_size=token_size,
+        )
+        source_text_chunks = text_parser.split_text(source_text)
+        translation_1_chunks = multichunk_initial_translation(
+            source_lang, target_lang, source_text_chunks
+        )
+        init_translation = "".join(translation_1_chunks)
+        _switch_to_second_model()
+        reflection_chunks = multichunk_reflect_on_translation(
+            source_lang,
+            target_lang,
+            source_text_chunks,
+            translation_1_chunks,
+            country,
+        )
+        reflection = "".join(reflection_chunks)
+        translation_2_chunks = multichunk_improve_translation(
+            source_lang,
+            target_lang,
+            source_text_chunks,
+            translation_1_chunks,
+            reflection_chunks,
+        )
+        final_translation = "".join(translation_2_chunks)
+        return init_translation, reflection, final_translation

app/webui/requirements.txt CHANGED Viewed

@@ -9,4 +9,4 @@ tiktoken
 icecream
 nltk
 langchain-text-splitters
-gradio

 icecream
 nltk
 langchain-text-splitters
+gradio