vilarin committed on
Commit
3bf46ba
·
verified ·
1 Parent(s): 703602c

Upload 12 files

Browse files
app/webui/README.md CHANGED
@@ -3,6 +3,10 @@
3
 
4
  This repository contains a Gradio web UI for a translation agent that utilizes various language models for translation.
5
 
 
 
 
 
6
  **Features:**
7
 
8
  - **Tokenized Text:** Displays translated text with tokenization, highlighting differences between original and translated words.
@@ -21,6 +25,7 @@ Llama Index supported, easily extendable
21
  **Getting Started**
22
 
23
  1. **Install Dependencies:**
 
24
  **Linux(Using Python Venv)**
25
  ```bash
26
  git clone https://github.com/andrewyng/translation-agent.git
@@ -63,9 +68,11 @@ Llama Index supported, easily extendable
63
  **Usage:**
64
 
65
  1. Select your desired translation API from the Endpoint dropdown menu.
66
- 2. If using Hugging Face API, enter your `HF_TOKEN` in the `api_key` textbox.
67
- 3. Input the source text or upload your document file.
68
- 4. Submit and get translation, the UI will display the translated text with tokenization and highlight differences.
 
 
69
 
70
  **Customization:**
71
 
@@ -79,3 +86,6 @@ Contributions are welcome! Feel free to open issues or submit pull requests.
79
 
80
  This project is licensed under the MIT License.
81
 
 
 
 
 
3
 
4
  This repository contains a Gradio web UI for a translation agent that utilizes various language models for translation.
5
 
6
+ ### Preview
7
+
8
+ ![webui](image.png)
9
+
10
  **Features:**
11
 
12
  - **Tokenized Text:** Displays translated text with tokenization, highlighting differences between original and translated words.
 
25
  **Getting Started**
26
 
27
  1. **Install Dependencies:**
28
+
29
  **Linux(Using Python Venv)**
30
  ```bash
31
  git clone https://github.com/andrewyng/translation-agent.git
 
68
  **Usage:**
69
 
70
  1. Select your desired translation API from the Endpoint dropdown menu.
71
+ 2. Input the source language, target language, and country (optional).
72
+ 3. If using Hugging Face API, enter your `HF_TOKEN` in the `api_key` textbox.
73
+ 4. Input the source text or upload your document file.
74
+ 5. Submit to get the translation; the UI will display the translated text with tokenization and highlight the differences.
75
+ 6. Optionally enable Second Endpoint to add another endpoint, backed by a different LLM, for reflection.
76
 
77
  **Customization:**
78
 
 
86
 
87
  This project is licensed under the MIT License.
88
 
89
+ **DEMO:**
90
+
91
+ [Hugging Face Demo](https://huggingface.co/spaces/vilarin/Translation-Agent-WebUI)
app/webui/__pycache__/app.cpython-310.pyc CHANGED
Binary files a/app/webui/__pycache__/app.cpython-310.pyc and b/app/webui/__pycache__/app.cpython-310.pyc differ
 
app/webui/__pycache__/patch.cpython-310.pyc CHANGED
Binary files a/app/webui/__pycache__/patch.cpython-310.pyc and b/app/webui/__pycache__/patch.cpython-310.pyc differ
 
app/webui/__pycache__/process.cpython-310.pyc CHANGED
Binary files a/app/webui/__pycache__/process.cpython-310.pyc and b/app/webui/__pycache__/process.cpython-310.pyc differ
 
app/webui/app.py CHANGED
@@ -7,13 +7,17 @@ sys.path.insert(0, project_root)
7
 
8
  import re
9
  import gradio as gr
10
- from app.webui.process import model_load, diff_texts, translator
11
  from llama_index.core import SimpleDirectoryReader
12
 
13
  def huanik(
14
  endpoint,
15
  model,
16
  api_key,
 
 
 
 
17
  source_lang,
18
  target_lang,
19
  source_text,
@@ -33,13 +37,28 @@ def huanik(
33
 
34
  source_text = re.sub(r'\n+', '\n', source_text)
35
 
36
- init_translation, reflect_translation, final_translation = translator(
37
- source_lang=source_lang,
38
- target_lang=target_lang,
39
- source_text=source_text,
40
- country=country,
41
- max_tokens=max_tokens,
42
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
  final_diff = gr.HighlightedText(
45
  diff_texts(init_translation, final_translation),
@@ -66,6 +85,13 @@ def read_doc(file):
66
  docs = SimpleDirectoryReader(input_files=[file]).load_data()
67
  return docs[0].text
68
 
 
 
 
 
 
 
 
69
  TITLE = """
70
  <h1><a href="https://github.com/andrewyng/translation-agent">Translation-Agent</a> webUI</h1>
71
  """
@@ -91,8 +117,17 @@ with gr.Blocks(theme="soft", css=CSS, fill_height=True) as demo:
91
  choices=["Groq","OpenAI","Cohere","TogetherAI","Ollama","Huggingface"],
92
  value="OpenAI",
93
  )
 
94
  model = gr.Textbox(label="Model", value="gpt-4o", )
95
  api_key = gr.Textbox(label="API_KEY", type="password", )
 
 
 
 
 
 
 
 
96
  source_lang = gr.Textbox(
97
  label="Source Lang",
98
  value="English",
@@ -146,7 +181,9 @@ with gr.Blocks(theme="soft", css=CSS, fill_height=True) as demo:
146
  clear = gr.ClearButton([source_text, output_init, output_reflect, output_final])
147
 
148
  endpoint.change(fn=update_model, inputs=[endpoint], outputs=[model])
149
- submit.click(fn=huanik, inputs=[endpoint, model, api_key, source_lang, target_lang, source_text, country, max_tokens, context_window, num_output], outputs=[output_init, output_reflect, output_final, output_diff])
 
 
150
  upload.upload(fn=read_doc, inputs = upload, outputs = source_text)
151
 
152
  if __name__ == "__main__":
 
7
 
8
  import re
9
  import gradio as gr
10
+ from app.webui.process import model_load, diff_texts, translator, translator_sec
11
  from llama_index.core import SimpleDirectoryReader
12
 
13
  def huanik(
14
  endpoint,
15
  model,
16
  api_key,
17
+ choice,
18
+ endpoint2,
19
+ model2,
20
+ api_key2,
21
  source_lang,
22
  target_lang,
23
  source_text,
 
37
 
38
  source_text = re.sub(r'\n+', '\n', source_text)
39
 
40
+ if choice:
41
+ init_translation, reflect_translation, final_translation = translator_sec(
42
+ endpoint2=endpoint2,
43
+ model2=model2,
44
+ api_key2=api_key2,
45
+ context_window=context_window,
46
+ num_output=num_output,
47
+ source_lang=source_lang,
48
+ target_lang=target_lang,
49
+ source_text=source_text,
50
+ country=country,
51
+ max_tokens=max_tokens,
52
+ )
53
+
54
+ else:
55
+ init_translation, reflect_translation, final_translation = translator(
56
+ source_lang=source_lang,
57
+ target_lang=target_lang,
58
+ source_text=source_text,
59
+ country=country,
60
+ max_tokens=max_tokens,
61
+ )
62
 
63
  final_diff = gr.HighlightedText(
64
  diff_texts(init_translation, final_translation),
 
85
  docs = SimpleDirectoryReader(input_files=[file]).load_data()
86
  return docs[0].text
87
 
88
+ def enable_sec(choice):
89
+ if choice:
90
+ return gr.update(visible = True), gr.update(visible = True), gr.update(visible = True)
91
+ else:
92
+ return gr.update(visible = False), gr.update(visible = False), gr.update(visible = False)
93
+
94
+
95
  TITLE = """
96
  <h1><a href="https://github.com/andrewyng/translation-agent">Translation-Agent</a> webUI</h1>
97
  """
 
117
  choices=["Groq","OpenAI","Cohere","TogetherAI","Ollama","Huggingface"],
118
  value="OpenAI",
119
  )
120
+ choice = gr.Checkbox(label="Second Endpoint", info="Add second endpoint for reflection")
121
  model = gr.Textbox(label="Model", value="gpt-4o", )
122
  api_key = gr.Textbox(label="API_KEY", type="password", )
123
+ endpoint2 = gr.Dropdown(
124
+ label="Endpoint 2",
125
+ choices=["Groq","OpenAI","Cohere","TogetherAI","Ollama","Huggingface"],
126
+ value="OpenAI",
127
+ visible=False,
128
+ )
129
+ model2 = gr.Textbox(label="Model 2", value="gpt-4o", visible=False, )
130
+ api_key2 = gr.Textbox(label="API_KEY 2", type="password", visible=False,)
131
  source_lang = gr.Textbox(
132
  label="Source Lang",
133
  value="English",
 
181
  clear = gr.ClearButton([source_text, output_init, output_reflect, output_final])
182
 
183
  endpoint.change(fn=update_model, inputs=[endpoint], outputs=[model])
184
+ choice.select(fn=enable_sec, inputs=[choice], outputs=[endpoint2, model2, api_key2])
185
+ endpoint2.change(fn=update_model, inputs=[endpoint2], outputs=[model2])
186
+ submit.click(fn=huanik, inputs=[endpoint, model, api_key, choice, endpoint2, model2, api_key2, source_lang, target_lang, source_text, country, max_tokens, context_window, num_output], outputs=[output_init, output_reflect, output_final, output_diff])
187
  upload.upload(fn=read_doc, inputs = upload, outputs = source_text)
188
 
189
  if __name__ == "__main__":
app/webui/patch.py CHANGED
@@ -94,7 +94,6 @@ def get_completion(
94
  response = llm.chat(
95
  messages=messages,
96
  temperature=temperature,
97
- top_p=1,
98
  )
99
  return response.message.content
100
  else:
@@ -108,7 +107,6 @@ def get_completion(
108
  if json_mode:
109
  response = llm.chat(
110
  temperature=temperature,
111
- top_p=1,
112
  response_format={"type": "json_object"},
113
  messages=messages,
114
  )
@@ -116,7 +114,6 @@ def get_completion(
116
  else:
117
  response = llm.chat(
118
  temperature=temperature,
119
- top_p=1,
120
  messages=messages,
121
  )
122
  return response.message.content
 
94
  response = llm.chat(
95
  messages=messages,
96
  temperature=temperature,
 
97
  )
98
  return response.message.content
99
  else:
 
107
  if json_mode:
108
  response = llm.chat(
109
  temperature=temperature,
 
110
  response_format={"type": "json_object"},
111
  messages=messages,
112
  )
 
114
  else:
115
  response = llm.chat(
116
  temperature=temperature,
 
117
  messages=messages,
118
  )
119
  return response.message.content
app/webui/process.py CHANGED
@@ -56,6 +56,7 @@ def translator(
56
  country,
57
  max_tokens=1000,
58
  ):
 
59
  """Translate the source_text from source_lang to target_lang."""
60
  num_tokens_in_text = num_tokens_in_string(source_text)
61
 
@@ -125,4 +126,91 @@ def translator(
125
  return init_translation, reflection, final_translation
126
 
127
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  country,
57
  max_tokens=1000,
58
  ):
59
+
60
  """Translate the source_text from source_lang to target_lang."""
61
  num_tokens_in_text = num_tokens_in_string(source_text)
62
 
 
126
  return init_translation, reflection, final_translation
127
 
128
 
129
+ def translator_sec(
130
+ endpoint2,
131
+ model2,
132
+ api_key2,
133
+ context_window,
134
+ num_output,
135
+ source_lang,
136
+ target_lang,
137
+ source_text,
138
+ country,
139
+ max_tokens=1000,
140
+ ):
141
+
142
+ """Translate the source_text from source_lang to target_lang."""
143
+ num_tokens_in_text = num_tokens_in_string(source_text)
144
+
145
+ ic(num_tokens_in_text)
146
+
147
+ if num_tokens_in_text < max_tokens:
148
+ ic("Translating text as single chunk")
149
+
150
+ #Note: use yield from B() if put yield in function B()
151
+ init_translation = one_chunk_initial_translation(
152
+ source_lang, target_lang, source_text
153
+ )
154
+
155
+
156
+ reflection = one_chunk_reflect_on_translation(
157
+ source_lang, target_lang, source_text, init_translation, country
158
+ )
159
+ try:
160
+ model_load(endpoint2, model2, api_key2, context_window, num_output)
161
+ except Exception as e:
162
+ raise gr.Error(f"An unexpected error occurred: {e}")
163
+ final_translation = one_chunk_improve_translation(
164
+ source_lang, target_lang, source_text, init_translation, reflection
165
+ )
166
+
167
+ return init_translation, reflection, final_translation
168
+
169
+ else:
170
+ ic("Translating text as multiple chunks")
171
+
172
+ token_size = calculate_chunk_size(
173
+ token_count=num_tokens_in_text, token_limit=max_tokens
174
+ )
175
+
176
+ ic(token_size)
177
+
178
+ #using sentence splitter
179
+ text_parser = SentenceSplitter(
180
+ chunk_size=token_size,
181
+ )
182
+
183
+ source_text_chunks = text_parser.split_text(source_text)
184
 
185
+ translation_1_chunks = multichunk_initial_translation(
186
+ source_lang, target_lang, source_text_chunks
187
+ )
188
+
189
+ init_translation = "".join(translation_1_chunks)
190
+
191
+ try:
192
+ model_load(endpoint2, model2, api_key2, context_window, num_output)
193
+ except Exception as e:
194
+ raise gr.Error(f"An unexpected error occurred: {e}")
195
+
196
+ reflection_chunks = multichunk_reflect_on_translation(
197
+ source_lang,
198
+ target_lang,
199
+ source_text_chunks,
200
+ translation_1_chunks,
201
+ country,
202
+ )
203
+
204
+ reflection = "".join(reflection_chunks)
205
+
206
+ translation_2_chunks = multichunk_improve_translation(
207
+ source_lang,
208
+ target_lang,
209
+ source_text_chunks,
210
+ translation_1_chunks,
211
+ reflection_chunks,
212
+ )
213
+
214
+ final_translation = "".join(translation_2_chunks)
215
+
216
+ return init_translation, reflection, final_translation
app/webui/requirements.txt CHANGED
@@ -9,4 +9,4 @@ tiktoken
9
  icecream
10
  nltk
11
  langchain-text-splitters
12
- gradio
 
9
  icecream
10
  nltk
11
  langchain-text-splitters
12
+ gradio