vilarin committed on
Commit ad326f0 · verified · 1 Parent(s): 55feb21

Upload 3 files

Files changed (3)
  1. app/webui/app.py +277 -244
  2. app/webui/patch.py +33 -1
  3. app/webui/process.py +213 -213
app/webui/app.py CHANGED
@@ -1,245 +1,278 @@
1
- import sys
2
- import os
3
-
4
- # Add the project root to the Python path
5
- project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..'))
6
- sys.path.insert(0, project_root)
7
-
8
- import re
9
- import gradio as gr
10
- from app.webui.process import model_load, diff_texts, translator, translator_sec
11
- from llama_index.core import SimpleDirectoryReader
12
-
13
- def huanik(
14
- endpoint,
15
- model,
16
- api_key,
17
- choice,
18
- endpoint2,
19
- model2,
20
- api_key2,
21
- source_lang,
22
- target_lang,
23
- source_text,
24
- country,
25
- max_tokens,
26
- context_window,
27
- num_output,
28
- ):
29
-
30
- if not source_text or source_lang == target_lang:
31
- raise gr.Error("Please check that the content or options are entered correctly.")
32
-
33
- try:
34
- model_load(endpoint, model, api_key, context_window, num_output)
35
- except Exception as e:
36
- raise gr.Error(f"An unexpected error occurred: {e}")
37
-
38
- source_text = re.sub(r'(?m)^\s*$\n?', '', source_text)
39
-
40
- if choice:
41
- init_translation, reflect_translation, final_translation = translator_sec(
42
- endpoint2=endpoint2,
43
- model2=model2,
44
- api_key2=api_key2,
45
- context_window=context_window,
46
- num_output=num_output,
47
- source_lang=source_lang,
48
- target_lang=target_lang,
49
- source_text=source_text,
50
- country=country,
51
- max_tokens=max_tokens,
52
- )
53
-
54
- else:
55
- init_translation, reflect_translation, final_translation = translator(
56
- source_lang=source_lang,
57
- target_lang=target_lang,
58
- source_text=source_text,
59
- country=country,
60
- max_tokens=max_tokens,
61
- )
62
-
63
- final_diff = gr.HighlightedText(
64
- diff_texts(init_translation, final_translation),
65
- label="Diff translation",
66
- combine_adjacent=True,
67
- show_legend=True,
68
- visible=True,
69
- color_map={"removed": "red", "added": "green"})
70
-
71
- return init_translation, reflect_translation, final_translation, final_diff
72
-
73
- def update_model(endpoint):
74
- endpoint_model_map = {
75
- "Groq": "llama3-70b-8192",
76
- "OpenAI": "gpt-4o",
77
- "Cohere": "command-r",
78
- "TogetherAI": "Qwen/Qwen2-72B-Instruct",
79
- "Ollama": "llama3",
80
- "Huggingface": "mistralai/Mistral-7B-Instruct-v0.3"
81
- }
82
- return gr.update(value=endpoint_model_map[endpoint])
83
-
84
- def read_doc(file):
85
- docs = SimpleDirectoryReader(input_files=[file]).load_data()
86
- texts = ""
87
- for doc in docs:
88
- texts += doc.text
89
- texts = re.sub(r'(?m)^\s*$\n?', '', texts)
90
- return texts
91
-
92
- def enable_sec(choice):
93
- if choice:
94
- return gr.update(visible = True), gr.update(visible = True), gr.update(visible = True)
95
- else:
96
- return gr.update(visible = False), gr.update(visible = False), gr.update(visible = False)
97
-
98
- def update_menu(visible):
99
- return not visible, gr.update(visible=not visible)
100
-
101
- TITLE = """
102
- <div style="display: inline-flex;">
103
- <div style="margin-left: 6px; font-size:32px; color: #6366f1"><b>Translation Agent</b> WebUI</div>
104
- </div>
105
- """
106
-
107
- CSS = """
108
- h1 {
109
- text-align: center;
110
- display: block;
111
- height: 10vh;
112
- align-content: center;
113
- }
114
- footer {
115
- visibility: hidden;
116
- }
117
- .menu_btn {
118
- width: 48px;
119
- height: 48px;
120
- max-width: 48px;
121
- min-width: 48px;
122
- padding: 0px;
123
- background-color: transparent;
124
- border: none;
125
- cursor: pointer;
126
- position: relative;
127
- box-shadow: none;
128
- }
129
- .menu_btn::before,
130
- .menu_btn::after {
131
- content: '';
132
- position: absolute;
133
- width: 30px;
134
- height: 3px;
135
- background-color: #4f46e5;
136
- transition: transform 0.3s ease;
137
- }
138
- .menu_btn::before {
139
- top: 12px;
140
- box-shadow: 0 8px 0 #6366f1;
141
- }
142
- .menu_btn::after {
143
- bottom: 16px;
144
- }
145
- .menu_btn.active::before {
146
- transform: translateY(8px) rotate(45deg);
147
- box-shadow: none;
148
- }
149
- .menu_btn.active::after {
150
- transform: translateY(-8px) rotate(-45deg);
151
- }
152
- """
153
-
154
- JS = """
155
- function () {
156
- const menuBtn = document.getElementById('menu');
157
- menuBtn.classList.toggle('active');
158
- }
159
-
160
- """
161
-
162
- with gr.Blocks(theme="soft", css=CSS, fill_height=True) as demo:
163
- with gr.Row():
164
- visible = gr.State(value=True)
165
- menuBtn = gr.Button(value="", elem_classes="menu_btn", elem_id="menu", size="sm")
166
- gr.HTML(TITLE)
167
- with gr.Row():
168
- with gr.Column(scale=1) as menubar:
169
- endpoint = gr.Dropdown(
170
- label="Endpoint",
171
- choices=["Groq","OpenAI","Cohere","TogetherAI","Ollama","Huggingface"],
172
- value="Huggingface",
173
- )
174
- choice = gr.Checkbox(label="Second Endpoint", info="Add second endpoint for reflection")
175
- model = gr.Textbox(label="Model", value="mistralai/Mistral-7B-Instruct-v0.3", )
176
- api_key = gr.Textbox(label="API_KEY", type="password", )
177
- endpoint2 = gr.Dropdown(
178
- label="Endpoint 2",
179
- choices=["Groq","OpenAI","Cohere","TogetherAI","Ollama","Huggingface"],
180
- value="Groq",
181
- visible=False,
182
- )
183
- model2 = gr.Textbox(label="Model 2", value="llama3-70b-8192", visible=False,)
184
- api_key2 = gr.Textbox(label="API_KEY 2", type="password", visible=False,)
185
- source_lang = gr.Textbox(
186
- label="Source Lang",
187
- value="English",
188
- )
189
- target_lang = gr.Textbox(
190
- label="Target Lang",
191
- value="Spanish",
192
- )
193
- country = gr.Textbox(label="Country", value="Argentina", max_lines=1)
194
- with gr.Accordion("Advanced Options", open=False):
195
- max_tokens = gr.Slider(
196
- label="Max tokens Per Chunk",
197
- minimum=512,
198
- maximum=2046,
199
- value=1000,
200
- step=8,
201
- )
202
- context_window = gr.Slider(
203
- label="Context Window",
204
- minimum=512,
205
- maximum=8192,
206
- value=4096,
207
- step=8,
208
- )
209
- num_output = gr.Slider(
210
- label="Output Num",
211
- minimum=256,
212
- maximum=8192,
213
- value=512,
214
- step=8,
215
- )
216
- with gr.Column(scale=4):
217
- source_text = gr.Textbox(
218
- label="Source Text",
219
- value="How we live is so different from how we ought to live that he who studies "+\
220
- "what ought to be done rather than what is done will learn the way to his downfall "+\
221
- "rather than to his preservation.",
222
- lines=12,
223
- )
224
- with gr.Tab("Final"):
225
- output_final = gr.Textbox(label="Final Translation", lines=12, show_copy_button=True)
226
- with gr.Tab("Initial"):
227
- output_init = gr.Textbox(label="Init Translation", lines=12, show_copy_button=True)
228
- with gr.Tab("Reflection"):
229
- output_reflect = gr.Textbox(label="Reflection", lines=12, show_copy_button=True)
230
- with gr.Tab("Diff"):
231
- output_diff = gr.HighlightedText(visible = False)
232
- with gr.Row():
233
- submit = gr.Button(value="Submit")
234
- upload = gr.UploadButton(label="Upload", file_types=["text"])
235
- clear = gr.ClearButton([source_text, output_init, output_reflect, output_final])
236
-
237
- menuBtn.click(fn=update_menu, inputs=visible, outputs=[visible, menubar], js=JS)
238
- endpoint.change(fn=update_model, inputs=[endpoint], outputs=[model])
239
- choice.select(fn=enable_sec, inputs=[choice], outputs=[endpoint2, model2, api_key2])
240
- endpoint2.change(fn=update_model, inputs=[endpoint2], outputs=[model2])
241
- submit.click(fn=huanik, inputs=[endpoint, model, api_key, choice, endpoint2, model2, api_key2, source_lang, target_lang, source_text, country, max_tokens, context_window, num_output], outputs=[output_init, output_reflect, output_final, output_diff])
242
- upload.upload(fn=read_doc, inputs = upload, outputs = source_text)
243
-
244
- if __name__ == "__main__":
245
  demo.queue(api_open=False).launch(show_api=False, share=False)
 
1
+ import sys
2
+ import os
3
+
4
+ # Add the project root to the Python path
5
+ project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..'))
6
+ sys.path.insert(0, project_root)
7
+
8
+ import re
9
+ import gradio as gr
10
+ from glob import glob
11
+ from app.webui.process import model_load, diff_texts, translator, translator_sec
12
+ from llama_index.core import SimpleDirectoryReader
13
+
14
+ def huanik(
15
+ endpoint: str,
16
+ model: str,
17
+ api_key: str,
18
+ choice: bool,
19
+ endpoint2: str,
20
+ model2: str,
21
+ api_key2: str,
22
+ source_lang: str,
23
+ target_lang: str,
24
+ source_text: str,
25
+ country: str,
26
+ max_tokens: int,
27
+ context_window: int,
28
+ num_output: int,
29
+ rpm: int,
30
+ ):
31
+
32
+ if not source_text or source_lang == target_lang:
33
+ raise gr.Error("Please check that the content or options are entered correctly.")
34
+
35
+ try:
36
+ model_load(endpoint, model, api_key, context_window, num_output, rpm)
37
+ except Exception as e:
38
+ raise gr.Error(f"An unexpected error occurred: {e}")
39
+
40
+ source_text = re.sub(r'(?m)^\s*$\n?', '', source_text)
41
+
42
+ if choice:
43
+ init_translation, reflect_translation, final_translation = translator_sec(
44
+ endpoint2=endpoint2,
45
+ model2=model2,
46
+ api_key2=api_key2,
47
+ context_window=context_window,
48
+ num_output=num_output,
49
+ source_lang=source_lang,
50
+ target_lang=target_lang,
51
+ source_text=source_text,
52
+ country=country,
53
+ max_tokens=max_tokens,
54
+ )
55
+
56
+ else:
57
+ init_translation, reflect_translation, final_translation = translator(
58
+ source_lang=source_lang,
59
+ target_lang=target_lang,
60
+ source_text=source_text,
61
+ country=country,
62
+ max_tokens=max_tokens,
63
+ )
64
+
65
+ final_diff = gr.HighlightedText(
66
+ diff_texts(init_translation, final_translation),
67
+ label="Diff translation",
68
+ combine_adjacent=True,
69
+ show_legend=True,
70
+ visible=True,
71
+ color_map={"removed": "red", "added": "green"})
72
+
73
+ return init_translation, reflect_translation, final_translation, final_diff
74
+
75
+ def update_model(endpoint):
76
+ endpoint_model_map = {
77
+ "Groq": "llama3-70b-8192",
78
+ "OpenAI": "gpt-4o",
79
+ "Cohere": "command-r",
80
+ "TogetherAI": "Qwen/Qwen2-72B-Instruct",
81
+ "Ollama": "llama3",
82
+ "Huggingface": "mistralai/Mistral-7B-Instruct-v0.3"
83
+ }
84
+ return gr.update(value=endpoint_model_map[endpoint])
85
+
86
+ def read_doc(file):
87
+ docs = SimpleDirectoryReader(input_files=[file]).load_data()
88
+ texts = ""
89
+ for doc in docs:
90
+ texts += doc.text
91
+ texts = re.sub(r'(?m)^\s*$\n?', '', texts)
92
+ return texts
93
+
94
+ def enable_sec(choice):
95
+ if choice:
96
+ return gr.update(visible = True), gr.update(visible = True), gr.update(visible = True)
97
+ else:
98
+ return gr.update(visible = False), gr.update(visible = False), gr.update(visible = False)
99
+
100
+ def update_menu(visible):
101
+ return not visible, gr.update(visible=not visible)
102
+
103
+ def export_txt(strings):
104
+ os.makedirs("outputs", exist_ok=True)
105
+ base_count = len(glob(os.path.join("outputs", "*.txt")))
106
+ file_path = os.path.join("outputs", f"{base_count:06d}.txt")
107
+ with open(file_path, "w", encoding="utf-8") as f:
108
+ f.write(strings)
109
+ return gr.update(value=file_path, visible=True)
110
+
111
+ def switch(source_lang,source_text,target_lang,output_final):
112
+ if output_final:
113
+ return gr.update(value=target_lang), gr.update(value=output_final), gr.update(value=source_lang), gr.update(value=source_text)
114
+ else:
115
+ return gr.update(value=target_lang), gr.update(value=source_text), gr.update(value=source_lang), gr.update(value="")
116
+
117
+ TITLE = """
118
+ <div style="display: inline-flex;">
119
+ <div style="margin-left: 6px; font-size:32px; color: #6366f1"><b>Translation Agent</b> WebUI</div>
120
+ </div>
121
+ """
122
+
123
+ CSS = """
124
+ h1 {
125
+ text-align: center;
126
+ display: block;
127
+ height: 10vh;
128
+ align-content: center;
129
+ }
130
+ footer {
131
+ visibility: hidden;
132
+ }
133
+ .menu_btn {
134
+ width: 48px;
135
+ height: 48px;
136
+ max-width: 48px;
137
+ min-width: 48px;
138
+ padding: 0px;
139
+ background-color: transparent;
140
+ border: none;
141
+ cursor: pointer;
142
+ position: relative;
143
+ box-shadow: none;
144
+ }
145
+ .menu_btn::before,
146
+ .menu_btn::after {
147
+ content: '';
148
+ position: absolute;
149
+ width: 30px;
150
+ height: 3px;
151
+ background-color: #4f46e5;
152
+ transition: transform 0.3s ease;
153
+ }
154
+ .menu_btn::before {
155
+ top: 12px;
156
+ box-shadow: 0 8px 0 #6366f1;
157
+ }
158
+ .menu_btn::after {
159
+ bottom: 16px;
160
+ }
161
+ .menu_btn.active::before {
162
+ transform: translateY(8px) rotate(45deg);
163
+ box-shadow: none;
164
+ }
165
+ .menu_btn.active::after {
166
+ transform: translateY(-8px) rotate(-45deg);
167
+ }
168
+ .lang {
169
+ max-width: 100px;
170
+ min-width: 100px;
171
+ }
172
+ """
173
+
174
+ JS = """
175
+ function () {
176
+ const menuBtn = document.getElementById('menu');
177
+ menuBtn.classList.toggle('active');
178
+ }
179
+
180
+ """
181
+
182
+ with gr.Blocks(theme="soft", css=CSS, fill_height=True) as demo:
183
+ with gr.Row():
184
+ visible = gr.State(value=True)
185
+ menuBtn = gr.Button(value="", elem_classes="menu_btn", elem_id="menu", size="sm")
186
+ gr.HTML(TITLE)
187
+ with gr.Row():
188
+ with gr.Column(scale=1) as menubar:
189
+ endpoint = gr.Dropdown(
190
+ label="Endpoint",
191
+ choices=["Groq","OpenAI","Cohere","TogetherAI","Ollama","Huggingface"],
192
+ value="OpenAI",
193
+ )
194
+ choice = gr.Checkbox(label="Second Endpoint", info="Add second endpoint for reflection")
195
+ model = gr.Textbox(label="Model", value="gpt-4o", )
196
+ api_key = gr.Textbox(label="API_KEY", type="password", )
197
+ endpoint2 = gr.Dropdown(
198
+ label="Endpoint 2",
199
+ choices=["Groq","OpenAI","Cohere","TogetherAI","Ollama","Huggingface"],
200
+ value="OpenAI",
201
+ visible=False,
202
+ )
203
+ model2 = gr.Textbox(label="Model 2", value="gpt-4o", visible=False,)
204
+ api_key2 = gr.Textbox(label="API_KEY 2", type="password", visible=False,)
205
+ with gr.Row():
206
+ source_lang = gr.Textbox(
207
+ label="Source Lang",
208
+ value="English",
209
+ elem_classes = "lang",
210
+ )
211
+ target_lang = gr.Textbox(
212
+ label="Target Lang",
213
+ value="Spanish",
214
+ elem_classes = "lang",
215
+ )
216
+ switchBtn = gr.Button(value="🔄️")
217
+ country = gr.Textbox(label="Country", value="Argentina", max_lines=1)
218
+ with gr.Accordion("Advanced Options", open=False):
219
+ max_tokens = gr.Slider(
220
+ label="Max tokens Per Chunk",
221
+ minimum=512,
222
+ maximum=2046,
223
+ value=1000,
224
+ step=8,
225
+ )
226
+ context_window = gr.Slider(
227
+ label="Context Window",
228
+ minimum=512,
229
+ maximum=8192,
230
+ value=4096,
231
+ step=8,
232
+ )
233
+ num_output = gr.Slider(
234
+ label="Output Num",
235
+ minimum=256,
236
+ maximum=8192,
237
+ value=512,
238
+ step=8,
239
+ )
240
+ rpm = gr.Slider(
241
+ label="Request Per Minute",
242
+ minimum=1,
243
+ maximum=1000,
244
+ value=60,
245
+ step=1,
246
+ )
247
+ with gr.Column(scale=4):
248
+ source_text = gr.Textbox(
249
+ label="Source Text",
250
+ value="How we live is so different from how we ought to live that he who studies "+\
251
+ "what ought to be done rather than what is done will learn the way to his downfall "+\
252
+ "rather than to his preservation.",
253
+ lines=12,
254
+ )
255
+ with gr.Tab("Final"):
256
+ output_final = gr.Textbox(label="Final Translation", lines=12, show_copy_button=True)
257
+ with gr.Tab("Initial"):
258
+ output_init = gr.Textbox(label="Init Translation", lines=12, show_copy_button=True)
259
+ with gr.Tab("Reflection"):
260
+ output_reflect = gr.Textbox(label="Reflection", lines=12, show_copy_button=True)
261
+ with gr.Tab("Diff"):
262
+ output_diff = gr.HighlightedText(visible = False)
263
+ with gr.Row():
264
+ submit = gr.Button(value="Translate")
265
+ upload = gr.UploadButton(label="Upload", file_types=["text"])
266
+ export = gr.DownloadButton(visible=False)
267
+ clear = gr.ClearButton([source_text, output_init, output_reflect, output_final])
268
+
269
+ switchBtn.click(fn=switch, inputs=[source_lang,source_text,target_lang,output_final], outputs=[source_lang,source_text,target_lang,output_final])
270
+ menuBtn.click(fn=update_menu, inputs=visible, outputs=[visible, menubar], js=JS)
271
+ endpoint.change(fn=update_model, inputs=[endpoint], outputs=[model])
272
+ choice.select(fn=enable_sec, inputs=[choice], outputs=[endpoint2, model2, api_key2])
273
+ endpoint2.change(fn=update_model, inputs=[endpoint2], outputs=[model2])
274
+ submit.click(fn=huanik, inputs=[endpoint, model, api_key, choice, endpoint2, model2, api_key2, source_lang, target_lang, source_text, country, max_tokens, context_window, num_output, rpm], outputs=[output_init, output_reflect, output_final, output_diff])
275
+ upload.upload(fn=read_doc, inputs = upload, outputs = source_text)
276
+ output_final.change(fn=export_txt, inputs=output_final, outputs=[export])
277
+ if __name__ == "__main__":
278
  demo.queue(api_open=False).launch(show_api=False, share=False)
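
For reference, the new export_txt helper plus the hidden gr.DownloadButton implement a "write file, then reveal download" pattern. A minimal standalone sketch of that pattern, assuming Gradio 4.x (the fixed file name and layout here are illustrative only, not part of the commit):

import os
import gradio as gr

def export_txt(text):
    # Illustrative fixed path; the WebUI instead numbers files under outputs/
    os.makedirs("outputs", exist_ok=True)
    path = os.path.join("outputs", "demo.txt")
    with open(path, "w", encoding="utf-8") as f:
        f.write(text)
    # Point the hidden download button at the new file and reveal it
    return gr.update(value=path, visible=True)

with gr.Blocks() as demo:
    final = gr.Textbox(label="Final Translation")
    export = gr.DownloadButton("Download", visible=False)
    final.change(fn=export_txt, inputs=final, outputs=export)

demo.launch()
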
app/webui/patch.py CHANGED
@@ -1,5 +1,8 @@
1
  # a monkey patch to use llama-index completion
2
  import os
3
  from typing import Union
4
  import src.translation_agent.utils as utils
5
 
@@ -13,15 +16,16 @@ from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
13
  from llama_index.core import Settings
14
  from llama_index.core.llms import ChatMessage
15
 
 
16
 
17
  # Add your LLMs here
18
-
19
  def model_load(
20
  endpoint: str,
21
  model: str,
22
  api_key: str = None,
23
  context_window: int = 4096,
24
  num_output: int = 512,
25
  ):
26
  if endpoint == "Groq":
27
  llm = Groq(
@@ -53,6 +57,10 @@ def model_load(
53
  token=api_key if api_key else os.getenv("HF_TOKEN"),
54
  task="text-generation",
55
  )
56
  Settings.llm = llm
57
  # maximum input size to the LLM
58
  Settings.context_window = context_window
@@ -60,7 +68,29 @@ def model_load(
60
  # number of tokens reserved for text generation.
61
  Settings.num_output = num_output
62
 
63
 
64
  def get_completion(
65
  prompt: str,
66
  system_message: str = "You are a helpful assistant.",
@@ -84,6 +114,7 @@ def get_completion(
84
  If json_mode is True, returns the complete API response as a dictionary.
85
  If json_mode is False, returns the generated text as a string.
86
  """
87
  llm = Settings.llm
88
  if llm.class_name() == "HuggingFaceInferenceAPI":
89
  llm.system_prompt = system_message
@@ -91,6 +122,7 @@ def get_completion(
91
  ChatMessage(
92
  role="user", content=prompt),
93
  ]
94
  response = llm.chat(
95
  messages=messages,
96
  temperature=temperature,
 
1
  # a monkey patch to use llama-index completion
2
  import os
3
+ import time
4
+ from functools import wraps
5
+ from threading import Lock
6
  from typing import Union
7
  import src.translation_agent.utils as utils
8
 
 
16
  from llama_index.core import Settings
17
  from llama_index.core.llms import ChatMessage
18
 
19
+ RPM = 60
20
 
21
  # Add your LLMs here
 
22
  def model_load(
23
  endpoint: str,
24
  model: str,
25
  api_key: str = None,
26
  context_window: int = 4096,
27
  num_output: int = 512,
28
+ rpm: int = RPM,
29
  ):
30
  if endpoint == "Groq":
31
  llm = Groq(
 
57
  token=api_key if api_key else os.getenv("HF_TOKEN"),
58
  task="text-generation",
59
  )
60
+
61
+ global RPM
62
+ RPM = rpm
63
+
64
  Settings.llm = llm
65
  # maximum input size to the LLM
66
  Settings.context_window = context_window
 
68
  # number of tokens reserved for text generation.
69
  Settings.num_output = num_output
70
 
71
+ def rate_limit(get_max_per_minute):
72
+ def decorator(func):
73
+ lock = Lock()
74
+ last_called = [0.0]
75
+
76
+ @wraps(func)
77
+ def wrapper(*args, **kwargs):
78
+ with lock:
79
+ max_per_minute = get_max_per_minute()
80
+ min_interval = 60.0 / max_per_minute
81
+ elapsed = time.time() - last_called[0]
82
+ left_to_wait = min_interval - elapsed
83
 
84
+ if left_to_wait > 0:
85
+ time.sleep(left_to_wait)
86
+
87
+ ret = func(*args, **kwargs)
88
+ last_called[0] = time.time()
89
+ return ret
90
+ return wrapper
91
+ return decorator
92
+
93
+ @rate_limit(lambda: RPM)
94
  def get_completion(
95
  prompt: str,
96
  system_message: str = "You are a helpful assistant.",
 
114
  If json_mode is True, returns the complete API response as a dictionary.
115
  If json_mode is False, returns the generated text as a string.
116
  """
117
+ print(time.localtime())
118
  llm = Settings.llm
119
  if llm.class_name() == "HuggingFaceInferenceAPI":
120
  llm.system_prompt = system_message
 
122
  ChatMessage(
123
  role="user", content=prompt),
124
  ]
125
+
126
  response = llm.chat(
127
  messages=messages,
128
  temperature=temperature,
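
The substance of the patch.py change is the client-side throttle: model_load now stores the WebUI's "Requests Per Minute" slider value in the module-level RPM, and get_completion is wrapped by a rate_limit decorator that enforces it. A minimal standalone sketch of the same decorator shape, with an assumed fixed limit and a dummy function for illustration:

import time
from functools import wraps
from threading import Lock

RPM = 120  # assumed limit for this sketch; the WebUI supplies the slider value instead

def rate_limit(get_max_per_minute):
    # Serialize calls and enforce a minimum gap of 60 / limit seconds between them
    def decorator(func):
        lock = Lock()
        last_called = [0.0]

        @wraps(func)
        def wrapper(*args, **kwargs):
            with lock:
                min_interval = 60.0 / get_max_per_minute()
                left_to_wait = min_interval - (time.time() - last_called[0])
                if left_to_wait > 0:
                    time.sleep(left_to_wait)
                result = func(*args, **kwargs)
                last_called[0] = time.time()
                return result
        return wrapper
    return decorator

@rate_limit(lambda: RPM)
def ping(i):
    print(i, time.strftime("%H:%M:%S"))

for i in range(3):
    ping(i)  # successive calls are spaced at least 60 / RPM seconds apart
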
app/webui/process.py CHANGED
@@ -1,213 +1,213 @@
1
- import gradio as gr
2
- from simplemma import simple_tokenizer
3
- from difflib import Differ
4
- from icecream import ic
5
- from app.webui.patch import model_load,num_tokens_in_string,one_chunk_initial_translation, one_chunk_reflect_on_translation, one_chunk_improve_translation
6
- from app.webui.patch import calculate_chunk_size, multichunk_initial_translation, multichunk_reflect_on_translation, multichunk_improve_translation
7
-
8
- from llama_index.core.node_parser import SentenceSplitter
9
-
10
- def tokenize(text):
11
- # Use simplemma to tokenize the text
12
- words = simple_tokenizer(text)
13
- # Check if the text contains spaces
14
- if ' ' in text:
15
- # Create a list of words and spaces
16
- tokens = []
17
- for word in words:
18
- tokens.append(word)
19
- if not word.startswith("'") and not word.endswith("'"): # Avoid adding a space around apostrophe tokens
20
- tokens.append(' ') # Add space after each word
21
- return tokens[:-1] # Remove the last space
22
- else:
23
- return words
24
-
25
- def diff_texts(text1, text2):
26
- tokens1 = tokenize(text1)
27
- tokens2 = tokenize(text2)
28
-
29
- d = Differ()
30
- diff_result = list(d.compare(tokens1, tokens2))
31
-
32
- highlighted_text = []
33
- for token in diff_result:
34
- word = token[2:]
35
- category = None
36
- if token[0] == '+':
37
- category = 'added'
38
- elif token[0] == '-':
39
- category = 'removed'
40
- elif token[0] == '?':
41
- continue # Ignore the hints line
42
-
43
- highlighted_text.append((word, category))
44
-
45
- return highlighted_text
46
-
47
- # modified from src.translation_agent.utils.translate
48
- def translator(
49
- source_lang,
50
- target_lang,
51
- source_text,
52
- country,
53
- max_tokens=1000,
54
- ):
55
-
56
- """Translate the source_text from source_lang to target_lang."""
57
- num_tokens_in_text = num_tokens_in_string(source_text)
58
-
59
- ic(num_tokens_in_text)
60
-
61
- if num_tokens_in_text < max_tokens:
62
- ic("Translating text as single chunk")
63
-
64
- # Note: use "yield from B()" if B() itself yields
65
- init_translation = one_chunk_initial_translation(
66
- source_lang, target_lang, source_text
67
- )
68
-
69
-
70
- reflection = one_chunk_reflect_on_translation(
71
- source_lang, target_lang, source_text, init_translation, country
72
- )
73
-
74
- final_translation = one_chunk_improve_translation(
75
- source_lang, target_lang, source_text, init_translation, reflection
76
- )
77
-
78
- return init_translation, reflection, final_translation
79
-
80
- else:
81
- ic("Translating text as multiple chunks")
82
-
83
- token_size = calculate_chunk_size(
84
- token_count=num_tokens_in_text, token_limit=max_tokens
85
- )
86
-
87
- ic(token_size)
88
-
89
- #using sentence splitter
90
- text_parser = SentenceSplitter(
91
- chunk_size=token_size,
92
- )
93
-
94
- source_text_chunks = text_parser.split_text(source_text)
95
-
96
- translation_1_chunks = multichunk_initial_translation(
97
- source_lang, target_lang, source_text_chunks
98
- )
99
-
100
- init_translation = "".join(translation_1_chunks)
101
-
102
- reflection_chunks = multichunk_reflect_on_translation(
103
- source_lang,
104
- target_lang,
105
- source_text_chunks,
106
- translation_1_chunks,
107
- country,
108
- )
109
-
110
- reflection = "".join(reflection_chunks)
111
-
112
- translation_2_chunks = multichunk_improve_translation(
113
- source_lang,
114
- target_lang,
115
- source_text_chunks,
116
- translation_1_chunks,
117
- reflection_chunks,
118
- )
119
-
120
- final_translation = "".join(translation_2_chunks)
121
-
122
- return init_translation, reflection, final_translation
123
-
124
-
125
- def translator_sec(
126
- endpoint2,
127
- model2,
128
- api_key2,
129
- context_window,
130
- num_output,
131
- source_lang,
132
- target_lang,
133
- source_text,
134
- country,
135
- max_tokens=1000,
136
- ):
137
-
138
- """Translate the source_text from source_lang to target_lang."""
139
- num_tokens_in_text = num_tokens_in_string(source_text)
140
-
141
- ic(num_tokens_in_text)
142
-
143
- if num_tokens_in_text < max_tokens:
144
- ic("Translating text as single chunk")
145
-
146
- # Note: use "yield from B()" if B() itself yields
147
- init_translation = one_chunk_initial_translation(
148
- source_lang, target_lang, source_text
149
- )
150
-
151
- try:
152
- model_load(endpoint2, model2, api_key2, context_window, num_output)
153
- except Exception as e:
154
- raise gr.Error(f"An unexpected error occurred: {e}")
155
-
156
- reflection = one_chunk_reflect_on_translation(
157
- source_lang, target_lang, source_text, init_translation, country
158
- )
159
-
160
- final_translation = one_chunk_improve_translation(
161
- source_lang, target_lang, source_text, init_translation, reflection
162
- )
163
-
164
- return init_translation, reflection, final_translation
165
-
166
- else:
167
- ic("Translating text as multiple chunks")
168
-
169
- token_size = calculate_chunk_size(
170
- token_count=num_tokens_in_text, token_limit=max_tokens
171
- )
172
-
173
- ic(token_size)
174
-
175
- #using sentence splitter
176
- text_parser = SentenceSplitter(
177
- chunk_size=token_size,
178
- )
179
-
180
- source_text_chunks = text_parser.split_text(source_text)
181
-
182
- translation_1_chunks = multichunk_initial_translation(
183
- source_lang, target_lang, source_text_chunks
184
- )
185
-
186
- init_translation = "".join(translation_1_chunks)
187
-
188
- try:
189
- model_load(endpoint2, model2, api_key2, context_window, num_output)
190
- except Exception as e:
191
- raise gr.Error(f"An unexpected error occurred: {e}")
192
-
193
- reflection_chunks = multichunk_reflect_on_translation(
194
- source_lang,
195
- target_lang,
196
- source_text_chunks,
197
- translation_1_chunks,
198
- country,
199
- )
200
-
201
- reflection = "".join(reflection_chunks)
202
-
203
- translation_2_chunks = multichunk_improve_translation(
204
- source_lang,
205
- target_lang,
206
- source_text_chunks,
207
- translation_1_chunks,
208
- reflection_chunks,
209
- )
210
-
211
- final_translation = "".join(translation_2_chunks)
212
-
213
- return init_translation, reflection, final_translation
 
1
+ import gradio as gr
2
+ from simplemma import simple_tokenizer
3
+ from difflib import Differ
4
+ from icecream import ic
5
+ from app.webui.patch import model_load,num_tokens_in_string,one_chunk_initial_translation, one_chunk_reflect_on_translation, one_chunk_improve_translation
6
+ from app.webui.patch import calculate_chunk_size, multichunk_initial_translation, multichunk_reflect_on_translation, multichunk_improve_translation
7
+
8
+ from llama_index.core.node_parser import SentenceSplitter
9
+
10
+ def tokenize(text):
11
+ # Use simplemma to tokenize the text
12
+ words = simple_tokenizer(text)
13
+ # Check if the text contains spaces
14
+ if ' ' in text:
15
+ # Create a list of words and spaces
16
+ tokens = []
17
+ for word in words:
18
+ tokens.append(word)
19
+ if not word.startswith("'") and not word.endswith("'"): # Avoid adding a space around apostrophe tokens
20
+ tokens.append(' ') # Add space after each word
21
+ return tokens[:-1] # Remove the last space
22
+ else:
23
+ return words
24
+
25
+ def diff_texts(text1, text2):
26
+ tokens1 = tokenize(text1)
27
+ tokens2 = tokenize(text2)
28
+
29
+ d = Differ()
30
+ diff_result = list(d.compare(tokens1, tokens2))
31
+
32
+ highlighted_text = []
33
+ for token in diff_result:
34
+ word = token[2:]
35
+ category = None
36
+ if token[0] == '+':
37
+ category = 'added'
38
+ elif token[0] == '-':
39
+ category = 'removed'
40
+ elif token[0] == '?':
41
+ continue # Ignore the hints line
42
+
43
+ highlighted_text.append((word, category))
44
+
45
+ return highlighted_text
46
+
47
+ # modified from src.translation_agent.utils.translate
48
+ def translator(
49
+ source_lang: str,
50
+ target_lang: str,
51
+ source_text: str,
52
+ country: str,
53
+ max_tokens: int = 1000,
54
+ ):
55
+
56
+ """Translate the source_text from source_lang to target_lang."""
57
+ num_tokens_in_text = num_tokens_in_string(source_text)
58
+
59
+ ic(num_tokens_in_text)
60
+
61
+ if num_tokens_in_text < max_tokens:
62
+ ic("Translating text as single chunk")
63
+
64
+ # Note: use "yield from B()" if B() itself yields
65
+ init_translation = one_chunk_initial_translation(
66
+ source_lang, target_lang, source_text
67
+ )
68
+
69
+
70
+ reflection = one_chunk_reflect_on_translation(
71
+ source_lang, target_lang, source_text, init_translation, country
72
+ )
73
+
74
+ final_translation = one_chunk_improve_translation(
75
+ source_lang, target_lang, source_text, init_translation, reflection
76
+ )
77
+
78
+ return init_translation, reflection, final_translation
79
+
80
+ else:
81
+ ic("Translating text as multiple chunks")
82
+
83
+ token_size = calculate_chunk_size(
84
+ token_count=num_tokens_in_text, token_limit=max_tokens
85
+ )
86
+
87
+ ic(token_size)
88
+
89
+ #using sentence splitter
90
+ text_parser = SentenceSplitter(
91
+ chunk_size=token_size,
92
+ )
93
+
94
+ source_text_chunks = text_parser.split_text(source_text)
95
+
96
+ translation_1_chunks = multichunk_initial_translation(
97
+ source_lang, target_lang, source_text_chunks
98
+ )
99
+
100
+ init_translation = "".join(translation_1_chunks)
101
+
102
+ reflection_chunks = multichunk_reflect_on_translation(
103
+ source_lang,
104
+ target_lang,
105
+ source_text_chunks,
106
+ translation_1_chunks,
107
+ country,
108
+ )
109
+
110
+ reflection = "".join(reflection_chunks)
111
+
112
+ translation_2_chunks = multichunk_improve_translation(
113
+ source_lang,
114
+ target_lang,
115
+ source_text_chunks,
116
+ translation_1_chunks,
117
+ reflection_chunks,
118
+ )
119
+
120
+ final_translation = "".join(translation_2_chunks)
121
+
122
+ return init_translation, reflection, final_translation
123
+
124
+
125
+ def translator_sec(
126
+ endpoint2: str,
127
+ model2: str,
128
+ api_key2: str,
129
+ context_window: int,
130
+ num_output: int,
131
+ source_lang: str,
132
+ target_lang: str,
133
+ source_text: str,
134
+ country: str,
135
+ max_tokens: int = 1000,
136
+ ):
137
+
138
+ """Translate the source_text from source_lang to target_lang."""
139
+ num_tokens_in_text = num_tokens_in_string(source_text)
140
+
141
+ ic(num_tokens_in_text)
142
+
143
+ if num_tokens_in_text < max_tokens:
144
+ ic("Translating text as single chunk")
145
+
146
+ # Note: use "yield from B()" if B() itself yields
147
+ init_translation = one_chunk_initial_translation(
148
+ source_lang, target_lang, source_text
149
+ )
150
+
151
+ try:
152
+ model_load(endpoint2, model2, api_key2, context_window, num_output)
153
+ except Exception as e:
154
+ raise gr.Error(f"An unexpected error occurred: {e}")
155
+
156
+ reflection = one_chunk_reflect_on_translation(
157
+ source_lang, target_lang, source_text, init_translation, country
158
+ )
159
+
160
+ final_translation = one_chunk_improve_translation(
161
+ source_lang, target_lang, source_text, init_translation, reflection
162
+ )
163
+
164
+ return init_translation, reflection, final_translation
165
+
166
+ else:
167
+ ic("Translating text as multiple chunks")
168
+
169
+ token_size = calculate_chunk_size(
170
+ token_count=num_tokens_in_text, token_limit=max_tokens
171
+ )
172
+
173
+ ic(token_size)
174
+
175
+ #using sentence splitter
176
+ text_parser = SentenceSplitter(
177
+ chunk_size=token_size,
178
+ )
179
+
180
+ source_text_chunks = text_parser.split_text(source_text)
181
+
182
+ translation_1_chunks = multichunk_initial_translation(
183
+ source_lang, target_lang, source_text_chunks
184
+ )
185
+
186
+ init_translation = "".join(translation_1_chunks)
187
+
188
+ try:
189
+ model_load(endpoint2, model2, api_key2, context_window, num_output)
190
+ except Exception as e:
191
+ raise gr.Error(f"An unexpected error occurred: {e}")
192
+
193
+ reflection_chunks = multichunk_reflect_on_translation(
194
+ source_lang,
195
+ target_lang,
196
+ source_text_chunks,
197
+ translation_1_chunks,
198
+ country,
199
+ )
200
+
201
+ reflection = "".join(reflection_chunks)
202
+
203
+ translation_2_chunks = multichunk_improve_translation(
204
+ source_lang,
205
+ target_lang,
206
+ source_text_chunks,
207
+ translation_1_chunks,
208
+ reflection_chunks,
209
+ )
210
+
211
+ final_translation = "".join(translation_2_chunks)
212
+
213
+ return init_translation, reflection, final_translation
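
For context, diff_texts returns (token, category) pairs, where category is "added", "removed", or None, and app.py feeds them to gr.HighlightedText with color_map={"removed": "red", "added": "green"}. A simplified, self-contained sketch of that flow, using whitespace tokenization instead of simplemma purely for illustration:

from difflib import Differ
import gradio as gr

def diff_texts(text1, text2):
    # Emit (token, category) pairs; category is "added", "removed", or None
    d = Differ()
    pairs = []
    for token in d.compare(text1.split(), text2.split()):
        if token.startswith("? "):
            continue  # skip Differ hint lines
        category = {"+": "added", "-": "removed"}.get(token[0])
        pairs.append((token[2:] + " ", category))
    return pairs

with gr.Blocks() as demo:
    gr.HighlightedText(
        diff_texts("the quick brown fox", "the slow brown fox jumps"),
        label="Diff translation",
        combine_adjacent=True,
        show_legend=True,
        color_map={"removed": "red", "added": "green"},
    )

demo.launch()
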