theQuert committed on
Commit
eef2376
·
1 Parent(s): 38e5dcb

Update to support highlighting differences

Browse files
Files changed (1) hide show
  1. app.py +34 -25
app.py CHANGED
@@ -115,7 +115,6 @@ def split_article(article, trigger):
115
  paragraphs = article.replace("\\c\\c", "\c\c").split("\\\\c\\\\c")
116
  format_pars = [par for par in paragraphs]
117
  pars = [str(par) + " -- " + str(trigger) for par in paragraphs]
118
- # pd.DataFrame({"paragraph": pars}).to_csv("./util/experiments/input_paragraphs.csv")
119
  formatted_input = "\n".join(format_pars)
120
  return pars, formatted_input
121
 
@@ -123,20 +122,16 @@ def config():
123
  load_dotenv()
124
 
125
  def call_gpt(paragraph, trigger):
126
- # openai.api_key = os.environ.get("GPT_API")
127
- openai.api_key = "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"
128
  tokenizer = BartTokenizer.from_pretrained("theQuert/NetKUp-tokenzier")
129
  inputs_for_gpt = f"""
130
- As an article writer, your task is to provide an updated paragraph in the length same as non-updated paragraph based on the given non-updated paragraph and a triggered news.
131
  Non-updated paragraph:
132
  {paragraph}
133
 
134
  Triggered News:
135
  {trigger}
136
  """
137
- # merged_with_prompts.append(merged.strip())
138
- # pd.DataFrame({"paragraph": merged_with_prompts}).to_csv("./experiments/paragraphs_with_prompts.csv")
139
-
140
  completion = openai.ChatCompletion.create(
141
  model = "gpt-3.5-turbo",
142
  messages = [
@@ -144,6 +139,8 @@ As an article writer, your task is to provide an updated paragraph in the length
144
  ]
145
  )
146
  response = completion.choices[0].message.content
 
 
147
  return str(response)
148
 
149
  def call_vicuna(paragraphs_tirgger):
@@ -165,9 +162,19 @@ As an article writer, your task is to provide an updated paragraph in the length
165
 
166
 
167
  def main(input_article, input_trigger):
168
- csv_path = "./util/experiments/input_paragraphs.csv"
169
- if os.path.isfile(csv_path):
170
- os.remove(csv_path)
 
 
 
 
 
 
 
 
 
 
171
  modified = "TRUE"
172
  # device = "cuda" if torch.cuda.is_available() else "cpu"
173
  device="cpu"
@@ -238,12 +245,21 @@ def main(input_article, input_trigger):
238
 
239
  # feed the positive paragraphs to decoder
240
  paragraphs_needed = [data_test[idx] for idx in pos_ids]
 
241
  pd.DataFrame({"paragraph": paragraphs_needed}).to_csv("./util/experiments/paragraphs_needed.csv", index=False)
 
 
 
 
242
 
243
  # updated_paragraphs = decode(input_paragraph, input_trigger)
244
- config()
245
- updated_paragraphs = [call_gpt(paragraph.split(" -- ")[0], input_trigger) for paragraph in paragraphs_needed]
246
  # updated_paragraphs = call_vicuna(paragraphs_needed, input_trigger)
 
 
 
 
 
 
247
 
248
  # merge updated paragraphs with non-updated paragraphs
249
  paragraphs_merged = data_test.copy()
@@ -266,11 +282,7 @@ def main(input_article, input_trigger):
266
 
267
  # combine the predictions and paragraphs into csv format file
268
  merged_par_pred_df = pd.DataFrame({"paragraphs": data_test, "predictions": predictions}).to_csv("./util/experiments/par_with_class.csv")
269
- # return updated_article, modified, merged_par_pred_df
270
  modified_in_all = str(len(paragraphs_needed)) + " / " + str(len(data_test))
271
-
272
- os.remove("./util/experiments/classification.csv")
273
- os.remove("./util/experiments/paragraphs_needed.csv")
274
  return updated_article, modified_in_all
275
 
276
  def copy_to_clipboard(t):
@@ -279,12 +291,12 @@ def copy_to_clipboard(t):
279
  pyperclip.copy(t)
280
 
281
  def compare_versions():
282
- old, new = [], []
283
- with open("./util/experiments/formatted_input.txt", "r") as f:
284
  old = f.read()
285
  old = old.replace("[ADD]", "")
286
- with open("./util/experiments/updated_article.txt", "r") as f:
287
  new = f.read()
 
288
  return old, new
289
 
290
  with open("./examples/non_update.txt", "r") as f:
@@ -328,8 +340,8 @@ with gr.Blocks() as demo:
328
  fn=main,
329
  inputs=[input_1, input_2],
330
  outputs=[output_1, output_2],
331
- cache_examples=True,
332
- run_on_click=True,
333
  ),
334
  com_1_value, com_2_value = "Pls finish article updating, then click the button above", "Pls finish article updating, then click the button above."
335
  with gr.Tab("Compare between versions"):
@@ -338,10 +350,6 @@ with gr.Blocks() as demo:
338
  com_1 = gr.Textbox(label="Non-update Article", value=com_1_value, lines=15)
339
  com_2 = gr.Textbox(label="Updated Article", value=com_2_value, lines=15)
340
  btn_com.click(fn=compare_versions, inputs=[], outputs=[com_1, com_2])
341
- formatted_input_path = "./util/experiments/formatted_input.txt"
342
- updated_article_path = "./util/experiments/updated_article.txt"
343
- if os.path.isfile(formatted_input_path): os.remove(formatted_input_path)
344
- if os.path.isfile(updated_article_path): os.remove(updated_article_path)
345
  gr.HTML("""
346
  <div align="center">
347
  <p>
@@ -357,3 +365,4 @@ with gr.Blocks() as demo:
357
  )
358
 
359
  demo.launch()
 
 
115
  paragraphs = article.replace("\\c\\c", "\c\c").split("\\\\c\\\\c")
116
  format_pars = [par for par in paragraphs]
117
  pars = [str(par) + " -- " + str(trigger) for par in paragraphs]
 
118
  formatted_input = "\n".join(format_pars)
119
  return pars, formatted_input
120
 
 
122
  load_dotenv()
123
 
124
  def call_gpt(paragraph, trigger):
125
+ openai.api_key = os.environ.get("GPT_API")
 
126
  tokenizer = BartTokenizer.from_pretrained("theQuert/NetKUp-tokenzier")
127
  inputs_for_gpt = f"""
128
+ s an article writer, your task is to provide an updated paragraph in the length same as non-updated paragraph based on the given non-updated paragraph and a triggered news.Remember, the length of updated paragraph is restricted into a single paragraph.
129
  Non-updated paragraph:
130
  {paragraph}
131
 
132
  Triggered News:
133
  {trigger}
134
  """
 
 
 
135
  completion = openai.ChatCompletion.create(
136
  model = "gpt-3.5-turbo",
137
  messages = [
 
139
  ]
140
  )
141
  response = completion.choices[0].message.content
142
+ if "<"+response.split("<")[-1].strip() == "<"+paragraph.split("<")[-1].strip(): response = response
143
+ else: response = response + " <"+paragraph.split("<")[-1].strip()
144
  return str(response)
145
 
146
  def call_vicuna(paragraphs_tirgger):
 
162
 
163
 
164
  def main(input_article, input_trigger):
165
+ paths = [".util/experiments/input_paragraphs.csv",
166
+ "./util.experiments/formatted_input.txt",
167
+ "./util/experiments/updated_article.txt",
168
+ "./util/experiments/paragraphs_needed.txt",
169
+ "./util/experiments/updated_paragraphs.txt",
170
+ "./util/experiments/paragraphs_with_prompts.csv",
171
+ "./util/experiments/classification.csv",
172
+ "./util/experiments/paragraphs_needed.csv",
173
+ "./util/experiments/par_with_class.csv"]
174
+ for path in paths:
175
+ try:
176
+ if os.path.isfile(path): os.remove(path)
177
+ except: pass
178
  modified = "TRUE"
179
  # device = "cuda" if torch.cuda.is_available() else "cpu"
180
  device="cpu"
 
245
 
246
  # feed the positive paragraphs to decoder
247
  paragraphs_needed = [data_test[idx] for idx in pos_ids]
248
+ paragraphs_needed = [par.split(" -- ")[0].replace("[ADD]", "") for par in paragraphs_needed]
249
  pd.DataFrame({"paragraph": paragraphs_needed}).to_csv("./util/experiments/paragraphs_needed.csv", index=False)
250
+ paragraphs_needed_str = "\n\n".join(paragraphs_needed)
251
+ # paragraphs_needed_str = paragraphs_needed_str.replace("Updated Paragraph:\n", "")
252
+ with open("./util/experiments/paragraphs_needed.txt", "w") as f:
253
+ f.write(paragraphs_needed_str)
254
 
255
  # updated_paragraphs = decode(input_paragraph, input_trigger)
 
 
256
  # updated_paragraphs = call_vicuna(paragraphs_needed, input_trigger)
257
+ config()
258
+ updated_paragraphs = [call_gpt(paragraph, input_trigger) for paragraph in paragraphs_needed]
259
+ updated_paragraphs_str = "\n\n".join(updated_paragraphs)
260
+ updated_paragraphs_str = updated_paragraphs_str.replace("Updated Paragraph:\n", "")
261
+ with open("./util/experiments/updated_paragraphs.txt", "w") as f:
262
+ f.write(updated_paragraphs_str)
263
 
264
  # merge updated paragraphs with non-updated paragraphs
265
  paragraphs_merged = data_test.copy()
 
282
 
283
  # combine the predictions and paragraphs into csv format file
284
  merged_par_pred_df = pd.DataFrame({"paragraphs": data_test, "predictions": predictions}).to_csv("./util/experiments/par_with_class.csv")
 
285
  modified_in_all = str(len(paragraphs_needed)) + " / " + str(len(data_test))
 
 
 
286
  return updated_article, modified_in_all
287
 
288
  def copy_to_clipboard(t):
 
291
  pyperclip.copy(t)
292
 
293
  def compare_versions():
294
+ with open("./util/experiments/paragraphs_needed.txt", "r") as f:
 
295
  old = f.read()
296
  old = old.replace("[ADD]", "")
297
+ with open("./util/experiments/updated_paragraphs.txt", "r") as f:
298
  new = f.read()
299
+ new = new.replace("[ADD]", "")
300
  return old, new
301
 
302
  with open("./examples/non_update.txt", "r") as f:
 
340
  fn=main,
341
  inputs=[input_1, input_2],
342
  outputs=[output_1, output_2],
343
+ # cache_examples=True,
344
+ # run_on_click=True,
345
  ),
346
  com_1_value, com_2_value = "Pls finish article updating, then click the button above", "Pls finish article updating, then click the button above."
347
  with gr.Tab("Compare between versions"):
 
350
  com_1 = gr.Textbox(label="Non-update Article", value=com_1_value, lines=15)
351
  com_2 = gr.Textbox(label="Updated Article", value=com_2_value, lines=15)
352
  btn_com.click(fn=compare_versions, inputs=[], outputs=[com_1, com_2])
 
 
 
 
353
  gr.HTML("""
354
  <div align="center">
355
  <p>
 
365
  )
366
 
367
  demo.launch()
368
+