minko186 committed
Commit 20d4ded · Parent: 20dc449

keep para structure

Files changed (1):
  1. humanize.py +33 -23
humanize.py CHANGED
@@ -71,27 +71,37 @@ def paraphrase_text(
     tokenizer = tokenizers[model_name]
     model = models[model_name]
 
-    # paraphrase each chunk of text
-    sentences = sent_tokenize(text)  # sentence boundary detection
-    paraphrases = []
-    for sentence in progress.tqdm(sentences, desc="Humanizing"):
-        sentence = sentence.strip()
-        if len(sentence) == 0:
-            continue
-        inputs = tokenizer("Please paraphrase this sentence: " + sentence, return_tensors="pt").to(device)
-        outputs = model.generate(
-            **inputs,
-            do_sample=True,
-            temperature=temperature,
-            repetition_penalty=repetition_penalty,
-            max_length=128,
-            top_k=top_k,
-            length_penalty=length_penalty,
-        )
-        paraphrased_sentence = tokenizer.decode(outputs[0], skip_special_tokens=True)
-        paraphrases.append(paraphrased_sentence)
-        print(f"\nOriginal: {sentence}")
-        print(f"Paraphrased: {paraphrased_sentence}")
+    # Split the text into paragraphs
+    paragraphs = text.split("\n")
+    humanized_paragraphs = []
 
-    combined_paraphrase = " ".join(paraphrases)
-    return combined_paraphrase
+    for paragraph in paragraphs:
+        # paraphrase each chunk of text
+        sentences = sent_tokenize(paragraph)
+        paraphrases = []
+        for sentence in progress.tqdm(sentences, desc="Humanizing"):
+            sentence = sentence.strip()
+            if len(sentence) == 0:
+                continue
+            inputs = tokenizer(
+                "Please paraphrase this sentence: " + sentence,
+                return_tensors="pt",
+            ).to(device)
+            outputs = model.generate(
+                **inputs,
+                do_sample=True,
+                temperature=temperature,
+                repetition_penalty=repetition_penalty,
+                max_length=128,
+                top_k=top_k,
+                length_penalty=length_penalty,
+            )
+            paraphrased_sentence = tokenizer.decode(outputs[0], skip_special_tokens=True)
+            paraphrases.append(paraphrased_sentence)
+            print(f"\nOriginal: {sentence}")
+            print(f"Paraphrased: {paraphrased_sentence}")
+        combined_paraphrase = " ".join(paraphrases)
+        humanized_paragraphs.append(combined_paraphrase)
+
+    humanized_text = "\n".join(humanized_paragraphs)
+    return humanized_text
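
For reference, a minimal, self-contained sketch of the control flow this commit introduces: split on "\n" first so paragraph breaks survive, paraphrase each paragraph sentence by sentence, then re-join on "\n". The fake_paraphrase helper is a hypothetical stand-in for the tokenizer / model.generate / decode pipeline, not code from this repo; sent_tokenize is NLTK's and assumes the "punkt" tokenizer data has been downloaded.

from nltk.tokenize import sent_tokenize  # assumes nltk "punkt" data is available

def fake_paraphrase(sentence: str) -> str:
    # Hypothetical stand-in for tokenizer(...) -> model.generate(...) -> decode(...)
    return sentence.upper()

def humanize(text: str) -> str:
    # Same shape as the patched paraphrase_text: paragraph split -> sentence
    # loop -> re-join, so blank lines between paragraphs are preserved.
    humanized_paragraphs = []
    for paragraph in text.split("\n"):
        sentences = [s.strip() for s in sent_tokenize(paragraph) if s.strip()]
        paraphrases = [fake_paraphrase(s) for s in sentences]
        humanized_paragraphs.append(" ".join(paraphrases))
    return "\n".join(humanized_paragraphs)

print(humanize("One. Two.\n\nThree."))
# ONE. TWO.
#
# THREE.

The previous version ran sent_tokenize(text) over the whole input and joined every paraphrase with a single space, so all newlines were lost; splitting first and joining last is what "keep para structure" refers to.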