eljanmahammadli committed
Commit a54c1ef · 1 Parent(s): b72ef7f

speed up humanizer: batch generation

Files changed (2)
  1. app.py +3 -1
  2. humanize.py +67 -59
app.py CHANGED
@@ -35,6 +35,9 @@ tokenizers = {
     "Polygraf AI (Advanced Model)": AutoTokenizer.from_pretrained("polygraf-ai/bc_combined_3sent"),
 }
 
+# grammar correction tool
+tool = language_tool_python.LanguageTool("en-US")
+
 
 # Function to move model to the appropriate device
 def to_device(model):
@@ -99,7 +102,6 @@ def ends_with_references(text):
 
 
 def format_and_correct_language_check(text: str) -> str:
-    tool = language_tool_python.LanguageTool("en-US")
     return tool.correct(text)
 
 
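The app.py change is a small but real speedup on its own: language_tool_python.LanguageTool starts a local LanguageTool server when constructed, and the old code constructed it inside format_and_correct_language_check, paying that startup cost on every call. Hoisting the instance to module scope pays it once at import. A minimal standalone sketch of the pattern (the _old suffix is illustrative, not from the commit):

import language_tool_python

# Before: a fresh LanguageTool (and its backing server) was created per call.
def format_and_correct_language_check_old(text: str) -> str:
    tool = language_tool_python.LanguageTool("en-US")
    return tool.correct(text)

# After: one module-level instance, reused by every call.
tool = language_tool_python.LanguageTool("en-US")

def format_and_correct_language_check(text: str) -> str:
    return tool.correct(text)
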
humanize.py CHANGED
@@ -1,8 +1,10 @@
+import gc
 import torch
 from nltk import sent_tokenize
 import nltk
 from tqdm import tqdm
 import gradio as gr
+from peft import PeftModel
 from transformers import T5ForConditionalGeneration, T5Tokenizer
 
 nltk.download("punkt")
@@ -18,35 +20,46 @@ else:
     print("CUDA is not available. Using CPU instead.")
     device = torch.device("cpu")
 
+batch_size = 64
 
 # Configuration for models and their adapters
 model_config = {
     "Base Model": "polygraf-ai/poly-humanizer-base",
     "Large Model": "polygraf-ai/poly-humanizer-large",
-    "XL Model": {
-        "path": "google/flan-t5-xl",
-        "adapters": {
-            "XL Model Adapter": "polygraf-ai/poly-humanizer-XL-adapter",
-            # "XL Law Model Adapter": "polygraf-ai/poly-humanizer-XL-law-adapter",
-            # "XL Marketing Model Adapter": "polygraf-ai/marketing-cleaned-13K-grad-acum-4-full",
-            # "XL Child Style Model Adapter": "polygraf-ai/poly-humanizer-XL-children-adapter-checkpoint-4000",
-        },
-    },
+    "XL Model": "polygraf-ai/poly-humanizer-XL-adapter",
 }
 
 # cache the base models, tokenizers, and adapters
+# initialize model and tokenizer
 models, tokenizers = {}, {}
-for name, config in model_config.items():
-    path = config if isinstance(config, str) else config["path"]
-    # initialize model and tokenizer
-    model = T5ForConditionalGeneration.from_pretrained(path, torch_dtype=torch.bfloat16).to(device)
-    models[name] = model
-    tokenizers[name] = T5Tokenizer.from_pretrained(path)
-    # load all avalable adapters, each being additional roughly 150M parameters
-    if isinstance(config, dict) and "adapters" in config:
-        for adapter_name, adapter_path in config["adapters"].items():
-            model.load_adapter(adapter_path, adapter_name=adapter_name)
-            print(f"Loaded adapter: {adapter_name}, Num. params: {model.num_parameters()}")
+for name, path in model_config.items():
+    if name == "XL Model":
+        model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-xl", torch_dtype=torch.bfloat16).to(device)
+        model = PeftModel.from_pretrained(model, path, torch_dtype=torch.bfloat16, is_trainable=False)
+        model = model.merge_and_unload()
+        models[name] = model
+        tokenizers[name] = T5Tokenizer.from_pretrained("google/flan-t5-xl")
+    else:
+        model = T5ForConditionalGeneration.from_pretrained(path, torch_dtype=torch.bfloat16).to(device)
+        models[name] = model
+        tokenizers[name] = T5Tokenizer.from_pretrained(path)
+    print(f"Loaded model: {name}, Num. params: {model.num_parameters()}")
+
+
+def paraphrase_sentences(model, tokenizer, sentences, temperature, repetition_penalty, top_k, length_penalty):
+    inputs = ["Please paraphrase this sentence: " + sentence for sentence in sentences]
+    inputs = tokenizer(inputs, return_tensors="pt", padding=True, truncation=True).to(model.device)
+    outputs = model.generate(
+        **inputs,
+        do_sample=True,
+        temperature=temperature,
+        repetition_penalty=repetition_penalty,
+        max_length=128,
+        top_k=top_k,
+        length_penalty=length_penalty,
+    )
+    answers = [tokenizer.decode(output, skip_special_tokens=True) for output in outputs]
+    return answers
 
 
 def paraphrase_text(
@@ -58,51 +71,46 @@ def paraphrase_text(
     top_k=50,
     length_penalty=1.0,
 ):
+    """
+    Optimization here is to feed all sentences at once to the model.
+    Paragraphs are stored as a number of sentences per paragraph.
+    """
     progress(0, desc="Starting to Humanize")
     progress(0.05)
-    # select the model, tokenizer and adapter
-    if "XL" in model_name:  # dynamic adapter load/unload for XL models
-        # all adapter models use the XL model as the base
-        tokenizer, model = tokenizers["XL Model"], models["XL Model"]
-        # set the adapter if it's not already set
-        if model.active_adapters() != [f"{model_name} Adapter"]:
-            model.set_adapter(f"{model_name} Adapter")
-            print(f"Using adapter: {model_name} Adapter")
-    else:
-        tokenizer = tokenizers[model_name]
-        model = models[model_name]
+    # Select the model, tokenizer, and adapter
+    tokenizer = tokenizers[model_name]
+    model = models[model_name].to(device)
 
-    # Split the text into paragraphs
+    # Split the text into paragraphs and then into sentences
     paragraphs = text.split("\n")
-    humanized_paragraphs = []
+    all_sentences = []
+    sentences_per_paragraph = []
 
-    for paragraph in progress.tqdm(paragraphs, desc="Humanizing"):
-        # paraphrase each chunk of text
+    for paragraph in paragraphs:
         sentences = sent_tokenize(paragraph)
-        paraphrases = []
-        for sentence in sentences:
-            sentence = sentence.strip()
-            if len(sentence) == 0:
-                continue
-            inputs = tokenizer(
-                "Please paraphrase this sentence: " + sentence,
-                return_tensors="pt",
-            ).to(device)
-            outputs = model.generate(
-                **inputs,
-                do_sample=True,
-                temperature=temperature,
-                repetition_penalty=repetition_penalty,
-                max_length=128,
-                top_k=top_k,
-                length_penalty=length_penalty,
-            )
-            paraphrased_sentence = tokenizer.decode(outputs[0], skip_special_tokens=True)
-            paraphrases.append(paraphrased_sentence)
-            print(f"\nOriginal: {sentence}")
-            print(f"Paraphrased: {paraphrased_sentence}")
-        combined_paraphrase = " ".join(paraphrases)
-        humanized_paragraphs.append(combined_paraphrase)
+        sentences_per_paragraph.append(len(sentences))
+        all_sentences.extend(sentences)
+
+    # Process all sentences in batches
+    paraphrased_sentences = []
+    for i in range(0, len(all_sentences), batch_size):
+        batch_sentences = all_sentences[i : i + batch_size]
+        paraphrased_batch = paraphrase_sentences(
+            model, tokenizer, batch_sentences, temperature, repetition_penalty, top_k, length_penalty
+        )
+        paraphrased_sentences.extend(paraphrased_batch)
+
+    # Clear memory
+    torch.cuda.empty_cache()
+    gc.collect()
+
+    # Reconstruct paragraphs
+    humanized_paragraphs = []
+    sentence_index = 0
+    for num_sentences in sentences_per_paragraph:
+        humanized_paragraph = " ".join(paraphrased_sentences[sentence_index : sentence_index + num_sentences])
+        humanized_paragraphs.append(humanized_paragraph)
+        sentence_index += num_sentences
 
     humanized_text = "\n".join(humanized_paragraphs)
     return humanized_text
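
The heart of the speedup is the flatten → batch → rebuild pattern in paraphrase_text: per-paragraph sentence counts are recorded before flattening, the flat list is fed to the model batch_size sentences at a time, and the counts are then used as offsets to restore paragraph boundaries. A self-contained sketch of just that bookkeeping, with a stand-in paraphrase_batch callable (the helper name and toy check are illustrative, not the commit's API):

import nltk
from nltk import sent_tokenize

nltk.download("punkt", quiet=True)

def flatten_batch_rebuild(text, paraphrase_batch, batch_size=64):
    paragraphs = text.split("\n")
    all_sentences, counts = [], []
    for paragraph in paragraphs:
        sentences = sent_tokenize(paragraph)
        counts.append(len(sentences))      # remember each paragraph's share
        all_sentences.extend(sentences)

    out = []                               # one model call per batch, not per sentence
    for i in range(0, len(all_sentences), batch_size):
        out.extend(paraphrase_batch(all_sentences[i : i + batch_size]))

    rebuilt, idx = [], 0                   # slice outputs back into paragraphs
    for n in counts:
        rebuilt.append(" ".join(out[idx : idx + n]))
        idx += n
    return "\n".join(rebuilt)

# Round-trip check with an identity "model": paragraph boundaries survive.
assert flatten_batch_rebuild("One. Two.\nThree.", lambda xs: xs) == "One. Two.\nThree."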
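One aside on paraphrase_sentences: the per-row decode loop is interchangeable with the tokenizer's built-in batch helper, which does the same thing in a single call:

# Equivalent to: [tokenizer.decode(o, skip_special_tokens=True) for o in outputs]
answers = tokenizer.batch_decode(outputs, skip_special_tokens=True)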
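The loading side changed as well: instead of keeping the XL adapter swappable at request time (the removed active_adapters()/set_adapter() branch), the LoRA weights are now folded into the base model once at startup with PEFT's merge_and_unload(), so generation runs on a plain T5ForConditionalGeneration with no adapter dispatch in the forward pass. The trade-off is that the commented-out law/marketing/children adapters can no longer be hot-swapped. The pattern, restated standalone (model and adapter IDs copied from the diff):

import torch
from peft import PeftModel
from transformers import T5ForConditionalGeneration

# Attach the LoRA adapter for inference only, then fold its deltas into the
# base weights and drop the PEFT wrapper; the result is an ordinary T5 model.
base = T5ForConditionalGeneration.from_pretrained("google/flan-t5-xl", torch_dtype=torch.bfloat16)
peft_model = PeftModel.from_pretrained(base, "polygraf-ai/poly-humanizer-XL-adapter", is_trainable=False)
model = peft_model.merge_and_unload()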