# article_writer/humanize.py
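"""Humanizer backend for the article_writer Gradio app.

Loads the polygraf-ai T5 paraphraser checkpoints listed in ``model_config``
and exposes ``paraphrase_text``, which rewrites input text sentence by
sentence in batches.
"""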
import gc
import torch
from nltk import sent_tokenize
import nltk
from tqdm import tqdm
import gradio as gr
from transformers import T5ForConditionalGeneration, T5Tokenizer
nltk.download("punkt")
# autodetect the available device
GPU_IDX = 1 # which GPU to use
if torch.cuda.is_available():
num_gpus = torch.cuda.device_count()
print(f"Number of available GPUs: {num_gpus}")
assert GPU_IDX < num_gpus, f"GPU index {GPU_IDX} not available."
device = torch.device(f"cuda:{GPU_IDX}")
print(f"Using GPU: {GPU_IDX}")
else:
print("CUDA is not available. Using CPU instead.")
device = torch.device("cpu")
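# Sentences per generate() call; lower this if you run into GPU out-of-memory errors.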
batch_size = 64
# Configuration for models and their adapters
model_config = {
"Base Model": "polygraf-ai/poly-humanizer-base",
"Large Model": "polygraf-ai/poly-humanizer-large",
"XL Model": "polygraf-ai/poly-humanizer-XL-merged-v2",
}
# Cache the models and tokenizers; the XL checkpoint already has its adapter
# merged into the base weights, so no PeftModel wrapping is required.
models, tokenizers = {}, {}
for name, path in model_config.items():
model = T5ForConditionalGeneration.from_pretrained(path, torch_dtype=torch.bfloat16).to(device)
tokenizers[name] = T5Tokenizer.from_pretrained(path)
models[name] = model
print(f"Loaded model: {name}, Num. params: {model.num_parameters()}")
def paraphrase_sentences(model, tokenizer, sentences, temperature, repetition_penalty, top_k, length_penalty):
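    """Paraphrase a batch of sentences with a single generate() call.

    Each sentence is wrapped in a "Please paraphrase this sentence:" prompt,
    the batch is tokenized with padding, and the sampled outputs are decoded
    back to plain text, one paraphrase per input sentence.
    """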
inputs = ["Please paraphrase this sentence: " + sentence for sentence in sentences]
inputs = tokenizer(inputs, return_tensors="pt", padding=True, truncation=True).to(model.device)
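    # Sampling-based decoding; the knobs below trade off diversity vs. faithfulness.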
outputs = model.generate(
**inputs,
do_sample=True,
temperature=temperature,
repetition_penalty=repetition_penalty,
max_length=128,
top_k=top_k,
length_penalty=length_penalty,
)
answers = [tokenizer.decode(output, skip_special_tokens=True) for output in outputs]
return answers
def paraphrase_text(
text,
progress=gr.Progress(),
model_name="Base Model",
temperature=1.2,
repetition_penalty=1.0,
top_k=50,
length_penalty=1.0,
):
"""
Optimization here is to feed all sentences at once to the model.
Paragraphs are stored as a number of sentences per paragraph.
"""
progress(0, desc="Starting to Humanize")
    # Look up the requested model and tokenizer
tokenizer = tokenizers[model_name]
model = models[model_name].to(device)
# Split the text into paragraphs and then into sentences
paragraphs = text.split("\n")
all_sentences = []
sentences_per_paragraph = []
for paragraph in paragraphs:
sentences = sent_tokenize(paragraph)
sentences_per_paragraph.append(len(sentences))
all_sentences.extend(sentences)
# Process all sentences in batches
paraphrased_sentences = []
for i in progress.tqdm(range(0, len(all_sentences), batch_size)):
batch_sentences = all_sentences[i : i + batch_size]
paraphrased_batch = paraphrase_sentences(
model, tokenizer, batch_sentences, temperature, repetition_penalty, top_k, length_penalty
)
paraphrased_sentences.extend(paraphrased_batch)
        # Release cached GPU memory between batches to limit peak usage
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        gc.collect()
# Reconstruct paragraphs
humanized_paragraphs = []
sentence_index = 0
for num_sentences in sentences_per_paragraph:
humanized_paragraph = " ".join(paraphrased_sentences[sentence_index : sentence_index + num_sentences])
humanized_paragraphs.append(humanized_paragraph)
sentence_index += num_sentences
humanized_text = "\n".join(humanized_paragraphs)
return humanized_text
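

# A minimal smoke test, assuming the checkpoints above loaded successfully.
# It exercises paraphrase_sentences() directly because, unlike
# paraphrase_text(), it does not depend on a live Gradio progress context.
if __name__ == "__main__":
    demo_sentences = [
        "The quick brown fox jumps over the lazy dog.",
        "Paraphrasing models can rewrite a sentence in many different styles.",
    ]
    results = paraphrase_sentences(
        models["Base Model"],
        tokenizers["Base Model"],
        demo_sentences,
        temperature=1.2,
        repetition_penalty=1.0,
        top_k=50,
        length_penalty=1.0,
    )
    for original, rewritten in zip(demo_sentences, results):
        print(f"{original}\n -> {rewritten}")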