import gc

import torch
import nltk
from nltk import sent_tokenize
from tqdm import tqdm
import gradio as gr
from peft import PeftModel
from transformers import T5ForConditionalGeneration, T5Tokenizer

nltk.download("punkt")
# NLTK >= 3.9 ships the sentence tokenizer tables as a separate resource;
# uncomment if sent_tokenize raises a LookupError for "punkt_tab":
# nltk.download("punkt_tab")
# Autodetect the available device
GPU_IDX = 1  # which GPU to use
if torch.cuda.is_available():
    num_gpus = torch.cuda.device_count()
    print(f"Number of available GPUs: {num_gpus}")
    assert GPU_IDX < num_gpus, f"GPU index {GPU_IDX} not available."
    device = torch.device(f"cuda:{GPU_IDX}")
    print(f"Using GPU: {GPU_IDX}")
else:
    print("CUDA is not available. Using CPU instead.")
    device = torch.device("cpu")

batch_size = 64  # sentences per generation batch
# Configuration for models and their adapters
model_config = {
    "Base Model": "polygraf-ai/poly-humanizer-base",
    "Large Model": "polygraf-ai/poly-humanizer-large",
    "XL Model": "polygraf-ai/poly-humanizer-XL-merged-v2",
}

# Cache the models and tokenizers up front so switching models in the UI is instant
models, tokenizers = {}, {}
for name, path in model_config.items():
    model = T5ForConditionalGeneration.from_pretrained(path, torch_dtype=torch.bfloat16).to(device)
    tokenizers[name] = T5Tokenizer.from_pretrained(path)
    models[name] = model
    print(f"Loaded model: {name}, Num. params: {model.num_parameters()}")
def paraphrase_sentences(model, tokenizer, sentences, temperature, repetition_penalty, top_k, length_penalty):
    # Prefix each sentence with the instruction prompt the models expect
    inputs = ["Please paraphrase this sentence: " + sentence for sentence in sentences]
    inputs = tokenizer(inputs, return_tensors="pt", padding=True, truncation=True).to(model.device)
    outputs = model.generate(
        **inputs,
        do_sample=True,
        temperature=temperature,
        repetition_penalty=repetition_penalty,
        max_length=128,
        top_k=top_k,
        length_penalty=length_penalty,
    )
    return [tokenizer.decode(output, skip_special_tokens=True) for output in outputs]
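
# Quick smoke test for a single model (a hypothetical example, not part of
# the original app): paraphrase one sentence with the default settings.
#
#     out = paraphrase_sentences(
#         models["Base Model"], tokenizers["Base Model"],
#         ["The quick brown fox jumps over the lazy dog."],
#         temperature=1.2, repetition_penalty=1.0, top_k=50, length_penalty=1.0,
#     )
#     print(out[0])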
def paraphrase_text(
    text,
    progress=gr.Progress(),
    model_name="Base Model",
    temperature=1.2,
    repetition_penalty=1.0,
    top_k=50,
    length_penalty=1.0,
):
    """
    Paraphrase a multi-paragraph text, batching sentences across paragraphs.

    The optimization here is to feed all sentences to the model at once
    rather than paragraph by paragraph; the sentence count of each paragraph
    is recorded so the paragraph structure can be rebuilt afterwards.
    """
    progress(0, desc="Starting to Humanize")
    # Select the model and tokenizer
    tokenizer = tokenizers[model_name]
    model = models[model_name].to(device)

    # Split the text into paragraphs and then into sentences,
    # remembering how many sentences each paragraph contains
    paragraphs = text.split("\n")
    all_sentences = []
    sentences_per_paragraph = []
    for paragraph in paragraphs:
        sentences = sent_tokenize(paragraph)
        sentences_per_paragraph.append(len(sentences))
        all_sentences.extend(sentences)
    # Process all sentences in batches of `batch_size`
    paraphrased_sentences = []
    for i in progress.tqdm(range(0, len(all_sentences), batch_size)):
        batch_sentences = all_sentences[i : i + batch_size]
        paraphrased_batch = paraphrase_sentences(
            model, tokenizer, batch_sentences, temperature, repetition_penalty, top_k, length_penalty
        )
        paraphrased_sentences.extend(paraphrased_batch)
        # Free GPU memory between batches
        torch.cuda.empty_cache()
        gc.collect()
    # Reconstruct paragraphs from the flat list of paraphrased sentences
    humanized_paragraphs = []
    sentence_index = 0
    for num_sentences in sentences_per_paragraph:
        humanized_paragraph = " ".join(paraphrased_sentences[sentence_index : sentence_index + num_sentences])
        humanized_paragraphs.append(humanized_paragraph)
        sentence_index += num_sentences
    return "\n".join(humanized_paragraphs)
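
# The snippet above defines the pipeline but not the UI, even though it uses
# gr.Progress. A minimal Gradio wiring might look like the following; the
# layout, labels, and component choices are assumptions, not recovered from
# the original Space. Note that Gradio skips the `progress=gr.Progress()`
# parameter when mapping inputs, so the two components below bind to `text`
# and `model_name` in order.
with gr.Blocks() as demo:
    input_text = gr.Textbox(lines=10, label="Input text")
    model_dropdown = gr.Dropdown(choices=list(model_config.keys()), value="Base Model", label="Model")
    output_text = gr.Textbox(lines=10, label="Humanized text")
    humanize_btn = gr.Button("Humanize")
    humanize_btn.click(
        fn=paraphrase_text,
        inputs=[input_text, model_dropdown],
        outputs=output_text,
    )

demo.launch()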