import gc
import torch
import nltk
from nltk import sent_tokenize
import gradio as gr
from transformers import T5ForConditionalGeneration, T5Tokenizer
import language_tool_python
import re
nltk.download("punkt")
GPU_IDX = 1 # which GPU to use, starts from 0
BATCH_SIZE = 64 # number of sentences to process in one batch
# autodetect the available device
if torch.cuda.is_available():
    num_gpus = torch.cuda.device_count()
    print(f"Number of available GPUs: {num_gpus}")
    assert GPU_IDX < num_gpus, f"GPU index {GPU_IDX} not available."
    device = torch.device(f"cuda:{GPU_IDX}")
    print(f"Using GPU: {GPU_IDX}")
else:
    print("CUDA is not available. Using CPU instead.")
    device = torch.device("cpu")
# ----------------------------
# load encoder-decoder (sequence to sequence) language model
# seq2seq = "polygraf-ai/poly-humanizer-XL-merged-v2"
# seq2seq_model = T5ForConditionalGeneration.from_pretrained(seq2seq, torch_dtype=torch.bfloat16).to(device)
# seq2seq_tokenizer = T5Tokenizer.from_pretrained(seq2seq)
# print(f"Loaded model: {seq2seq}, Num. params: {seq2seq_model.num_parameters()}")
seq2seq_model = None
seq2seq_tokenizer = None
# ----------------------------
# load decoder-only (causal) language model
from unsloth import FastLanguageModel
from unsloth.chat_templates import get_chat_template
# can only use GPU 0 when using unsloth FastLanguageModel
max_seq_length = 2048 # any value can be chosen since RoPE scaling is used
dtype = None # None for auto-detection; float16 for Tesla T4/V100, bfloat16 for Ampere+ GPUs
load_in_4bit = True # Use 4bit quantization to reduce memory usage
dec_only = "polygraf-ai/phi-3-mini-rank-128"
dec_only_model, dec_only_tokenizer = FastLanguageModel.from_pretrained(
    model_name=dec_only,
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
    device_map="cuda:0",
)
FastLanguageModel.for_inference(dec_only_model) # native 2x faster inference
print(f"Loaded model: {dec_only}, Num. params: {dec_only_model.num_parameters()}")
# grammar correction tool
tool = language_tool_python.LanguageTool("en-US")
def format_and_correct_language_check(text: str) -> str:
    return tool.correct(text)
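# Illustrative example (hypothetical input; actual corrections depend on LanguageTool's rule set):
#   format_and_correct_language_check("This are a example .") might return "This is an example."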
def extract_citations(text):
    citations = re.findall(r"<(\d+)>", text)
    return [int(citation) for citation in citations]
def remove_citations(text):
    text = re.sub(r"<\d+>", "", text) # strip angle-bracket citations, e.g. "<3>"
    text = re.sub(r"\[\d+\]", "", text) # strip square-bracket citations, e.g. "[3]"
    return text
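# Illustrative behavior of the citation helpers above (hypothetical inputs):
#   extract_citations("LLMs hallucinate <3> and drift <12>.") -> [3, 12]
#   remove_citations("LLMs hallucinate <3> and drift [4].")   -> "LLMs hallucinate  and drift ."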
def humanize_batch_seq2seq(
    model,
    tokenizer,
    sentences,
    temperature,
    repetition_penalty,
    top_k,
    length_penalty,
):
    inputs = ["Please paraphrase this sentence: " + sentence for sentence in sentences]
    inputs = tokenizer(inputs, return_tensors="pt", padding=True, truncation=True).to(model.device)
    outputs = model.generate(
        **inputs,
        do_sample=True,
        temperature=temperature,
        repetition_penalty=repetition_penalty,
        max_length=128,
        top_k=top_k,
        length_penalty=length_penalty,
    )
    answers = [tokenizer.decode(output, skip_special_tokens=True) for output in outputs]
    return answers
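# Illustrative call (assumes the commented-out seq2seq model above has been loaded;
# sampling is enabled, so outputs vary between calls):
#   humanize_batch_seq2seq(seq2seq_model, seq2seq_tokenizer,
#                          ["The results were statistically significant."],
#                          temperature=1.2, repetition_penalty=1.0, top_k=50, length_penalty=1.0)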
def humanize_batch_decoder_only(
    model,
    tokenizer,
    sentences,
    temperature,
    repetition_penalty,
    top_k,
    length_penalty,
):
    pre_prompt = "As a humanizer model, your task is to rewrite the following sentence to make it more human-like. Return only the paraphrased sentence. \n\n"
    # Construct the messages_batch, one chat message per input sentence
    messages_batch = [{"from": "human", "value": f"{pre_prompt}{sentence}"} for sentence in sentences]
    # Initialize the tokenizer with the chat template
    tokenizer = get_chat_template(
        tokenizer,
        chat_template="phi-3",
        mapping={
            "role": "from",
            "content": "value",
            "user": "human",
            "assistant": "gpt",
        }, # ShareGPT style
    )
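    # With the "phi-3" chat template, each {"from": "human", "value": ...} message is rendered
    # roughly as "<|user|>\n{value}<|end|>\n<|assistant|>\n", which is why the generated reply
    # is recovered further below by splitting on the "<|assistant|>" and "<|end|>" markers.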
    # Enable native 2x faster inference
    FastLanguageModel.for_inference(model)
    # Initialize an empty list to store responses
    responses = []
    # Process each message individually
    for message in messages_batch:
        # Apply the chat template to the individual message
        inputs = tokenizer.apply_chat_template(
            [message], # Wrap the message in a list
            tokenize=True,
            add_generation_prompt=True, # Must add for generation
            return_tensors="pt",
        ).to("cuda")
        # Generate the response for the individual message
        outputs = model.generate(
            input_ids=inputs,
            max_new_tokens=1024,
            use_cache=True,
            do_sample=True,
            temperature=temperature,
            repetition_penalty=repetition_penalty,
            top_k=top_k,
            length_penalty=length_penalty,
        )
        # Decode the output (keeping special tokens, which are needed to locate the reply) and store it
        decoded_output = tokenizer.batch_decode(outputs, skip_special_tokens=False)
        responses.append(decoded_output[0])
    # Extract the assistant's reply from each decoded response
    generated_sentences = []
    for idx, response in enumerate(responses):
        generated_sentence = response.split("<|assistant|>")[1].split("<|end|>")[0].strip()
        generated_sentences.append(generated_sentence)
        print(sentences[idx])
        print(generated_sentence)
        print()
    return generated_sentences
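# Illustrative call (hypothetical sentence; generations are sampled, so outputs vary):
#   humanize_batch_decoder_only(dec_only_model, dec_only_tokenizer,
#                               ["The model achieves state-of-the-art accuracy."],
#                               temperature=1.2, repetition_penalty=1.0, top_k=50, length_penalty=1.0)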
def humanize_text(
    text,
    progress=gr.Progress(),
    model_name="Standard Model",
    temperature=1.2,
    repetition_penalty=1.0,
    top_k=50,
    length_penalty=1.0,
):
    """
    Optimization: all sentences are fed to the model in batches rather than one at a time.
    Paragraph structure is preserved by recording the number of sentences per paragraph.
    """
    progress(0, desc="Starting to Humanize")
    # Map model names to their respective processing functions
    model_map = {
        "Standard Model": humanize_batch_seq2seq,
        "Advanced Model (Beta)": humanize_batch_decoder_only,
    }
    assert model_name in model_map, f"Invalid model name: {model_name}"
    process_function = model_map[model_name]
    # Split the text into paragraphs and then into sentences
    paragraphs = text.split("\n")
    all_sentences = []
    sentences_per_paragraph = []
    citations_per_paragraph = []
    for paragraph in paragraphs:
        citations_per_paragraph.append(extract_citations(paragraph))
        paragraph = remove_citations(paragraph)
        sentences = sent_tokenize(paragraph)
        sentences_per_paragraph.append(len(sentences))
        all_sentences.extend(sentences)
    # Process all sentences in batches
    paraphrased_sentences = []
    current_batch_size = BATCH_SIZE
    i = 0
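    # Adaptive batching: on a CUDA out-of-memory error the batch size is halved
    # (down to a minimum of 1) and the same slice of sentences is retried.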
    while i < len(all_sentences):
        try:
            batch_sentences = all_sentences[i : i + current_batch_size]
            # Call the selected processing function
            paraphrased_batch = process_function(
                (seq2seq_model if model_name == "Standard Model" else dec_only_model),
                (seq2seq_tokenizer if model_name == "Standard Model" else dec_only_tokenizer),
                batch_sentences,
                temperature,
                repetition_penalty,
                top_k,
                length_penalty,
            )
            paraphrased_sentences.extend(paraphrased_batch)
            i += current_batch_size # Move to the next batch
            torch.cuda.empty_cache()
            gc.collect()
            progress(i / len(all_sentences))
        except RuntimeError as e:
            if "out of memory" in str(e):
                # Reduce the batch size by half and retry
                current_batch_size = max(1, current_batch_size // 2)
                print(f"Out of memory, reducing batch size to {current_batch_size}. Retrying...")
                torch.cuda.empty_cache()
                gc.collect()
            else:
                raise e
    # Reconstruct paragraphs
    humanized_paragraphs = []
    sentence_index = 0
    for num_sentences in sentences_per_paragraph:
        humanized_paragraph = " ".join(paraphrased_sentences[sentence_index : sentence_index + num_sentences])
        humanized_paragraphs.append(humanized_paragraph)
        sentence_index += num_sentences
    # Re-append the citations that were extracted from each paragraph
    for i, paragraph in enumerate(humanized_paragraphs):
        citation_texts = [f"<{cid}>" for cid in citations_per_paragraph[i]]
        humanized_paragraphs[i] = paragraph + " " + "".join(citation_texts)
    humanized_text = "\n\n".join(humanized_paragraphs)
    return humanized_text
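# Illustrative usage (hypothetical input; outputs vary because sampling is enabled):
#   humanize_text("Transformers dominate NLP <1>. They scale well <2>.",
#                 model_name="Advanced Model (Beta)")
# would paraphrase each sentence and re-append the "<1>"/"<2>" citation markers to the paragraph.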