import gc
import torch
import nltk
from nltk import sent_tokenize
import gradio as gr
from transformers import T5ForConditionalGeneration, T5Tokenizer
import language_tool_python
import re
nltk.download("punkt")
GPU_IDX = 1 # which GPU to use, starts from 0
BATCH_SIZE = 64 # number of sentences to process in one batch
# autodetect the available device
if torch.cuda.is_available():
    num_gpus = torch.cuda.device_count()
    print(f"Number of available GPUs: {num_gpus}")
    assert GPU_IDX < num_gpus, f"GPU index {GPU_IDX} not available."
    device = torch.device(f"cuda:{GPU_IDX}")
    print(f"Using GPU: {GPU_IDX}")
else:
    print("CUDA is not available. Using CPU instead.")
    device = torch.device("cpu")
# ----------------------------
# load encoder-decoder (sequence to sequence) language model
# seq2seq = "polygraf-ai/poly-humanizer-XL-merged-v2"
# seq2seq_model = T5ForConditionalGeneration.from_pretrained(seq2seq, torch_dtype=torch.bfloat16).to(device)
# seq2seq_tokenizer = T5Tokenizer.from_pretrained(seq2seq)
# print(f"Loaded model: {seq2seq}, Num. params: {seq2seq_model.num_parameters()}")
seq2seq_model = None
seq2seq_tokenizer = None
# ----------------------------
# load decoder-only (causal) language model
from unsloth import FastLanguageModel
from unsloth.chat_templates import get_chat_template
# can only use GPU 0 when using unsloth FastLanguageModel
max_seq_length = 2048 # any value can be chosen since RoPE scaling is used
dtype = None # None for auto-detection; float16 for Tesla T4/V100, bfloat16 for Ampere+ GPUs
load_in_4bit = True # Use 4bit quantization to reduce memory usage
dec_only = "polygraf-ai/phi-3-mini-rank-128"
dec_only_model, dec_only_tokenizer = FastLanguageModel.from_pretrained(
    model_name=dec_only,
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
    device_map="cuda:0",
)
FastLanguageModel.for_inference(dec_only_model) # native 2x faster inference
print(f"Loaded model: {dec_only}, Num. params: {dec_only_model.num_parameters()}")
# grammar correction tool
tool = language_tool_python.LanguageTool("en-US")
def format_and_correct_language_check(text: str) -> str:
    return tool.correct(text)
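# Illustrative example (hypothetical input; actual corrections depend on LanguageTool's rule set):
#   format_and_correct_language_check("This are a example .") might return "This is an example."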
def extract_citations(text):
    citations = re.findall(r"<(\d+)>", text)
    return [int(citation) for citation in citations]
def remove_citations(text):
    text = re.sub(r"<\d+>", "", text) # strip angle-bracket citations, e.g. "<3>"
    text = re.sub(r"\[\d+\]", "", text) # strip square-bracket citations, e.g. "[3]"
    return text
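# Illustrative behavior of the citation helpers above (hypothetical inputs):
#   extract_citations("LLMs hallucinate <3> and drift <12>.") -> [3, 12]
#   remove_citations("LLMs hallucinate <3> and drift [4].")   -> "LLMs hallucinate  and drift ."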
def humanize_batch_seq2seq(
    model,
    tokenizer,
    sentences,
    temperature,
    repetition_penalty,
    top_k,
    length_penalty,
):
    inputs = ["Please paraphrase this sentence: " + sentence for sentence in sentences]
    inputs = tokenizer(inputs, return_tensors="pt", padding=True, truncation=True).to(model.device)
    outputs = model.generate(
        **inputs,
        do_sample=True,
        temperature=temperature,
        repetition_penalty=repetition_penalty,
        max_length=128,
        top_k=top_k,
        length_penalty=length_penalty,
    )
    answers = [tokenizer.decode(output, skip_special_tokens=True) for output in outputs]
    return answers
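# Illustrative call (assumes the commented-out seq2seq model above has been loaded;
# sampling is enabled, so outputs vary between calls):
#   humanize_batch_seq2seq(seq2seq_model, seq2seq_tokenizer,
#                          ["The results were statistically significant."],
#                          temperature=1.2, repetition_penalty=1.0, top_k=50, length_penalty=1.0)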
def humanize_batch_decoder_only(
    model,
    tokenizer,
    sentences,
    temperature,
    repetition_penalty,
    top_k,
    length_penalty,
):
    pre_prompt = "As a humanizer model, your task is to rewrite the following sentence to make it more human-like. Return only the paraphrased sentence. \n\n"
    # Construct the messages_batch, one chat message per input sentence
    messages_batch = [{"from": "human", "value": f"{pre_prompt}{sentence}"} for sentence in sentences]
    # Initialize the tokenizer with the chat template
    tokenizer = get_chat_template(
        tokenizer,
        chat_template="phi-3",
        mapping={
            "role": "from",
            "content": "value",
            "user": "human",
            "assistant": "gpt",
        }, # ShareGPT style
    )
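    # With the "phi-3" chat template, each {"from": "human", "value": ...} message is rendered
    # roughly as "<|user|>\n{value}<|end|>\n<|assistant|>\n", which is why the generated reply
    # is recovered further below by splitting on the "<|assistant|>" and "<|end|>" markers.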
    # Enable native 2x faster inference
    FastLanguageModel.for_inference(model)
    # Initialize an empty list to store responses
    responses = []
    # Process each message individually
    for message in messages_batch:
        # Apply the chat template to the individual message
        inputs = tokenizer.apply_chat_template(
            [message], # Wrap the message in a list
            tokenize=True,
            add_generation_prompt=True, # Must add for generation
            return_tensors="pt",
        ).to("cuda")
        # Generate the response for the individual message
        outputs = model.generate(
            input_ids=inputs,
            max_new_tokens=1024,
            use_cache=True,
            do_sample=True,
            temperature=temperature,
            repetition_penalty=repetition_penalty,
            top_k=top_k,
            length_penalty=length_penalty,
        )
        # Decode the output (keeping special tokens, which are needed to locate the reply) and store it
        decoded_output = tokenizer.batch_decode(outputs, skip_special_tokens=False)
        responses.append(decoded_output[0])
    # Extract the assistant's reply from each decoded response
    generated_sentences = []
    for idx, response in enumerate(responses):
        generated_sentence = response.split("<|assistant|>")[1].split("<|end|>")[0].strip()
        generated_sentences.append(generated_sentence)
        print(sentences[idx])
        print(generated_sentence)
        print()
    return generated_sentences
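# Illustrative call (hypothetical sentence; generations are sampled, so outputs vary):
#   humanize_batch_decoder_only(dec_only_model, dec_only_tokenizer,
#                               ["The model achieves state-of-the-art accuracy."],
#                               temperature=1.2, repetition_penalty=1.0, top_k=50, length_penalty=1.0)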
def humanize_text(
    text,
    progress=gr.Progress(),
    model_name="Standard Model",
    temperature=1.2,
    repetition_penalty=1.0,
    top_k=50,
    length_penalty=1.0,
):
    """
    Optimization: all sentences are fed to the model in batches rather than one at a time.
    Paragraph structure is preserved by recording the number of sentences per paragraph.
    """
    progress(0, desc="Starting to Humanize")
    # Map model names to their respective processing functions
    model_map = {
        "Standard Model": humanize_batch_seq2seq,
        "Advanced Model (Beta)": humanize_batch_decoder_only,
    }
    assert model_name in model_map, f"Invalid model name: {model_name}"
    process_function = model_map[model_name]
    # Split the text into paragraphs and then into sentences
    paragraphs = text.split("\n")
    all_sentences = []
    sentences_per_paragraph = []
    citations_per_paragraph = []
    for paragraph in paragraphs:
        citations_per_paragraph.append(extract_citations(paragraph))
        paragraph = remove_citations(paragraph)
        sentences = sent_tokenize(paragraph)
        sentences_per_paragraph.append(len(sentences))
        all_sentences.extend(sentences)
    # Process all sentences in batches
    paraphrased_sentences = []
    current_batch_size = BATCH_SIZE
    i = 0
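    # Adaptive batching: on a CUDA out-of-memory error the batch size is halved
    # (down to a minimum of 1) and the same slice of sentences is retried.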
    while i < len(all_sentences):
        try:
            batch_sentences = all_sentences[i : i + current_batch_size]
            # Call the selected processing function
            paraphrased_batch = process_function(
                (seq2seq_model if model_name == "Standard Model" else dec_only_model),
                (seq2seq_tokenizer if model_name == "Standard Model" else dec_only_tokenizer),
                batch_sentences,
                temperature,
                repetition_penalty,
                top_k,
                length_penalty,
            )
            paraphrased_sentences.extend(paraphrased_batch)
            i += current_batch_size # Move to the next batch
            torch.cuda.empty_cache()
            gc.collect()
            progress(i / len(all_sentences))
        except RuntimeError as e:
            if "out of memory" in str(e):
                # Reduce the batch size by half and retry
                current_batch_size = max(1, current_batch_size // 2)
                print(f"Out of memory, reducing batch size to {current_batch_size}. Retrying...")
                torch.cuda.empty_cache()
                gc.collect()
            else:
                raise e
    # Reconstruct paragraphs
    humanized_paragraphs = []
    sentence_index = 0
    for num_sentences in sentences_per_paragraph:
        humanized_paragraph = " ".join(paraphrased_sentences[sentence_index : sentence_index + num_sentences])
        humanized_paragraphs.append(humanized_paragraph)
        sentence_index += num_sentences
    # Re-append the citations that were extracted from each paragraph
    for i, paragraph in enumerate(humanized_paragraphs):
        citation_texts = [f"<{cid}>" for cid in citations_per_paragraph[i]]
        humanized_paragraphs[i] = paragraph + " " + "".join(citation_texts)
    humanized_text = "\n\n".join(humanized_paragraphs)
    return humanized_text
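# Illustrative usage (hypothetical input; outputs vary because sampling is enabled):
#   humanize_text("Transformers dominate NLP <1>. They scale well <2>.",
#                 model_name="Advanced Model (Beta)")
# would paraphrase each sentence and re-append the "<1>"/"<2>" citation markers to the paragraph.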