quble_model_v1_pretrain

Sleeping

File size: 2,005 Bytes

5fba34b
 
7165422
 
5fba34b
 
 
 
2d0f9fd
5fba34b
 
 
 
 
 
48b3788
5fba34b
 
 
48b3788
5fba34b
48b3788
5fba34b
48b3788
5fba34b
48b3788
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5fba34b
48b3788
 
 
 
 
7165422
 
5fba34b

from threading import Thread

import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel, TextIteratorStreamer
import gradio as gr

# Run on CUDA when torch reports it as available; otherwise stay on the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Identifier handed to from_pretrained for both the tokenizer and the weights
model_path = "Blexus/Quble_Test_Model_v1_Pretrain"
tokenizer = GPT2Tokenizer.from_pretrained(model_path)
model = GPT2LMHeadModel.from_pretrained(model_path)
model = model.to(device)  # place the weights on the selected device

# This script only runs inference, so switch the module to evaluation mode
model.eval()

# Generator that streams sampled text for a prompt back to the UI
def generate_text(prompt):
    """Yield the generated text for *prompt* as it grows.

    Generation runs in a background thread and pushes decoded text into a
    ``TextIteratorStreamer``; this generator consumes that stream and yields
    the accumulated text (prompt included) after every non-empty chunk, so
    the caller can re-render the output while tokens are still being sampled.

    Args:
        prompt: Text to continue. An empty (or ``None``) prompt produces a
            single empty string, because it encodes to zero tokens and there
            is nothing to condition generation on.

    Yields:
        The text produced so far, with special tokens stripped. The last
        value yielded is the complete prompt plus continuation.

    Raises:
        Exception: Any error raised by ``model.generate`` in the worker
            thread is re-raised here once the stream has ended.
    """
    if not prompt:
        yield ""
        return

    # Budget for *newly generated* tokens. Counting only new tokens keeps long
    # prompts from consuming the whole length limit.
    max_new_tokens = 50

    # Calling the tokenizer (rather than .encode) also returns the attention
    # mask, which generate() needs because pad_token_id is set to the EOS id.
    encoded = tokenizer(prompt, return_tensors="pt").to(device)

    streamer = TextIteratorStreamer(
        tokenizer,
        skip_prompt=False,          # keep the prompt in the streamed output
        skip_special_tokens=True,
    )
    generation_kwargs = {
        "input_ids": encoded["input_ids"],
        "attention_mask": encoded["attention_mask"],
        "max_new_tokens": max_new_tokens,
        "num_return_sequences": 1,
        "pad_token_id": tokenizer.eos_token_id,
        "do_sample": True,
        "top_k": 50,
        "top_p": 0.95,
        "use_cache": True,
        "streamer": streamer,
    }

    errors = []

    def _run_generation():
        # Runs in the worker thread; grad mode is per-thread, so disable it here.
        try:
            with torch.no_grad():
                model.generate(**generation_kwargs)
        except Exception as exc:
            # Hand the error to the consumer and close the stream so the
            # iteration below does not block forever on an empty queue.
            errors.append(exc)
            streamer.end()

    worker = Thread(target=_run_generation, daemon=True)
    worker.start()

    text = ""
    emitted = False
    for piece in streamer:
        if not piece:
            # The streamer may emit empty chunks between word boundaries.
            continue
        text += piece
        emitted = True
        yield text  # Stream the partial text back to the UI

    worker.join()
    if errors:
        raise errors[0]
    if not emitted:
        # Everything decoded to nothing (e.g. only special tokens); still
        # give the caller one value.
        yield text

# Build the web UI: one text input wired to generate_text, whose yielded
# values are rendered in a Markdown component
interface = gr.Interface(
    title="Quble Text Generation",
    description="Enter a prompt to generate text using Quble with live streaming.",
    fn=generate_text,
    inputs="text",
    outputs=gr.Markdown(),
)

# Start serving the app
interface.launch()