# Source: Hugging Face Space "jiangjiechen/tiktoken_count" (status: Sleeping).
# File size: 1,575 bytes.
# Commits shown in the page's blame gutter: ede8cb5, 6d2d4e1, 69b0a31, bbc075a.

import gradio as gr
import tiktoken
import json

def count_tokens(text):
    """
    Count the tiktoken tokens in the input and build a display-friendly copy.

    The token count is computed on the text exactly as received; the
    beautification below only affects the second return value.

    Args:
        text (str): The input text to be tokenized. ``None`` is treated as
            an empty string.

    Returns:
        tuple: ``(token_count, beautified_text)`` where ``token_count`` (int)
            is the number of tokens in the original input and
            ``beautified_text`` (str) is the input re-serialized as JSON with
            a 4-space indent when it parses as JSON, otherwise the input
            itself. In both cases every literal backslash-``n`` sequence is
            replaced by a real newline.
    """
    if text is None:
        text = ""

    # Use the encoding tiktoken associates with the "gpt-4" model.
    encoding = tiktoken.encoding_for_model("gpt-4")

    # disallowed_special=() makes special-token markers such as
    # "<|endoftext|>" get tokenized as ordinary text. With tiktoken's default
    # setting, encode() raises ValueError when the input contains one.
    token_count = len(encoding.encode(text, disallowed_special=()))

    beautified = text
    try:
        beautified = json.dumps(json.loads(text), indent=4, ensure_ascii=False)
    except (json.JSONDecodeError, RecursionError):
        # Best effort only: input that is not JSON (or is nested too deeply
        # to process) is shown as entered.
        pass

    # json.dumps writes newlines inside strings as a backslash followed by
    # "n"; turn those (and any typed by the user) into real line breaks.
    beautified = beautified.replace("\\n", "\n")

    return token_count, beautified

# Build the UI pieces first, then wire them to count_tokens.
# The single text input is passed to count_tokens as its `text` argument.
_input_box = gr.Textbox(
    lines=1,
    max_lines=1000000,
    placeholder="Enter your text here...",
)

# Two outputs, in the same order as the values count_tokens returns.
_output_components = [
    "number",
    gr.Textbox(label="Beautified Text", lines=30),
]

# Sample inputs offered by the interface; one single-field row per example.
_example_inputs = [
    ["Hello, how are you doing today?"],
    ["Gradio makes it easy to create web apps for machine learning models."],
    ["OpenAI's GPT models are powerful tools for natural language processing tasks."],
]

iface = gr.Interface(
    fn=count_tokens,
    inputs=_input_box,
    outputs=_output_components,
    title="Token Counter with tiktoken",
    description="Enter text below to calculate the number of tokens using the tiktoken library.",
    examples=_example_inputs,
    theme="default",
)

# Start the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    iface.launch()