tiktoken_count / app.py
jiangjiechen's picture
ensure ascii=False
69b0a31
import gradio as gr
import tiktoken
import json
def count_tokens(text):
    """
    Count the tokens in ``text`` and build a display-friendly copy of it.

    Args:
        text (str): The input text to be tokenized.

    Returns:
        tuple: ``(token_count, display_text)``. ``token_count`` is the number
        of tokens in the original, unmodified input. ``display_text`` is the
        input re-serialized as indented JSON when it parses as JSON
        (otherwise the input itself), with every literal backslash-n
        sequence replaced by a real line break.
    """
    # Use the encoding that tiktoken associates with the "gpt-4" model name.
    encoding = tiktoken.encoding_for_model("gpt-4")
    # Tokenize the raw input. disallowed_special=() makes text that spells a
    # special token (e.g. "<|endoftext|>") count as ordinary text; with the
    # default setting encode() raises ValueError on such input.
    tokens = encoding.encode(text, disallowed_special=())
    try:
        parsed_json = json.loads(text)
    except json.JSONDecodeError:
        # Not valid JSON: keep the text exactly as entered.
        pass
    else:
        # Valid JSON: pretty-print it, keeping non-ASCII characters readable.
        text = json.dumps(parsed_json, indent=4, ensure_ascii=False)
    # Show escaped "\n" sequences as actual line breaks in the output box.
    text = text.replace("\\n", "\n")
    return len(tokens), text
# Build the UI pieces first, then wire them to count_tokens.

# Text box the user types or pastes into.
_text_input = gr.Textbox(
    lines=1,
    max_lines=1000000,
    placeholder="Enter your text here...",
)

# Two outputs, matching the (count, text) pair returned by count_tokens.
_result_outputs = [
    "number",
    gr.Textbox(label="Beautified Text", lines=30),
]

# Sample inputs offered in the interface.
_sample_inputs = [
    ["Hello, how are you doing today?"],
    ["Gradio makes it easy to create web apps for machine learning models."],
    ["OpenAI's GPT models are powerful tools for natural language processing tasks."],
]

iface = gr.Interface(
    fn=count_tokens,
    inputs=_text_input,
    outputs=_result_outputs,
    title="Token Counter with tiktoken",
    description="Enter text below to calculate the number of tokens using the tiktoken library.",
    examples=_sample_inputs,
    theme="default",
)

# Launch the app only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()