import gradio as gr
import tiktoken
import json

def count_tokens(text):
    """
    Calculate the number of tokens in the input text using tiktoken.

    Args:
        text (str): The input text to be tokenized.

    Returns:
        tuple: The number of tokens in the input text and a beautified
        copy of the text for display.
    """
    # Choose the encoding based on the model you are targeting.
    # Here, we use 'gpt-4' as an example.
    encoding = tiktoken.encoding_for_model("gpt-4")

    # Encode the input text to get the list of token IDs.
    tokens = encoding.encode(text)

    # If the input is valid JSON, pretty-print it for the output box.
    try:
        parsed_json = json.loads(text)
        text = json.dumps(parsed_json, indent=4, ensure_ascii=False)
    except json.JSONDecodeError:
        pass

    # Turn escaped "\n" sequences into real line breaks for readability.
    text = text.replace("\\n", "\n")

    return len(tokens), text
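
# Minimal sketch of calling count_tokens directly, outside the Gradio UI.
# The sample string below is just an assumed example input:
#
#     n_tokens, pretty = count_tokens('{"greeting": "hello"}')
#     print(n_tokens)  # token count reported by tiktoken
#     print(pretty)    # pretty-printed JSON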

# Define the Gradio interface
iface = gr.Interface(
    fn=count_tokens,  # The function to call
    inputs=gr.Textbox(lines=1, max_lines=1000000, placeholder="Enter your text here..."),  # Input component
    outputs=[
        "number",
        gr.Textbox(label="Beautified Text", lines=30),
    ],
    title="Token Counter with tiktoken",
    description="Enter text below to calculate the number of tokens using the tiktoken library.",
    examples=[
        ["Hello, how are you doing today?"],
        ["Gradio makes it easy to create web apps for machine learning models."],
        ["OpenAI's GPT models are powerful tools for natural language processing tasks."],
    ],
    theme="default",
)

# Launch the app
if __name__ == "__main__":
    iface.launch()