import gradio as gr
import tiktoken
import json
def count_tokens(text):
"""
Calculate the number of tokens in the input text using tiktoken.
Args:
text (str): The input text to be tokenized.
Returns:
int: The number of tokens in the input text.
"""
# Choose the encoding based on the model you are targeting.
# Here, we use 'gpt-3.5-turbo' as an example.
encoding = tiktoken.encoding_for_model("gpt-4")
# Encode the input text to get the list of token IDs
tokens = encoding.encode(text)
try:
parsed_json = json.loads(text)
text = json.dumps(parsed_json, indent=4, ensure_ascii=False)
except json.JSONDecodeError:
pass
text = text.replace("\\n", "\n")
return len(tokens), text
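# A minimal usage sketch (hypothetical example input, not part of the app):
# count, pretty = count_tokens('{"greeting": "hello"}')
# `count` is the number of GPT-4 tokens in the raw input string;
# `pretty` is the same JSON re-serialized with 4-space indentation.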
# Define the Gradio interface
iface = gr.Interface(
    fn=count_tokens,  # The function to call
    inputs=gr.Textbox(lines=1, max_lines=1000000, placeholder="Enter your text here..."),  # Input component
    outputs=[
        "number",
        gr.Textbox(label="Beautified Text", lines=30)
    ],
    title="Token Counter with tiktoken",
    description="Enter text below to calculate the number of tokens using the tiktoken library.",
    examples=[
        ["Hello, how are you doing today?"],
        ["Gradio makes it easy to create web apps for machine learning models."],
        ["OpenAI's GPT models are powerful tools for natural language processing tasks."]
    ],
    theme="default"
)
# Launch the app
if __name__ == "__main__":
    iface.launch()