import gradio as gr
import tiktoken
import json


def count_tokens(text):
    """
    Calculate the number of tokens in the input text using tiktoken.

    Args:
        text (str): The input text to be tokenized.

    Returns:
        tuple[int, str]: The number of tokens and a beautified copy of the text.
    """
    # Choose the encoding based on the model you are targeting.
    # Here, we use the encoding for 'gpt-4' as an example.
    encoding = tiktoken.encoding_for_model("gpt-4")

    # Encode the input text to get the list of token IDs.
    tokens = encoding.encode(text)

    # If the input is valid JSON, pretty-print it for readability.
    try:
        parsed_json = json.loads(text)
        text = json.dumps(parsed_json, indent=4, ensure_ascii=False)
    except json.JSONDecodeError:
        pass

    # Convert literal "\n" sequences into real newlines.
    text = text.replace("\\n", "\n")

    return len(tokens), text


# Define the Gradio interface
iface = gr.Interface(
    fn=count_tokens,  # The function to call
    inputs=gr.Textbox(
        lines=1,
        max_lines=1000000,
        placeholder="Enter your text here...",
    ),  # Input component
    outputs=[
        gr.Number(label="Token Count"),
        gr.Textbox(label="Beautified Text", lines=30),
    ],
    title="Token Counter with tiktoken",
    description="Enter text below to calculate the number of tokens using the tiktoken library.",
    examples=[
        ["Hello, how are you doing today?"],
        ["Gradio makes it easy to create web apps for machine learning models."],
        ["OpenAI's GPT models are powerful tools for natural language processing tasks."],
    ],
    theme="default",
)

# Launch the app
if __name__ == "__main__":
    iface.launch()