# Spaces: Sleeping
import gradio as gr | |
from tokenizers import Tokenizer | |
# Load both tokenizer definitions once, at import time, so they can be reused
# by every call. The JSON paths are relative, so they are resolved against the
# process's current working directory.
llama3_tokenizer, deepseek_coder_tokenizer = (
    Tokenizer.from_file(tokenizer_path)
    for tokenizer_path in (
        "tokenizer-llama3.json",
        "tokenizer-deepseek-coder.json",
    )
)
def get_tokenizer(model):
    """Return the preloaded tokenizer registered for *model*.

    Args:
        model: Model identifier; must be one of the keys of the mapping
            defined in this function.

    Returns:
        The module-level tokenizer object associated with ``model``.

    Raises:
        ValueError: If ``model`` is not a supported identifier.
            ``ValueError`` is a subclass of ``Exception``, so broad
            ``except Exception`` handlers still catch it.
    """
    tokenizer_mapping = {
        "meta-llama/Meta-Llama-3-8B-Instruct": llama3_tokenizer,
        "deepseek-ai/deepseek-coder-7b-instruct-v1.5": deepseek_coder_tokenizer,
    }
    try:
        return tokenizer_mapping[model]
    except KeyError:
        # ``from None`` hides the internal KeyError so the traceback shows
        # only the meaningful "not supported" error.
        raise ValueError(f"Model {model} not supported.") from None
def count_tokens(model, target_text):
    """Yield a one-line report of how many tokens *target_text* encodes to.

    Args:
        model: Model identifier, forwarded to ``get_tokenizer``.
        target_text: Text to encode with the selected tokenizer.

    Yields:
        A single string of the form ``"Token count: <n>"``, where ``<n>`` is
        the number of entries in the encoding's ``ids``.
    """
    encoding = get_tokenizer(model).encode(target_text)
    token_total = len(encoding.ids)
    # This is a generator function (single ``yield``): calling it returns an
    # iterator that produces exactly one message.
    yield f"Token count: {token_total}"
# Build the UI: a model dropdown and a multi-line text box are passed to
# count_tokens, and its result is shown in a text output.
demo = gr.Interface(
    fn=count_tokens,
    inputs=[
        gr.Dropdown(
            choices=[
                "meta-llama/Meta-Llama-3-8B-Instruct",
                "deepseek-ai/deepseek-coder-7b-instruct-v1.5",
            ],
            label="Model",
            value="meta-llama/Meta-Llama-3-8B-Instruct",
        ),
        gr.Textbox(
            lines=10,
            label="Input",
            info="Text to count tokens for",
        ),
    ],
    outputs=["text"],
)

# Launch the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()