Nigerian_languages / tests /test_tokenizer.py
Gabriel Okiri
Initial commit
4bb9d41
raw
history blame
351 Bytes
import pytest
from app.model.tokenizer import NigerianLanguageTokenizer
from transformers import AutoTokenizer
def test_tokenizer():
    """Smoke-test ``NigerianLanguageTokenizer.tokenize_batch``.

    Wraps the pretrained ``"gpt2"`` tokenizer in a
    ``NigerianLanguageTokenizer``, tokenizes a one-element batch and checks
    that the call returns something other than ``None``.
    """
    # NOTE(review): from_pretrained presumably needs the "gpt2" files to be
    # cached locally or downloadable -- confirm for offline CI runs.
    wrapped = NigerianLanguageTokenizer(AutoTokenizer.from_pretrained("gpt2"))

    batch = ["Sample text"]
    encoded = wrapped.tokenize_batch(batch)

    # Only the presence of a result is checked; its structure is not inspected.
    assert encoded is not None