INVOHIDE_inisw8 / recognize.py
esun-choi's picture
Initial Commit
9b879f1
raw
history blame contribute delete
635 Bytes
import functools
import unicodedata

from transformers import TrOCRProcessor, VisionEncoderDecoderModel, AutoTokenizer
# Directory (or hub id) holding the fine-tuned TrOCR processor, model and
# tokenizer files; all three are loaded from the same location.
_TROCR_WEIGHTS = "trocr_weight"


@functools.lru_cache(maxsize=1)
def _load_trocr():
    """Load the TrOCR processor, model and tokenizer, caching the result.

    Loading happens on the first call only; later calls return the same
    objects. A failed load raises and is not cached, so it is retried on
    the next call.

    Returns:
        A ``(processor, model, tokenizer)`` tuple.
    """
    processor = TrOCRProcessor.from_pretrained(_TROCR_WEIGHTS)
    model = VisionEncoderDecoderModel.from_pretrained(_TROCR_WEIGHTS)
    tokenizer = AutoTokenizer.from_pretrained(_TROCR_WEIGHTS)
    return processor, model, tokenizer


def recongize(img):
    """Run TrOCR on a single image and return the recognized text.

    NOTE: the function name is misspelled ("recongize"); it is kept as-is
    so existing callers keep working.

    Args:
        img: The image to read. It is passed straight to the TrOCR
            processor -- presumably a PIL image or array of one text
            line; confirm against the caller.

    Returns:
        The decoded text of the first (only) generated sequence, with
        special tokens removed and normalized to Unicode NFC.
    """
    # Reuse the cached components instead of reloading weights per image.
    processor, model, tokenizer = _load_trocr()

    pixel_values = processor(img, return_tensors="pt").pixel_values
    generated_ids = model.generate(pixel_values, max_length=64)
    decoded = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
    # Compose the decoded text into NFC so equal-looking strings compare equal.
    return unicodedata.normalize("NFC", decoded[0])