import gradio as gr
import tensorflow as tf
import keras_nlp

MAX_SEQUENCE_LENGTH = 64

# Load the saved translation model and the SentencePiece tokenizers for the
# source (English) and target (Thai) vocabularies.
model = tf.keras.models.load_model('saved_model/mtr-model')
eng_tokenizer = keras_nlp.tokenizers.SentencePieceTokenizer(
    'saved_model/spmodel/en.model')
tha_tokenizer = keras_nlp.tokenizers.SentencePieceTokenizer(
    'saved_model/spmodel/th.model')


def decode_sequences(input_sentences):
    batch_size = tf.shape(input_sentences)[0]

    # Tokenize the encoder input and pad it out to a fixed sequence length.
    encoder_input_tokens = eng_tokenizer(input_sentences).to_tensor(
        shape=(None, MAX_SEQUENCE_LENGTH)
    )

    # Given the encoder input and the tokens decoded so far, return the
    # model's distribution over the next token.
    def token_probability_fn(decoder_input_tokens):
        return model([encoder_input_tokens, decoder_input_tokens])[:, -1, :]

    # Start decoding from the start-of-sequence token and run top-p (nucleus)
    # search; with p=0.1 only the most probable tokens survive, so decoding
    # is close to greedy.
    prompt = tf.fill((batch_size, 1), tha_tokenizer.token_to_id("<s>"))
    generated_tokens = keras_nlp.utils.top_p_search(
        token_probability_fn,
        prompt,
        p=0.1,
        max_length=40,
        end_token_id=tha_tokenizer.token_to_id("</s>"),
    )
    generated_sentences = tha_tokenizer.detokenize(generated_tokens)
    return generated_sentences
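
# Illustrative note (hypothetical input): decode_sequences(tf.constant(["hi"]))
# returns a string tensor of shape (1,) whose entry is the Thai translation,
# still wrapped in literal "<s>"/"</s>"/"<pad>" markers; greet() below strips
# those.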


def greet(english_lyrics):
    translated = decode_sequences(tf.constant([english_lyrics]))
    translated = translated.numpy()[0].decode("utf-8")
    # Strip the special tokens (and SentencePiece's "⁇" unknown-token
    # marker) left over from detokenization.
    translated = (
        translated
        .replace("<pad>", "")
        .replace("<s>", "")
        .replace("</s>", "")
        .replace("⁇", "")
        .strip()
    )
    return translated


iface = gr.Interface(fn=greet, inputs="text", outputs="text")
iface.launch()
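
# A minimal usage sketch (not part of the original app), assuming the demo is
# served on Gradio's default local address http://127.0.0.1:7860 and that the
# gradio_client package is available:
#
#   from gradio_client import Client
#   client = Client("http://127.0.0.1:7860")
#   print(client.predict("hello my friend", api_name="/predict"))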