File size: 1,488 Bytes
c8bbd2d
 
 
 
516caa2
328dbfe
c8bbd2d
 
303b266
c8bbd2d
303b266
c8bbd2d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import gradio as gr
import tensorflow as tf
import keras_nlp

# Fixed width (in tokens) that encoder inputs are densified to inside
# decode_sequences.
MAX_SEQUENCE_LENGTH = 64

# Translation model restored from its on-disk saved-model directory.
model = tf.keras.models.load_model('saved_model/mtr-model')

# SentencePiece tokenizers: `eng_tokenizer` tokenizes the incoming text,
# `tha_tokenizer` supplies the <s>/</s> ids and detokenizes model output.
eng_tokenizer = keras_nlp.tokenizers.SentencePieceTokenizer(
    'saved_model/spmodel/en.model'
)
tha_tokenizer = keras_nlp.tokenizers.SentencePieceTokenizer(
    'saved_model/spmodel/th.model'
)

def decode_sequences(input_sentences):
    """Translate a batch of sentences by top-p sampling from the model.

    Args:
        input_sentences: Batch of source strings. Its leading dimension is
            read as the batch size (the caller in this file passes a 1-D
            ``tf.constant`` of strings).

    Returns:
        Whatever ``tha_tokenizer.detokenize`` produces for the sampled
        token ids (the caller reads it via ``.numpy()`` and decodes the
        entries as UTF-8 bytes).
    """
    num_sentences = tf.shape(input_sentences)[0]

    # Tokenize the source text, then densify the result to a fixed width of
    # MAX_SEQUENCE_LENGTH columns.
    source_ids = eng_tokenizer(input_sentences)
    source_ids = source_ids.to_tensor(shape=(None, MAX_SEQUENCE_LENGTH))

    def next_token_scores(target_ids):
        # The sampler only needs the model output at the last decoder
        # position to choose the next token.
        scores = model([source_ids, target_ids])
        return scores[:, -1, :]

    # Every row of the decoder prompt starts with the "<s>" token.
    start_id = tha_tokenizer.token_to_id("<s>")
    start_tokens = tf.fill((num_sentences, 1), start_id)
    end_id = tha_tokenizer.token_to_id("</s>")

    sampled_ids = keras_nlp.utils.top_p_search(
        next_token_scores,
        start_tokens,
        max_length=40,
        p=0.1,
        end_token_id=end_id,
    )
    return tha_tokenizer.detokenize(sampled_ids)

def greet(english_lyrics: str) -> str:
    """Translate the submitted text and strip leftover special tokens.

    Args:
        english_lyrics: Text submitted through the Gradio text input.

    Returns:
        The decoded translation with the ``<pad>``, ``<s>``, ``</s>`` and
        ``⁇`` markers removed and surrounding whitespace trimmed. An empty
        string is returned when the input is empty, whitespace-only or
        ``None``.
    """
    # Nothing to translate: answer immediately instead of running the model
    # on an empty sentence and displaying whatever it happens to sample.
    if not english_lyrics or not english_lyrics.strip():
        return ""

    # decode_sequences works on batches, so wrap the text in a batch of one
    # and pull the single result back out as a Python string.
    batch = decode_sequences(tf.constant([english_lyrics]))
    translated = batch.numpy()[0].decode("utf-8")

    # Remove, in this fixed order, markers that can survive detokenization
    # ("⁇" is presumably SentencePiece's unknown-piece marker — confirm).
    for marker in ("<pad>", "<s>", "</s>", "⁇"):
        translated = translated.replace(marker, "")
    return translated.strip()

# Single text-in / text-out Gradio UI backed by greet(); the app is launched
# as soon as this module runs.
iface = gr.Interface(
    fn=greet,
    inputs="text",
    outputs="text",
)
iface.launch()