robinhad theodotus committed on
Commit
36ba107
·
0 Parent(s):

Duplicate from theodotus/pythia-uk

Browse files

Co-authored-by: Bohdan Mykhailenko <[email protected]>

.gitattributes ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Pythia UK
3
+ emoji: 😻
4
+ colorFrom: blue
5
+ colorTo: yellow
6
+ sdk: gradio
7
+ sdk_version: 3.34.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: mit
11
+ duplicated_from: theodotus/pythia-uk
12
+ ---
13
+
14
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import ctranslate2
2
+ from transformers import AutoTokenizer
3
+
4
+ import threading
5
+ import gradio as gr
6
+
7
+ from typing import Optional
8
+ from queue import Queue
9
+
10
+
11
+
12
+
13
class TokenIteratorStreamer:
    """Iterator that hands out token ids pushed in through ``put``.

    Tokens travel through an internal ``Queue``; iteration stops as soon as
    the id equal to ``end_token_id`` is read. ``timeout`` is forwarded to
    every queue ``put``/``get`` call (``None`` means block indefinitely).
    """

    def __init__(self, end_token_id: int, timeout: Optional[float] = None):
        self.timeout = timeout
        self.queue = Queue()
        self.end_token_id = end_token_id

    def put(self, token: int):
        """Append ``token`` to the queue for a later ``__next__`` call."""
        self.queue.put(token, timeout=self.timeout)

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next queued token id, or stop on the end token."""
        next_id = self.queue.get(timeout=self.timeout)
        if next_id != self.end_token_id:
            return next_id
        # The end token is a sentinel only; it is never yielded.
        raise StopIteration
31
+
32
+
33
+
34
def generate_prompt(history):
    """Turn the chat history into the token list fed to the model.

    ``history`` is a sequence of ``[user_message, bot_message]`` pairs. Every
    pair except the last is rendered as a completed exchange; the last pair
    contributes only its user message followed by an open ``<bot>:`` tag.
    The text is encoded with the module-level ``tokenizer`` and converted
    back to token strings.
    """
    finished_turns = "".join(
        f"<human>: {turn[0]}\n<bot>: {turn[1]}\n" for turn in history[:-1]
    )
    prompt = finished_turns + f"<human>: {history[-1][0]}\n<bot>:"
    token_ids = tokenizer.encode(prompt)
    return tokenizer.convert_ids_to_tokens(token_ids)
41
+
42
def generate(streamer, history):
    """Run generation for ``history`` and push each token id to ``streamer``.

    Intended to run in a worker thread while another thread iterates over
    ``streamer``. The end token id is always delivered to ``streamer``
    exactly when needed, including when prompt building or
    ``translate_batch`` raises, so the consumer never blocks forever waiting
    for a sentinel.

    Args:
        streamer: object with a ``put(token_id)`` method.
        history: list of ``[user_message, bot_message]`` pairs.

    Returns:
        Whatever ``translator.translate_batch`` returns.
    """
    end_sent = False

    def stepResultCallback(result):
        nonlocal end_sent
        streamer.put(result.token_id)
        if result.token_id == end_token_id:
            # The model produced the end token itself; nothing more to send.
            end_sent = True
        elif result.is_last:
            # Last step without an end token: add the sentinel ourselves.
            streamer.put(end_token_id)
            end_sent = True
        print(f"step={result.step}, batch_id={result.batch_id}, token={result.token}")

    try:
        tokens = generate_prompt(history)
        results = translator.translate_batch(
            [tokens],
            beam_size=1,
            max_decoding_length=256,
            repetition_penalty=1.8,
            callback=stepResultCallback,
        )
        return results
    finally:
        # On any failure, or if no step was flagged as last, still unblock
        # the consumer; the exception (if any) propagates afterwards.
        if not end_sent:
            streamer.put(end_token_id)
59
+
60
+
61
+
62
# CTranslate2 model loaded from the local "model" directory, 2 intra-op threads.
translator = ctranslate2.Translator("model", intra_threads=2)
# Tokenizer fetched by name from the Hugging Face Hub.
tokenizer = AutoTokenizer.from_pretrained("DKYoon/mt5-xl-lm-adapt")

# Id of the end-of-sequence token; used as the stop sentinel for streaming.
end_token = "</s>"
end_token_id = tokenizer.encode(end_token)[0]
66
+
67
+
68
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.Button("Clear")

    def user(user_message, history):
        """Clear the textbox and append the new message with an empty reply."""
        updated_history = history + [[user_message, ""]]
        return "", updated_history

    def bot(history):
        """Stream the bot reply into the last history entry, token by token."""
        streamer = TokenIteratorStreamer(end_token_id=end_token_id)
        # Generation runs in a background thread and feeds the streamer.
        worker = threading.Thread(target=generate, args=(streamer, history))
        worker.start()

        generated_ids = []
        for token_id in streamer:
            generated_ids.append(token_id)
            # Re-decode the whole sequence so far and show the partial reply.
            history[-1][1] = tokenizer.decode(generated_ids)
            yield history
        worker.join()

    # Submitting first records the user message, then streams the bot answer.
    msg.submit(
        user, [msg, chatbot], [msg, chatbot], queue=False
    ).then(bot, chatbot, chatbot)
    clear.click(lambda: None, None, chatbot, queue=False)

demo.queue()
if __name__ == "__main__":
    demo.launch()
model/config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_source_bos": false,
3
+ "add_source_eos": false,
4
+ "bos_token": "<pad>",
5
+ "decoder_start_token": "<pad>",
6
+ "eos_token": "</s>",
7
+ "layer_norm_epsilon": null,
8
+ "unk_token": "<unk>"
9
+ }
model/model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:667c05b1b5b60515f93903db9dfd78c42ad7bc1b748820fa0cdac9c079392082
3
+ size 7485277864
model/shared_vocabulary.json ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ ctranslate2
2
+ transformers
3
+ SentencePiece