ctranslate2-4you committed on
Commit bb1d05d · verified · 1 Parent(s): f95ed93

Create README.md

Files changed (1)
  1. README.md +82 -0
README.md ADDED
---
base_model:
- mistralai/Mistral-Nemo-Instruct-2407
---

CTranslate2 conversion of the model located at [mistralai/Mistral-Nemo-Instruct-2407](https://huggingface.co/mistralai/Mistral-Nemo-Instruct-2407).

A conversion script with a graphical user interface can be downloaded [HERE](https://github.com/BBC-Esq/Ctranslate2-Converter).

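If you would rather run the conversion yourself, the snippet below is a minimal sketch using CTranslate2's Python converter API (the output directory name is just an example; the same conversion is also exposed as the `ct2-transformers-converter` command-line tool):

```python
from ctranslate2.converters import TransformersConverter

# Load the original Transformers checkpoint and write a CTranslate2 model
# with int8 quantization (the output directory name is an example).
converter = TransformersConverter("mistralai/Mistral-Nemo-Instruct-2407")
converter.convert("Mistral-Nemo-Instruct-2407-ct2-int8", quantization="int8")
```
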
## Tested with CTranslate2 4.4.0 and Torch 2.2.2
- NOTE: CTranslate2 will soon release version 4.5.0, which will require a Torch version greater than 2.2.2. A quick check of your installed versions is sketched below.

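For reference, here is a quick way to confirm which versions are installed in your environment (this simply prints the package version strings):

```python
import ctranslate2
import torch

# Print installed versions to confirm compatibility with the note above.
print("CTranslate2:", ctranslate2.__version__)
print("Torch:", torch.__version__)
```
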
## Example Usage:

```python
import os
import gc

import ctranslate2
import torch
from transformers import AutoTokenizer

system_message = "You are a helpful person who answers questions."
user_message = "Hello, how are you today? I'd like you to write me a funny poem that is a parody of Milton's Paradise Lost if you are familiar with that famous epic poem?"

model_dir = r"D:\Scripts\bench_chat\models\mistralai--Mistral-Nemo-Instruct-2407-ct2-int8"


def build_prompt_mistral_nemo():
    # Mistral instruction format: system and user message inside a single [INST] block.
    prompt = f"""<s>
[INST]{system_message}

{user_message}[/INST]"""

    return prompt


def main():
    model_name = os.path.basename(model_dir)

    print(f"\033[32mLoading the model: {model_name}...\033[0m")

    # Leave a few CPU cores free, but always use at least 4 threads.
    intra_threads = max(os.cpu_count() - 4, 4)

    generator = ctranslate2.Generator(
        model_dir,
        device="cuda",
        compute_type="int8",
        intra_threads=intra_threads
    )

    tokenizer = AutoTokenizer.from_pretrained(model_dir, add_prefix_space=None)

    prompt = build_prompt_mistral_nemo()

    # CTranslate2 expects token strings rather than token ids.
    tokens = tokenizer.convert_ids_to_tokens(tokenizer.encode(prompt))

    results_batch = generator.generate_batch(
        [tokens],
        include_prompt_in_result=False,
        max_batch_size=4096,
        batch_type="tokens",
        beam_size=1,
        num_hypotheses=1,
        max_length=512,
        sampling_temperature=0.0,
    )

    output = tokenizer.decode(results_batch[0].sequences_ids[0])

    print("\nGenerated response:")
    print(output)

    # Release the model and free GPU memory.
    del generator
    del tokenizer
    torch.cuda.empty_cache()
    gc.collect()


if __name__ == "__main__":
    main()
```
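
If you want to stream tokens as they are generated rather than waiting for the full response, recent CTranslate2 releases also provide `Generator.generate_tokens`. The sketch below assumes your installed version includes it and reuses `generator`, `tokenizer`, and `tokens` from the script above:

```python
# Minimal streaming sketch (assumes Generator.generate_tokens is available in
# your CTranslate2 version; reuses generator, tokenizer, and tokens from above).
generated_ids = []
for step in generator.generate_tokens(tokens, max_length=512):
    generated_ids.append(step.token_id)

print(tokenizer.decode(generated_ids))
```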