Update README.md
Browse files
README.md
CHANGED
@@ -14,7 +14,6 @@ The following sample assumes that the setup on the above page has been completed
|
|
14 |
|
15 |
This model has only been tested on RyzenAI for Windows 11. It does not work in Linux environments such as WSL.
|
16 |
|
17 |
-
|
18 |
|
19 |
### setup
|
20 |
In a cmd window.
|
@@ -26,16 +25,12 @@ pip install transformers==4.43.3
|
|
26 |
# Updating the Transformers library will cause the LLama 2 sample to stop working.
|
27 |
# If you want to run LLama 2, revert to pip install transformers==4.34.0.
|
28 |
pip install tokenizers==0.19.1
|
|
|
29 |
|
30 |
-
|
31 |
-
git clone https://huggingface.co/dahara1/llama3.1-8b-Instruct-amd-npu
|
32 |
-
cd llama3.1-8b-Instruct-amd-npu
|
33 |
-
git lfs pull
|
34 |
-
cd ..
|
35 |
-
copy <your_install_path>\RyzenAI-SW\example\transformers\models\llama2\modeling_llama_amd.py .
|
36 |
|
37 |
# set up Runtime. see https://ryzenai.docs.amd.com/en/latest/runtime_setup.html
|
38 |
-
set XLNX_VART_FIRMWARE=<your_install_path>\voe-4.0-win_amd64\1x4.xclbin
|
39 |
set NUM_OF_DPU_RUNNERS=1
|
40 |
|
41 |
# save below sample script as utf8 and llama-3.1-test.py
|
@@ -83,7 +78,7 @@ if __name__ == "__main__":
|
|
83 |
torch.set_num_threads(4)
|
84 |
|
85 |
tokenizer = AutoTokenizer.from_pretrained("llama3.1-8b-Instruct-amd-npu")
|
86 |
-
ckpt = "llama3.1-8b-Instruct-amd-npu\llama3.1_8b_w_bit_4_awq_amd.pt"
|
87 |
terminators = [
|
88 |
tokenizer.eos_token_id,
|
89 |
tokenizer.convert_tokens_to_ids("<|eot_id|>")
|
|
|
14 |
|
15 |
This model has only been tested on RyzenAI for Windows 11. It does not work in Linux environments such as WSL.
|
16 |
|
|
|
17 |
|
18 |
### setup
|
19 |
In a cmd window.
|
|
|
25 |
# Updating the Transformers library will cause the LLama 2 sample to stop working.
|
26 |
# If you want to run LLama 2, revert to pip install transformers==4.34.0.
|
27 |
pip install tokenizers==0.19.1
|
28 |
+
pip install -U "huggingface_hub[cli]"
|
29 |
|
30 |
+
huggingface-cli download dahara1/llama3.1-8b-Instruct-amd-npu --revision main --local-dir llama3.1-8b-Instruct-amd-npu
|
|
|
|
|
|
|
|
|
|
|
31 |
|
32 |
# set up Runtime. see https://ryzenai.docs.amd.com/en/latest/runtime_setup.html
|
33 |
+
set XLNX_VART_FIRMWARE=<your_firmware_install_path>\voe-4.0-win_amd64\1x4.xclbin
|
34 |
set NUM_OF_DPU_RUNNERS=1
|
35 |
|
36 |
# save below sample script as utf8 and llama-3.1-test.py
|
|
|
78 |
torch.set_num_threads(4)
|
79 |
|
80 |
tokenizer = AutoTokenizer.from_pretrained("llama3.1-8b-Instruct-amd-npu")
|
81 |
+
ckpt = r"llama3.1-8b-Instruct-amd-npu\llama3.1_8b_w_bit_4_awq_amd.pt"
|
82 |
terminators = [
|
83 |
tokenizer.eos_token_id,
|
84 |
tokenizer.convert_tokens_to_ids("<|eot_id|>")
|