Update README.md
Browse files
README.md
CHANGED
@@ -134,15 +134,15 @@ tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
|
|
134 |
|
135 |
model = AutoGPTQForCausalLM.from_quantized(model_name_or_path,
|
136 |
model_basename=model_basename,
|
|
|
137 |
use_safetensors=True,
|
138 |
trust_remote_code=False,
|
139 |
-
device="cuda:0",
|
140 |
use_triton=use_triton,
|
141 |
quantize_config=None)
|
142 |
|
143 |
-
prompt = "
|
144 |
-
prompt_template=f'''{prompt}
|
145 |
-
|
146 |
'''
|
147 |
|
148 |
print("\n\n*** Generate:")
|
|
|
134 |
|
135 |
model = AutoGPTQForCausalLM.from_quantized(model_name_or_path,
|
136 |
model_basename=model_basename,
|
137 |
+
max_memory={0: '60GiB', 1: '79GiB'}, # max_memory is for 2 x 80GB GPUs; adjust if your config is different!
|
138 |
use_safetensors=True,
|
139 |
trust_remote_code=False,
|
|
|
140 |
use_triton=use_triton,
|
141 |
quantize_config=None)
|
142 |
|
143 |
+
prompt = "Write a story about llamas"
|
144 |
+
prompt_template=f'''<human>: {prompt}
|
145 |
+
<bot>:
|
146 |
'''
|
147 |
|
148 |
print("\n\n*** Generate:")
|