---
# Model selection
model_name: "bn22/Mistral-7B-Instruct-v0.1-sharded"
adapters_name: "potato101/mistralengft"
device: "cuda"

# BitsAndBytes Config
bits_and_bytes_config:
  load_in_4bit: true
  bnb_4bit_use_double_quant: true
  bnb_4bit_quant_type: "nf4"
  # Dtype name resolved to a torch dtype by the loading code — quoted so the
  # string-typed intent is explicit (value is unchanged from the plain scalar).
  bnb_4bit_compute_dtype: "torch.bfloat16"

# AutoModelForCausalLM Config
auto_model_config:
  load_in_4bit: true
  torch_dtype: "torch.bfloat16"

# PeftModel Config
# Empty mapping written explicitly: a bare `peft_model_config:` key would
# parse as null, not as an (empty) mapping the consumer can iterate/merge.
peft_model_config: {}
# Add PeftModel-specific configuration if needed

# AutoTokenizer Config
auto_tokenizer_config:
  bos_token_id: 1

# Inference Settings
max_new_tokens: 200
do_sample: true

# Miscellaneous
prompt_prefix: "[INST]"
exit_command: "exit"

# Logging
# {model_name} is a placeholder substituted at runtime by the consuming code.
log_success_message: "Successfully loaded the model {model_name} into memory"

# Model Input Processing
model_input_processing:
  add_special_tokens: false

# Output Display
output_display:
  generated_output_message: "Generated Output:"
  # YAML cannot evaluate expressions: the original `"=" * 50` is a syntax
  # error (trailing tokens after a closing quote). Written out literally
  # as 50 '=' characters instead.
  separator_line: "=================================================="