model_name: "bn22/Mistral-7B-Instruct-v0.1-sharded"
adapters_name: "potato101/mistralengft"
device: "cuda"

# BitsAndBytes Config
bits_and_bytes_config:
  load_in_4bit: true
  bnb_4bit_use_double_quant: true
  bnb_4bit_quant_type: "nf4"
  bnb_4bit_compute_dtype: "torch.bfloat16"

# AutoModelForCausalLM Config
auto_model_config:
  load_in_4bit: true
  torch_dtype: "torch.bfloat16"

# PeftModel Config
peft_model_config: {}  # Add PeftModel-specific configuration if needed

# AutoTokenizer Config
auto_tokenizer_config:
  bos_token_id: 1

# Inference Settings
max_new_tokens: 200
do_sample: true

# Miscellaneous
prompt_prefix: "[INST]"
exit_command: "exit"

# Logging
log_success_message: "Successfully loaded the model {model_name} into memory"

# Model Input Processing
model_input_processing:
  add_special_tokens: false

# Output Display
output_display:
  generated_output_message: "Generated Output:"
  separator_line: "=================================================="  # 50 "=" characters
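
# ---------------------------------------------------------------------------
# Usage sketch (commented out so this file stays valid YAML): one way a loader
# could consume the settings above, assuming the Hugging Face transformers,
# peft, bitsandbytes, and PyYAML packages. The file name "config.yaml" and the
# DTYPES lookup table are illustrative assumptions, not part of this config.
#
#   import torch
#   import yaml
#   from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
#   from peft import PeftModel
#
#   with open("config.yaml") as f:
#       cfg = yaml.safe_load(f)
#
#   # Map the dtype strings in this file to real torch dtypes.
#   DTYPES = {"torch.bfloat16": torch.bfloat16, "torch.float16": torch.float16}
#
#   bnb = cfg["bits_and_bytes_config"]
#   bnb_config = BitsAndBytesConfig(
#       load_in_4bit=bnb["load_in_4bit"],
#       bnb_4bit_use_double_quant=bnb["bnb_4bit_use_double_quant"],
#       bnb_4bit_quant_type=bnb["bnb_4bit_quant_type"],
#       bnb_4bit_compute_dtype=DTYPES[bnb["bnb_4bit_compute_dtype"]],
#   )
#
#   # load_in_4bit under auto_model_config is redundant once a
#   # quantization_config is passed; the BitsAndBytesConfig already carries it.
#   model = AutoModelForCausalLM.from_pretrained(
#       cfg["model_name"],
#       quantization_config=bnb_config,
#       torch_dtype=DTYPES[cfg["auto_model_config"]["torch_dtype"]],
#       device_map="auto",
#   )
#   model = PeftModel.from_pretrained(model, cfg["adapters_name"])
#
#   tokenizer = AutoTokenizer.from_pretrained(cfg["model_name"])
#   tokenizer.bos_token_id = cfg["auto_tokenizer_config"]["bos_token_id"]
#
#   print(cfg["log_success_message"].format(model_name=cfg["model_name"]))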
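
# A matching interactive-loop sketch (also commented out), continuing from the
# model, tokenizer, and cfg objects built above. Closing the prompt with
# "[/INST]" follows the standard Mistral-Instruct template and is an
# assumption; only the "[INST]" prefix appears in this config.
#
#   while True:
#       user_text = input("> ")
#       if user_text.strip() == cfg["exit_command"]:
#           break
#
#       # Wrap the raw input in the instruction template before tokenizing.
#       prompt = f"{cfg['prompt_prefix']} {user_text} [/INST]"
#       inputs = tokenizer(
#           prompt,
#           return_tensors="pt",
#           add_special_tokens=cfg["model_input_processing"]["add_special_tokens"],
#       ).to(cfg["device"])
#
#       output_ids = model.generate(
#           **inputs,
#           max_new_tokens=cfg["max_new_tokens"],
#           do_sample=cfg["do_sample"],
#       )
#       text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
#
#       print(cfg["output_display"]["generated_output_message"])
#       print(text)
#       print(cfg["output_display"]["separator_line"])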