model_name: "bn22/Mistral-7B-Instruct-v0.1-sharded"
adapters_name: "potato101/mistralengft"
device: "cuda"

# BitsAndBytes Config
bits_and_bytes_config:
  load_in_4bit: true
  bnb_4bit_use_double_quant: true
  bnb_4bit_quant_type: "nf4"
  bnb_4bit_compute_dtype: "torch.bfloat16"

# AutoModelForCausalLM Config
auto_model_config:
  load_in_4bit: true
  torch_dtype: "torch.bfloat16"

# PeftModel Config
peft_model_config: {}  # Add PeftModel-specific configuration if needed

# AutoTokenizer Config
auto_tokenizer_config:
  bos_token_id: 1

# Inference Settings
max_new_tokens: 200
do_sample: true

# Miscellaneous
prompt_prefix: "[INST]"
exit_command: "exit"

# Logging
log_success_message: "Successfully loaded the model {model_name} into memory"

# Model Input Processing
model_input_processing:
  add_special_tokens: false

# Output Display
output_display:
  generated_output_message: "Generated Output:"
  separator_line: "=================================================="  # 50 "=" characters
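
# ---------------------------------------------------------------------------
# Usage sketch (commented out so this file stays valid YAML): one way a loader
# could consume the settings above, assuming the Hugging Face transformers,
# peft, bitsandbytes, and PyYAML packages. The file name "config.yaml" and the
# DTYPES lookup table are illustrative assumptions, not part of this config.
#
#   import torch
#   import yaml
#   from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
#   from peft import PeftModel
#
#   with open("config.yaml") as f:
#       cfg = yaml.safe_load(f)
#
#   # Map the dtype strings in this file to real torch dtypes.
#   DTYPES = {"torch.bfloat16": torch.bfloat16, "torch.float16": torch.float16}
#
#   bnb = cfg["bits_and_bytes_config"]
#   bnb_config = BitsAndBytesConfig(
#       load_in_4bit=bnb["load_in_4bit"],
#       bnb_4bit_use_double_quant=bnb["bnb_4bit_use_double_quant"],
#       bnb_4bit_quant_type=bnb["bnb_4bit_quant_type"],
#       bnb_4bit_compute_dtype=DTYPES[bnb["bnb_4bit_compute_dtype"]],
#   )
#
#   # load_in_4bit under auto_model_config is redundant once a
#   # quantization_config is passed; the BitsAndBytesConfig already carries it.
#   model = AutoModelForCausalLM.from_pretrained(
#       cfg["model_name"],
#       quantization_config=bnb_config,
#       torch_dtype=DTYPES[cfg["auto_model_config"]["torch_dtype"]],
#       device_map="auto",
#   )
#   model = PeftModel.from_pretrained(model, cfg["adapters_name"])
#
#   tokenizer = AutoTokenizer.from_pretrained(cfg["model_name"])
#   tokenizer.bos_token_id = cfg["auto_tokenizer_config"]["bos_token_id"]
#
#   print(cfg["log_success_message"].format(model_name=cfg["model_name"]))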
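
# A matching interactive-loop sketch (also commented out), continuing from the
# model, tokenizer, and cfg objects built above. Closing the prompt with
# "[/INST]" follows the standard Mistral-Instruct template and is an
# assumption; only the "[INST]" prefix appears in this config.
#
#   while True:
#       user_text = input("> ")
#       if user_text.strip() == cfg["exit_command"]:
#           break
#
#       # Wrap the raw input in the instruction template before tokenizing.
#       prompt = f"{cfg['prompt_prefix']} {user_text} [/INST]"
#       inputs = tokenizer(
#           prompt,
#           return_tensors="pt",
#           add_special_tokens=cfg["model_input_processing"]["add_special_tokens"],
#       ).to(cfg["device"])
#
#       output_ids = model.generate(
#           **inputs,
#           max_new_tokens=cfg["max_new_tokens"],
#           do_sample=cfg["do_sample"],
#       )
#       text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
#
#       print(cfg["output_display"]["generated_output_message"])
#       print(text)
#       print(cfg["output_display"]["separator_line"])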