robinhad committed · verified · commit 12220ff · parent 99a40c4

Update README.md

Files changed (1): README.md (+5, -4)
README.md CHANGED
@@ -61,18 +61,19 @@ Performance on multi-sentence texts is not guaranteed, please be aware.
 
 ```python
 # pip install bitsandbytes transformers peft torch
-from transformers import AutoTokenizer, AutoModelForCausalLM
+from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
+from peft import PeftConfig, PeftModel
 import torch
 
 config = PeftConfig.from_pretrained("lang-uk/dragoman")
 quant_config = BitsAndBytesConfig(
     load_in_4bit=True,
     bnb_4bit_quant_type="nf4",
-    bnb_4bit_compute_dtype=float16,
+    bnb_4bit_compute_dtype=torch.float16,
     bnb_4bit_use_double_quant=False,
 )
 
-model = MistralForCausalLM.from_pretrained(
+model = AutoModelForCausalLM.from_pretrained(
     "mistralai/Mistral-7B-v0.1", quantization_config=quant_config
 )
 model = PeftModel.from_pretrained(model, "lang-uk/dragoman").to("cuda")
@@ -83,7 +84,7 @@ tokenizer = AutoTokenizer.from_pretrained(
 input_text = "[INST] who holds this neighborhood? [/INST]" # model input should adhere to this format
 input_ids = tokenizer(input_text, return_tensors="pt").to("cuda")
 
-outputs = model.generate(**input_ids)
+outputs = model.generate(**input_ids, num_beams=10)
 print(tokenizer.decode(outputs[0]))
 ```
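In short, the commit makes the previously broken snippet runnable: it adds the missing `transformers` and `peft` imports, replaces the undefined `float16` with `torch.float16`, swaps the never-imported `MistralForCausalLM` for `AutoModelForCausalLM`, and enables beam search via `num_beams=10`. For convenience, here is the example as it reads after this commit, consolidated into one block. Note one assumption: the `AutoTokenizer.from_pretrained(` call is truncated in the diff context, so loading the tokenizer from `config.base_model_name_or_path` is an illustrative guess, not part of the commit.

```python
# pip install bitsandbytes transformers peft torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftConfig, PeftModel
import torch

config = PeftConfig.from_pretrained("lang-uk/dragoman")
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=False,
)

# Load the 4-bit quantized base model, then attach the Dragoman LoRA adapter.
model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-v0.1", quantization_config=quant_config
)
model = PeftModel.from_pretrained(model, "lang-uk/dragoman").to("cuda")

# Assumption: the diff truncates the tokenizer call, so we load it from the
# base model path recorded in the PEFT config.
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)

input_text = "[INST] who holds this neighborhood? [/INST]"  # model input should adhere to this format
input_ids = tokenizer(input_text, return_tensors="pt").to("cuda")

# num_beams=10 enables beam search, as added in this commit.
outputs = model.generate(**input_ids, num_beams=10)
print(tokenizer.decode(outputs[0]))
```

Beam search with `num_beams=10` keeps ten candidate sequences in play instead of committing greedily to the single most likely token at each step, which generally improves translation quality at the cost of slower generation.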