Apoorvakoira committed on
Commit
c2cba18
·
1 Parent(s): fec7ca7

Upload 4 files

Browse files
Files changed (3) hide show
  1. README.md +89 -0
  2. adapter_config.json +18 -0
  3. adapter_model.bin +3 -0
README.md ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ datasets:
3
+ - tatsu-lab/alpaca
4
+ language:
5
+ - en
6
+ ---
7
+ ### Model card for Alpaca-30B
8
+
9
+ This is a Llama model instruction-finetuned with LoRA for 3 epochs on the Tatsu Lab Alpaca dataset. It was trained in 8-bit mode.
10
+
11
+ To run this model, use the snippet below, or use this repo for [generation](https://github.com/aspctu/alpaca-lora).
12
+
13
+ ```
14
+ # Code adapted from https://github.com/tloen/alpaca-lora
15
+ import torch
16
+ from peft import PeftModel
17
+ import transformers
18
+
19
+ from transformers import LlamaTokenizer, LlamaForCausalLM, GenerationConfig
20
+
21
# Hub repositories: the base LLaMA checkpoint and the LoRA adapter applied on top.
BASE_MODEL = "decapoda-research/llama-30b-hf"
LORA_WEIGHTS = "baseten/alpaca-30b"

tokenizer = LlamaTokenizer.from_pretrained(BASE_MODEL)

# Load the base weights in 8-bit and let the library place layers on the
# available devices.
base_model = LlamaForCausalLM.from_pretrained(
    BASE_MODEL,
    load_in_8bit=True,
    torch_dtype=torch.float16,
    device_map="auto",
)

# Wrap the base model with the fine-tuned adapter weights.
model = PeftModel.from_pretrained(
    base_model,
    LORA_WEIGHTS,
    torch_dtype=torch.float16,
)
35
+
36
def generate_prompt(instruction, input=None):
    """Build the Alpaca-style prompt for an instruction.

    Args:
        instruction: Text describing the task to perform.
        input: Optional extra context for the task. Any falsy value
            (``None`` or an empty string) selects the template without an
            ``### Input:`` section.

    Returns:
        The prompt string, ending with the ``### Response:`` marker.
    """
    # Guard clause: no additional context, use the short template.
    if not input:
        return (
            "Below is an instruction that describes a task. "
            "Write a response that appropriately completes the request.\n"
            "\n"
            "### Instruction:\n"
            f"{instruction}\n"
            "\n"
            "### Response:"
        )

    return (
        "Below is an instruction that describes a task, paired with an input "
        "that provides further context. "
        "Write a response that appropriately completes the request.\n"
        "\n"
        "### Instruction:\n"
        f"{instruction}\n"
        "\n"
        "### Input:\n"
        f"{input}\n"
        "\n"
        "### Response:"
    )
54
+
55
+
56
+ model.eval()
57
+
58
+
59
def evaluate(
    instruction,
    input=None,
    temperature=0.1,
    top_p=0.75,
    top_k=40,
    num_beams=4,
    max_new_tokens=2048,
    **kwargs,
):
    """Generate the model's response to an instruction.

    Args:
        instruction: Text describing the task to perform.
        input: Optional extra context forwarded to ``generate_prompt``.
        temperature: Sampling temperature passed to ``GenerationConfig``.
        top_p: Nucleus-sampling threshold passed to ``GenerationConfig``.
        top_k: Top-k cutoff passed to ``GenerationConfig``.
        num_beams: Beam count passed to ``GenerationConfig``.
        max_new_tokens: Upper bound on the number of generated tokens.
        **kwargs: Additional ``GenerationConfig`` options.

    Returns:
        The decoded text that follows the ``### Response:`` marker, with
        surrounding whitespace stripped.
    """
    prompt = generate_prompt(instruction, input)
    inputs = tokenizer(prompt, return_tensors="pt")
    # The snippet defines no global `device`; take the placement from the
    # loaded model so the input tensor lands where the model expects it.
    input_ids = inputs["input_ids"].to(model.device)
    generation_config = GenerationConfig(
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        num_beams=num_beams,
        **kwargs,
    )
    with torch.no_grad():
        generation_output = model.generate(
            input_ids=input_ids,
            generation_config=generation_config,
            return_dict_in_generate=True,
            output_scores=True,
            max_new_tokens=max_new_tokens,
        )
    sequence = generation_output.sequences[0]
    output = tokenizer.decode(sequence)
    # The decoded text echoes the prompt; keep only what follows the marker.
    return output.split("### Response:")[1].strip()
89
+ ```
adapter_config.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "base_model_name_or_path": "decapoda-research/llama-30b-hf",
3
+ "bias": "none",
4
+ "enable_lora": null,
5
+ "fan_in_fan_out": false,
6
+ "inference_mode": true,
7
+ "lora_alpha": 16,
8
+ "lora_dropout": 0.05,
9
+ "merge_weights": false,
10
+ "modules_to_save": null,
11
+ "peft_type": "LORA",
12
+ "r": 8,
13
+ "target_modules": [
14
+ "q_proj",
15
+ "v_proj"
16
+ ],
17
+ "task_type": "CAUSAL_LM"
18
+ }
adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0365143b9adc0323d00b43abbebcb6412e2a15397de20c8d1da45d96f950267f
3
+ size 133