Change flow to allow user to choose the attn type
Browse files- modeling_cocom.py +3 -3
modeling_cocom.py
CHANGED
@@ -71,7 +71,7 @@ class COCOMConfig(PretrainedConfig):
|
|
71 |
lora = False,
|
72 |
training_form="both",
|
73 |
lora_r=16,
|
74 |
-
|
75 |
**kwargs):
|
76 |
super().__init__(**kwargs)
|
77 |
|
@@ -85,14 +85,14 @@ class COCOMConfig(PretrainedConfig):
|
|
85 |
self.lora = lora # boolean type, whether to use lora training
|
86 |
self.training_form = training_form # training form, could be compressor: training only compressor; both:
|
87 |
self.lora_r = lora_r # lora_r for lora training, we use 16 throughout the experiment.
|
88 |
-
self.
|
89 |
|
90 |
class COCOM(PreTrainedModel):
|
91 |
config_class = COCOMConfig
|
92 |
def __init__(self, cfg):
|
93 |
super().__init__(cfg)
|
94 |
# define models
|
95 |
-
attn_impl =
|
96 |
# model could be loaded in three quantization modes: no, int4, int8
|
97 |
if cfg.quantization == "no":
|
98 |
self.decoder = AutoModelForCausalLM.from_pretrained(
|
|
|
71 |
lora = False,
|
72 |
training_form="both",
|
73 |
lora_r=16,
|
74 |
+
attn_implementation="eager",
|
75 |
**kwargs):
|
76 |
super().__init__(**kwargs)
|
77 |
|
|
|
85 |
self.lora = lora # boolean type, whether to use lora training
|
86 |
self.training_form = training_form # training form, could be compressor: training only compressor; both:
|
87 |
self.lora_r = lora_r # lora_r for lora training, we use 16 throughout the experiment.
|
88 |
+
self.attn_implementation = attn_implementation
|
89 |
|
90 |
class COCOM(PreTrainedModel):
|
91 |
config_class = COCOMConfig
|
92 |
def __init__(self, cfg):
|
93 |
super().__init__(cfg)
|
94 |
# define models
|
95 |
+
attn_impl = cfg.attn_implementation
|
96 |
# model could be loaded in three quantization modes: no, int4, int8
|
97 |
if cfg.quantization == "no":
|
98 |
self.decoder = AutoModelForCausalLM.from_pretrained(
|