Tu2003716 committed on
Commit
9d3c242
·
verified ·
1 Parent(s): 027ecf9

Change flow to allow user to choose the attn type

Browse files
Files changed (1) hide show
  1. modeling_cocom.py +3 -3
modeling_cocom.py CHANGED
@@ -71,7 +71,7 @@ class COCOMConfig(PretrainedConfig):
71
  lora = False,
72
  training_form="both",
73
  lora_r=16,
74
- disable_flash_attention=True,
75
  **kwargs):
76
  super().__init__(**kwargs)
77
 
@@ -85,14 +85,14 @@ class COCOMConfig(PretrainedConfig):
85
  self.lora = lora # boolean type, whether to use lora training
86
  self.training_form = training_form # training form, could be compressor: training only compressor; both:
87
  self.lora_r = lora_r # lora_r for lora training, we use 16 throughout the experiment.
88
- self.disable_flash_attention = disable_flash_attention
89
 
90
  class COCOM(PreTrainedModel):
91
  config_class = COCOMConfig
92
  def __init__(self, cfg):
93
  super().__init__(cfg)
94
  # define models
95
- attn_impl = "flash_attention_2" if not cfg.disable_flash_attention else "default"
96
  # model could be loaded in three quantization modes: no, int4, int8
97
  if cfg.quantization == "no":
98
  self.decoder = AutoModelForCausalLM.from_pretrained(
 
71
  lora = False,
72
  training_form="both",
73
  lora_r=16,
74
+ attn_implementation="eager",
75
  **kwargs):
76
  super().__init__(**kwargs)
77
 
 
85
  self.lora = lora # boolean type, whether to use lora training
86
  self.training_form = training_form # training form, could be compressor: training only compressor; both:
87
  self.lora_r = lora_r # lora_r for lora training, we use 16 throughout the experiment.
88
+ self.attn_implementation = attn_implementation
89
 
90
  class COCOM(PreTrainedModel):
91
  config_class = COCOMConfig
92
  def __init__(self, cfg):
93
  super().__init__(cfg)
94
  # define models
95
+ attn_impl = cfg.attn_implementation
96
  # model could be loaded in three quantization modes: no, int4, int8
97
  if cfg.quantization == "no":
98
  self.decoder = AutoModelForCausalLM.from_pretrained(