When the model is called, an error is encountered
#46
by
BANGYU
- opened
File "/home/admin/worker/slave/suezops_c2_prod_pre_star_coder.pre_star_coder_15_29/binary/py_inference/serving/servers/http_server.py", line 57, in do_POST
response_str = self.executor.exec_json(post_data)
File "/home/admin/worker/slave/suezops_c2_prod_pre_star_coder.pre_star_coder_15_29/binary/py_inference/serving/module_executor.py", line 86, in exec_json
raise e
File "/home/admin/worker/slave/suezops_c2_prod_pre_star_coder.pre_star_coder_15_29/binary/py_inference/serving/module_executor.py", line 76, in exec_json
result = self.do_exec(args_json)
File "/home/admin/worker/slave/suezops_c2_prod_pre_star_coder.pre_star_coder_15_29/binary/py_inference/serving/module_executor.py", line 97, in do_exec
result = self.inference_worker.inference(**args)
File "/home/admin/worker/slave/suezops_c2_prod_pre_star_coder.pre_star_coder_15_29/binary/starcoder_inference/main.py", line 46, in inference
res = self.predict(text, **kwargs)
File "/home/admin/worker/slave/suezops_c2_prod_pre_star_coder.pre_star_coder_15_29/binary/starcoder_inference/main.py", line 42, in predict
outputs = self.model.generate(tokens, do_sample=True, pad_token_id=self.tokenizer.eos_token_id, temperature=temperature, max_new_tokens=max_new_tokens, min_new_tokens=min_new_tokens, top_p=top_p, repetition_penalty=repetition_penalty)
File "/usr/local/lib64/python3.10/site-packages/torch/autograd/grad_mode.py", line 27, in decorate_context
return func(*args, **kwargs)
File "/home/admin/worker/slave/suezops_c2_prod_pre_star_coder.pre_star_coder_15_29/python_worker/python_user_base/lib/python3.10/site-packages/transformers/generation/utils.py", line 1565, in generate
return self.sample(
File "/home/admin/worker/slave/suezops_c2_prod_pre_star_coder.pre_star_coder_15_29/python_worker/python_user_base/lib/python3.10/site-packages/transformers/generation/utils.py", line 2612, in sample
outputs = self(
File "/usr/local/lib64/python3.10/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
return forward_call(*input, **kwargs)
File "/home/admin/worker/slave/suezops_c2_prod_pre_star_coder.pre_star_coder_15_29/python_worker/python_user_base/lib/python3.10/site-packages/accelerate/hooks.py", line 165, in new_forward
output = old_forward(*args, **kwargs)
File "/home/admin/worker/slave/suezops_c2_prod_pre_star_coder.pre_star_coder_15_29/python_worker/python_user_base/lib/python3.10/site-packages/transformers/models/gpt_bigcode/modeling_gpt_bigcode.py", line 808, in forward
transformer_outputs = self.transformer(
File "/usr/local/lib64/python3.10/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
return forward_call(*input, **kwargs)
File "/home/admin/worker/slave/suezops_c2_prod_pre_star_coder.pre_star_coder_15_29/python_worker/python_user_base/lib/python3.10/site-packages/accelerate/hooks.py", line 165, in new_forward
output = old_forward(*args, **kwargs)
File "/home/admin/worker/slave/suezops_c2_prod_pre_star_coder.pre_star_coder_15_29/python_worker/python_user_base/lib/python3.10/site-packages/transformers/models/gpt_bigcode/modeling_gpt_bigcode.py", line 673, in forward
outputs = block(
File "/usr/local/lib64/python3.10/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
return forward_call(*input, **kwargs)
File "/home/admin/worker/slave/suezops_c2_prod_pre_star_coder.pre_star_coder_15_29/python_worker/python_user_base/lib/python3.10/site-packages/accelerate/hooks.py", line 165, in new_forward
output = old_forward(*args, **kwargs)
File "/home/admin/worker/slave/suezops_c2_prod_pre_star_coder.pre_star_coder_15_29/python_worker/python_user_base/lib/python3.10/site-packages/transformers/models/gpt_bigcode/modeling_gpt_bigcode.py", line 316, in forward
attn_outputs = self.attn(
File "/usr/local/lib64/python3.10/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
return forward_call(*input, **kwargs)
File "/home/admin/worker/slave/suezops_c2_prod_pre_star_coder.pre_star_coder_15_29/python_worker/python_user_base/lib/python3.10/site-packages/accelerate/hooks.py", line 165, in new_forward
output = old_forward(*args, **kwargs)
File "/home/admin/worker/slave/suezops_c2_prod_pre_star_coder.pre_star_coder_15_29/python_worker/python_user_base/lib/python3.10/site-packages/transformers/models/gpt_bigcode/modeling_gpt_bigcode.py", line 230, in forward
query, key_value = self.c_attn(hidden_states).split((self.embed_dim, 2 * self.kv_dim), dim=2)
File "/usr/local/lib64/python3.10/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
return forward_call(*input, **kwargs)
File "/home/admin/worker/slave/suezops_c2_prod_pre_star_coder.pre_star_coder_15_29/python_worker/python_user_base/lib/python3.10/site-packages/accelerate/hooks.py", line 165, in new_forward
output = old_forward(*args, **kwargs)
File "/home/admin/worker/slave/suezops_c2_prod_pre_star_coder.pre_star_coder_15_29/python_worker/python_user_base/lib/python3.10/site-packages/bitsandbytes/nn/modules.py", line 320, in forward
out = bnb.matmul(x, self.weight, bias=self.bias, state=self.state)
File "/home/admin/worker/slave/suezops_c2_prod_pre_star_coder.pre_star_coder_15_29/python_worker/python_user_base/lib/python3.10/site-packages/bitsandbytes/autograd/_functions.py", line 500, in matmul
return MatMul8bitLt.apply(A, B, out, bias, state)
File "/home/admin/worker/slave/suezops_c2_prod_pre_star_coder.pre_star_coder_15_29/python_worker/python_user_base/lib/python3.10/site-packages/bitsandbytes/autograd/_functions.py", line 417, in forward
output += torch.matmul(subA, state.subB)
RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x3 and 2x6400)
However, this problem is sporadic. Has anyone encountered a similar error?
Can you provide the code for how you're loading the model and using it for generation?
loubnabnl
changed discussion status to
closed