Commit: "avoid duplicate generate args" — changed file: modeling_minicpmo.py (+1 line, -1 line). The diff moves the `kwargs.pop("output_hidden_states", None)` call from `_decode_stream` into `_decode`, so the key is stripped before being forwarded to `self.llm.generate(...)`, avoiding a duplicate/conflicting generation argument.
@@ -636,6 +636,7 @@ class MiniCPMO(MiniCPMOPreTrainedModel):
|
|
636 |
return self.llm(input_ids=None, position_ids=position_ids, inputs_embeds=vllm_embedding, **kwargs)
|
637 |
|
638 |
def _decode(self, inputs_embeds, tokenizer, attention_mask, **kwargs):
|
|
|
639 |
terminators = [tokenizer.convert_tokens_to_ids(i) for i in self.terminators]
|
640 |
outputs = self.llm.generate(
|
641 |
inputs_embeds=inputs_embeds,
|
@@ -649,7 +650,6 @@ class MiniCPMO(MiniCPMOPreTrainedModel):
|
|
649 |
return outputs
|
650 |
|
651 |
def _decode_stream(self, inputs_embeds, tokenizer, **kwargs):
|
652 |
-
kwargs.pop("output_hidden_states", None)
|
653 |
terminators = [tokenizer.convert_tokens_to_ids(i) for i in self.terminators]
|
654 |
streamer = TextIteratorStreamer(tokenizer=tokenizer)
|
655 |
generation_kwargs = {
|
|
|
636 |
return self.llm(input_ids=None, position_ids=position_ids, inputs_embeds=vllm_embedding, **kwargs)
|
637 |
|
638 |
def _decode(self, inputs_embeds, tokenizer, attention_mask, **kwargs):
|
639 |
+
kwargs.pop("output_hidden_states", None)
|
640 |
terminators = [tokenizer.convert_tokens_to_ids(i) for i in self.terminators]
|
641 |
outputs = self.llm.generate(
|
642 |
inputs_embeds=inputs_embeds,
|
|
|
650 |
return outputs
|
651 |
|
652 |
def _decode_stream(self, inputs_embeds, tokenizer, **kwargs):
|
|
|
653 |
terminators = [tokenizer.convert_tokens_to_ids(i) for i in self.terminators]
|
654 |
streamer = TextIteratorStreamer(tokenizer=tokenizer)
|
655 |
generation_kwargs = {
|