Crystalcareai committed
Update modeling_quiet.py

modeling_quiet.py  +1  -76
modeling_quiet.py
CHANGED
@@ -2231,82 +2231,7 @@ class QuietForCausalLM(QuietPreTrainedModel):
             )
         return reordered_past
 
-
-    def generate(self, input_ids, attention_mask=None, **generate_kwargs):
-        batch_size, seq_len = input_ids.shape
-
-        max_length = generate_kwargs.get("max_length", self.config.max_length)
-        min_length = generate_kwargs.get("min_length", self.config.min_length)
-        do_sample = generate_kwargs.get("do_sample", self.config.do_sample)
-        early_stopping = generate_kwargs.get("early_stopping", self.config.early_stopping)
-        num_beams = generate_kwargs.get("num_beams", self.config.num_beams)
-        temperature = generate_kwargs.get("temperature", self.config.temperature)
-        top_k = generate_kwargs.get("top_k", self.config.top_k)
-        top_p = generate_kwargs.get("top_p", self.config.top_p)
-        repetition_penalty = generate_kwargs.get("repetition_penalty", self.config.repetition_penalty)
-        pad_token_id = generate_kwargs.get("pad_token_id", self.config.pad_token_id)
-        eos_token_id = generate_kwargs.get("eos_token_id", self.config.eos_token_id)
-
-        # Prepend the start thought token to the input sequence
-        start_thought_token_id = self.tokenizer.convert_tokens_to_ids("<|startthought|>")
-        input_ids = torch.cat([input_ids, torch.tensor([[start_thought_token_id]], device=input_ids.device)], dim=-1)
-        if attention_mask is not None:
-            attention_mask = torch.cat([attention_mask, torch.ones((batch_size, 1), device=attention_mask.device)], dim=-1)
-
-        thought_embeds = self.model.embed_tokens(input_ids)
-
-        past_key_values = None
-        unfinished_sequences = input_ids.new(batch_size).fill_(1)
-        sequence_lengths = input_ids.new(batch_size).fill_(max_length)
-
-        while True:
-            model_inputs = self.prepare_inputs_for_generation(input_ids, past_key_values=past_key_values, attention_mask=attention_mask, use_cache=True)
-            thought_outputs = self.model(**model_inputs, output_hidden_states=True, return_dict=True)
-            next_thought_embeds = self.prepare_thought_embeds(thought_outputs.hidden_states[-1], temperature=temperature)
-
-            thought_embeds = torch.cat([thought_embeds, next_thought_embeds], dim=1)
-
-            lm_logits = self.lm_head(thought_outputs.last_hidden_state)
-            lm_logits = lm_logits[:, -1, :] / temperature
-
-            if do_sample:
-                next_tokens = torch.multinomial(F.softmax(lm_logits, dim=-1), num_samples=1).squeeze(1)
-            else:
-                next_tokens = torch.argmax(lm_logits, dim=-1)
-
-            # Update input_ids, attention_mask and past_key_values
-            input_ids = torch.cat([input_ids, next_tokens.unsqueeze(-1)], dim=-1)
-            if attention_mask is not None:
-                attention_mask = torch.cat([attention_mask, attention_mask.new_ones((batch_size, 1))], dim=-1)
-            past_key_values = thought_outputs.past_key_values
-
-            # Check if generation is complete
-            if eos_token_id is not None:
-                unfinished_sequences = unfinished_sequences & (next_tokens != eos_token_id)
-                unfinished_sequences = unfinished_sequences & (input_ids.shape[-1] < max_length)
-                if unfinished_sequences.max() == 0:
-                    break
-            elif input_ids.shape[-1] >= max_length:
-                input_ids[:, 0] = eos_token_id
-                break
-
-        return input_ids
-
-    def prepare_thought_embeds(self, hidden_states, temperature=1.0):
-        batch_size, seq_len, hidden_size = hidden_states.shape
-
-        if self.use_start_thought_token:
-            start_embed = self.start_embedding[0].unsqueeze(0).unsqueeze(0).repeat(batch_size, 1, 1) * temperature
-        else:
-            start_embed = hidden_states[:, :1, :]
-
-        if self.use_end_thought_token:
-            end_embed = self.end_embedding[0].unsqueeze(0).unsqueeze(0).repeat(batch_size, 1, 1) * temperature
-            thought_embeds = torch.cat([start_embed, hidden_states[:, 1:-1, :], end_embed], dim=1)
-        else:
-            thought_embeds = torch.cat([start_embed, hidden_states[:, 1:, :]], dim=1)
-
-        return thought_embeds
+
 
 
 @add_start_docstrings(
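With the custom override removed, `QuietForCausalLM` would presumably fall back to the stock `GenerationMixin.generate` inherited from `PreTrainedModel`. Below is a minimal sketch of calling it through the Hub, assuming the checkpoint ships this `modeling_quiet.py` as remote code; the repo id is a placeholder, not something confirmed by this commit.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Placeholder repo id for illustration only; substitute the actual checkpoint.
repo_id = "Crystalcareai/Quiet-Star"

tokenizer = AutoTokenizer.from_pretrained(repo_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    repo_id,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
)

inputs = tokenizer("The capital of France is", return_tensors="pt")
with torch.no_grad():
    # The stock generate() handles sampling, EOS checks and the KV cache,
    # which the removed hand-rolled loop re-implemented.
    output_ids = model.generate(
        **inputs,
        max_new_tokens=32,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
    )
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))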