Crystalcareai
committed on
Update modeling_quiet.py
modeling_quiet.py +4 -66
modeling_quiet.py
CHANGED
@@ -48,7 +48,7 @@ from transformers.utils import (
     replace_return_docstrings,
 )
 from .configuration_quiet import QuietConfig
-
+from .generate import generate
 import time
 from typing import Optional, List
 
@@ -1423,71 +1423,9 @@ class QuietForCausalLM(QuietPreTrainedModel, GenerationMixin):
         logits = self.lm_head(mixed_hidden_states)
         return logits
 
-
-
-
-    batch_size, seq_len = input_ids.shape
-    max_length = max_length if max_length is not None else model.config.max_length
-    max_new_tokens = max_length - seq_len
-    temperature = kwargs.get("temperature", 1.0)
-
-    with torch.no_grad():
-        for cur_token_idx in range(max_new_tokens):
-            # Run a forward pass to get the logits for the next token
-            outputs = model(
-                input_ids=input_ids,
-                attention_mask=attention_mask,
-                use_cache=True,
-            )
-
-            logits = outputs.logits[:, -1, :]
-
-            # Sample the next token from the logits
-            next_token_logits = logits / temperature
-            next_token_id = torch.multinomial(torch.nn.functional.softmax(next_token_logits, dim=-1), num_samples=1)
-
-            # Append the new token to the input sequence
-            input_ids = torch.cat([input_ids, next_token_id], dim=-1)
-            attention_mask = torch.cat([attention_mask, torch.ones((batch_size, 1), dtype=attention_mask.dtype, device=attention_mask.device)], dim=-1)
-
-            # Stream the new token if a streamer is provided
-            if streamer is not None:
-                streamer.put(next_token_id)
-
-            # Check if the end token is generated for all sequences in the batch
-            if next_token_id.eq(model.config.eos_token_id).all():
-                break
-
-    return input_ids
-
-
-    # Add this to QuietForCausalLM forward method to support custom generate
-
-    @torch.no_grad()
-    def generate(
-        self,
-        input_ids,
-        attention_mask=None,
-        max_length=None,
-        streamer=None,
-        **kwargs,
-    ):
-        # Prepare inputs
-        batch_size, seq_len = input_ids.shape
-        if attention_mask is None:
-            attention_mask = torch.ones_like(input_ids)
-
-        # Call the custom generate function
-        output_ids = custom_generate(
-            self,
-            input_ids=input_ids,
-            attention_mask=attention_mask,
-            max_length=max_length,
-            streamer=streamer,
-            **kwargs,
-        )
-
-        return output_ids
+    def generate(self, input_ids, attention_mask=None, max_length=None, temperature=1.0, **kwargs):
+        from .generate import generate
+        return generate(self, input_ids, attention_mask, max_length, temperature, **kwargs)
 
     @add_start_docstrings_to_model_forward(QUIET_INPUTS_DOCSTRING)
     @replace_return_docstrings(output_type=CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC)
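The delegated generate function lives in a separate generate.py module that is not part of this diff. As a rough sketch only, assuming that module preserves the behavior of the removed inline loop (temperature-scaled multinomial sampling with an early stop once every sequence emits EOS), it might look like:

# generate.py -- hypothetical sketch; the actual module is not shown in this commit.
import torch

@torch.no_grad()
def generate(model, input_ids, attention_mask=None, max_length=None, temperature=1.0, **kwargs):
    batch_size, seq_len = input_ids.shape
    if attention_mask is None:
        attention_mask = torch.ones_like(input_ids)
    max_length = max_length if max_length is not None else model.config.max_length

    for _ in range(max_length - seq_len):
        # Forward pass for next-token logits.
        outputs = model(input_ids=input_ids, attention_mask=attention_mask, use_cache=True)
        logits = outputs.logits[:, -1, :]

        # Temperature-scaled multinomial sampling, as in the removed loop.
        probs = torch.nn.functional.softmax(logits / temperature, dim=-1)
        next_token_id = torch.multinomial(probs, num_samples=1)

        # Extend the sequence and its attention mask by one token.
        input_ids = torch.cat([input_ids, next_token_id], dim=-1)
        attention_mask = torch.cat([attention_mask, attention_mask.new_ones((batch_size, 1))], dim=-1)

        # Stop once every sequence in the batch has generated EOS.
        if next_token_id.eq(model.config.eos_token_id).all():
            break

    return input_ids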
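With generation now routed through the model's own generate method, calling it follows the usual pattern for custom Hugging Face modeling code. A hypothetical usage sketch (the repository id and prompt below are placeholders, not from this commit):

from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("Crystalcareai/quiet-model", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained("Crystalcareai/quiet-model", trust_remote_code=True)

inputs = tokenizer("Hello, world!", return_tensors="pt")
output_ids = model.generate(inputs.input_ids, attention_mask=inputs.attention_mask, max_length=64, temperature=0.7)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))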