Crystalcareai committed
Update modeling_quiet.py

modeling_quiet.py  +9 -1  CHANGED
@@ -1182,7 +1182,6 @@ class QuietForCausalLM(QuietPreTrainedModel, GenerationMixin):
         self.gradient_accumulation_steps = 1
         self.training_steps = 0
         self.tokenizer = AutoTokenizer.from_pretrained("Crystalcareai/Quiet-Star-Custom")
-        self.streamer = TextStreamer(self.tokenizer)
         self.start_token_id = None
         self.end_token_id = None
         self.rm_initialized = False
@@ -2116,6 +2115,15 @@ class QuietForCausalLM(QuietPreTrainedModel, GenerationMixin):
         del start_embedding
         del end_embedding
         torch.cuda.empty_cache()
+
+        if streamer is not None:
+            streamer_kwargs = {
+                "generated_token_ids": input_ids,
+                "past_key_values": outputs.past_key_values,
+                "hidden_states": outputs.hidden_states,
+                "attentions": outputs.attentions,
+            }
+            streamer.put(**streamer_kwargs)
 
 
         return CausalLMOutputWithPast(
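The diff removes the TextStreamer that was constructed unconditionally in __init__ and instead pushes output to an optional streamer object from inside the forward pass. A minimal usage sketch follows; it assumes the model's forward accepts a `streamer` argument and that the streamer exposes a keyword-based put(), neither of which is shown in this diff (transformers' built-in TextStreamer.put() only takes a tensor of token ids, so a hypothetical custom class is used here).

    # Sketch only: `streamer=` as a forward argument and the KwargStreamer class
    # are assumptions made for illustration; they are not part of this diff.
    from transformers import AutoModelForCausalLM, AutoTokenizer

    class KwargStreamer:
        """Hypothetical streamer whose put() accepts the keyword payload built above."""
        def __init__(self, tokenizer):
            self.tokenizer = tokenizer

        def put(self, generated_token_ids=None, **unused):
            # Decode and print the tokens seen so far; the extra kwargs
            # (past_key_values, hidden_states, attentions) are ignored here.
            if generated_token_ids is not None:
                print(self.tokenizer.decode(generated_token_ids[0], skip_special_tokens=True))

        def end(self):
            pass

    tokenizer = AutoTokenizer.from_pretrained("Crystalcareai/Quiet-Star-Custom")
    model = AutoModelForCausalLM.from_pretrained(
        "Crystalcareai/Quiet-Star-Custom", trust_remote_code=True
    )

    inputs = tokenizer("Hello, world", return_tensors="pt")
    # The streamer is now supplied per call rather than created in __init__.
    outputs = model(**inputs, streamer=KwargStreamer(tokenizer))

Passing the streamer at call time keeps __init__ free of I/O side effects and lets callers opt in to streaming only when they need it.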