Crystalcareai
committed on
Update modeling_quiet.py
Browse files
- modeling_quiet.py +2 -2
modeling_quiet.py
CHANGED
@@ -1110,7 +1110,7 @@ class QuietModel(QuietPreTrainedModel):
|
|
1110 |
next_decoder_cache = None
|
1111 |
|
1112 |
for decoder_layer in self.layers:
|
1113 |
-
print(f"Hidden states contains NaN before layer {
|
1114 |
if output_hidden_states:
|
1115 |
all_hidden_states += (hidden_states,)
|
1116 |
|
@@ -1168,7 +1168,7 @@ def nonzero_mean(x, axis=None):
|
|
1168 |
|
1169 |
def loss_mean(x):
|
1170 |
return x.sum() / (x != 0).sum()
|
1171 |
-
print(f"Hidden states contains NaN after layer {
|
1172 |
|
1173 |
class QuietForCausalLM(QuietPreTrainedModel):
|
1174 |
_tied_weights_keys = ["lm_head.weight"]
|
|
|
1110 |
next_decoder_cache = None
|
1111 |
|
1112 |
for decoder_layer in self.layers:
|
1113 |
+
print(f"Hidden states contains NaN before layer {id}:", torch.isnan(hidden_states).any().item())
|
1114 |
if output_hidden_states:
|
1115 |
all_hidden_states += (hidden_states,)
|
1116 |
|
|
|
1168 |
|
1169 |
def loss_mean(x):
|
1170 |
return x.sum() / (x != 0).sum()
|
1171 |
+
print(f"Hidden states contains NaN after layer {id}:", torch.isnan(hidden_states).any().item())
|
1172 |
|
1173 |
class QuietForCausalLM(QuietPreTrainedModel):
|
1174 |
_tied_weights_keys = ["lm_head.weight"]
|