Upload folder using huggingface_hub
Browse files
- 0-res-sm/cfg.json +1 -21
- 0-res-sm/sparsity.safetensors +3 -0
- 1-res-sm/cfg.json +1 -21
- 1-res-sm/sparsity.safetensors +3 -0
- 2-res-sm/cfg.json +1 -21
- 2-res-sm/sparsity.safetensors +3 -0
- 3-res-sm/cfg.json +1 -21
- 3-res-sm/sparsity.safetensors +3 -0
- 4-res-sm/cfg.json +1 -21
- 4-res-sm/sparsity.safetensors +3 -0
- 5-res-sm/cfg.json +1 -21
- 5-res-sm/sparsity.safetensors +3 -0
- e-res-sm/cfg.json +1 -21
- e-res-sm/sparsity.safetensors +3 -0
0-res-sm/cfg.json
CHANGED
@@ -1,21 +1 @@
|
|
1 |
-
{
|
2 |
-
"model_name": "pythia-70m-deduped",
|
3 |
-
"hook_name": "blocks.0.hook_resid_post",
|
4 |
-
"hook_layer": 0,
|
5 |
-
"hook_head_index": null,
|
6 |
-
"d_in": 512,
|
7 |
-
"d_sae": 32768,
|
8 |
-
"dtype": "torch.float32",
|
9 |
-
"device": "mps",
|
10 |
-
"dataset_path": "EleutherAI/the_pile_deduplicated",
|
11 |
-
"context_size": 128,
|
12 |
-
"architecture": "standard",
|
13 |
-
"activation_fn_str": "relu",
|
14 |
-
"activation_fn_kwargs": null,
|
15 |
-
"apply_b_dec_to_input": true,
|
16 |
-
"finetuning_scaling_factor": false,
|
17 |
-
"sae_lens_training_version": null,
|
18 |
-
"prepend_bos": false,
|
19 |
-
"dataset_trust_remote_code": true,
|
20 |
-
"normalize_activations": "none"
|
21 |
-
}
|
|
|
1 |
+
{"architecture": "standard", "d_in": 512, "d_sae": 32768, "dtype": "torch.float32", "device": "cuda", "model_name": "pythia-70m-deduped", "hook_name": "blocks.0.hook_resid_post", "hook_layer": 0, "hook_head_index": null, "activation_fn_str": "relu", "activation_fn_kwargs": {}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "EleutherAI/the_pile_deduplicated", "dataset_trust_remote_code": true, "context_size": 128, "normalize_activations": "none"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
0-res-sm/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cbb5e02251657a3d3176d1059be408d40743923280b21349e281e6607eef9ac2
|
3 |
+
size 131152
|
1-res-sm/cfg.json
CHANGED
@@ -1,21 +1 @@
|
|
1 |
-
{
|
2 |
-
"model_name": "pythia-70m-deduped",
|
3 |
-
"hook_name": "blocks.1.hook_resid_post",
|
4 |
-
"hook_layer": 1,
|
5 |
-
"hook_head_index": null,
|
6 |
-
"d_in": 512,
|
7 |
-
"d_sae": 32768,
|
8 |
-
"dtype": "torch.float32",
|
9 |
-
"device": "mps",
|
10 |
-
"dataset_path": "EleutherAI/the_pile_deduplicated",
|
11 |
-
"context_size": 128,
|
12 |
-
"architecture": "standard",
|
13 |
-
"activation_fn_str": "relu",
|
14 |
-
"activation_fn_kwargs": null,
|
15 |
-
"apply_b_dec_to_input": true,
|
16 |
-
"finetuning_scaling_factor": false,
|
17 |
-
"sae_lens_training_version": null,
|
18 |
-
"prepend_bos": false,
|
19 |
-
"dataset_trust_remote_code": true,
|
20 |
-
"normalize_activations": "none"
|
21 |
-
}
|
|
|
1 |
+
{"architecture": "standard", "d_in": 512, "d_sae": 32768, "dtype": "torch.float32", "device": "cuda", "model_name": "pythia-70m-deduped", "hook_name": "blocks.1.hook_resid_post", "hook_layer": 1, "hook_head_index": null, "activation_fn_str": "relu", "activation_fn_kwargs": {}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "EleutherAI/the_pile_deduplicated", "dataset_trust_remote_code": true, "context_size": 128, "normalize_activations": "none"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1-res-sm/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ab4d2869a4215fb21c2232f086f2447f520ed553b7f4a38c2688cd5317893ca7
|
3 |
+
size 131152
|
2-res-sm/cfg.json
CHANGED
@@ -1,21 +1 @@
|
|
1 |
-
{
|
2 |
-
"model_name": "pythia-70m-deduped",
|
3 |
-
"hook_name": "blocks.2.hook_resid_post",
|
4 |
-
"hook_layer": 2,
|
5 |
-
"hook_head_index": null,
|
6 |
-
"d_in": 512,
|
7 |
-
"d_sae": 32768,
|
8 |
-
"dtype": "torch.float32",
|
9 |
-
"device": "mps",
|
10 |
-
"dataset_path": "EleutherAI/the_pile_deduplicated",
|
11 |
-
"context_size": 128,
|
12 |
-
"architecture": "standard",
|
13 |
-
"activation_fn_str": "relu",
|
14 |
-
"activation_fn_kwargs": null,
|
15 |
-
"apply_b_dec_to_input": true,
|
16 |
-
"finetuning_scaling_factor": false,
|
17 |
-
"sae_lens_training_version": null,
|
18 |
-
"prepend_bos": false,
|
19 |
-
"dataset_trust_remote_code": true,
|
20 |
-
"normalize_activations": "none"
|
21 |
-
}
|
|
|
1 |
+
{"architecture": "standard", "d_in": 512, "d_sae": 32768, "dtype": "torch.float32", "device": "cuda", "model_name": "pythia-70m-deduped", "hook_name": "blocks.2.hook_resid_post", "hook_layer": 2, "hook_head_index": null, "activation_fn_str": "relu", "activation_fn_kwargs": {}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "EleutherAI/the_pile_deduplicated", "dataset_trust_remote_code": true, "context_size": 128, "normalize_activations": "none"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2-res-sm/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5eb79aac97f83d2de383d4cd2d701fea9077c7dee85a6e8668d323d23918cf2b
|
3 |
+
size 131152
|
3-res-sm/cfg.json
CHANGED
@@ -1,21 +1 @@
|
|
1 |
-
{
|
2 |
-
"model_name": "pythia-70m-deduped",
|
3 |
-
"hook_name": "blocks.3.hook_resid_post",
|
4 |
-
"hook_layer": 3,
|
5 |
-
"hook_head_index": null,
|
6 |
-
"d_in": 512,
|
7 |
-
"d_sae": 32768,
|
8 |
-
"dtype": "torch.float32",
|
9 |
-
"device": "mps",
|
10 |
-
"dataset_path": "EleutherAI/the_pile_deduplicated",
|
11 |
-
"context_size": 128,
|
12 |
-
"architecture": "standard",
|
13 |
-
"activation_fn_str": "relu",
|
14 |
-
"activation_fn_kwargs": null,
|
15 |
-
"apply_b_dec_to_input": true,
|
16 |
-
"finetuning_scaling_factor": false,
|
17 |
-
"sae_lens_training_version": null,
|
18 |
-
"prepend_bos": false,
|
19 |
-
"dataset_trust_remote_code": true,
|
20 |
-
"normalize_activations": "none"
|
21 |
-
}
|
|
|
1 |
+
{"architecture": "standard", "d_in": 512, "d_sae": 32768, "dtype": "torch.float32", "device": "cuda", "model_name": "pythia-70m-deduped", "hook_name": "blocks.3.hook_resid_post", "hook_layer": 3, "hook_head_index": null, "activation_fn_str": "relu", "activation_fn_kwargs": {}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "EleutherAI/the_pile_deduplicated", "dataset_trust_remote_code": true, "context_size": 128, "normalize_activations": "none"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3-res-sm/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5992a0d5a944b0090147a083bfd53d802e1757708644d19378122f3e2ef67878
|
3 |
+
size 131152
|
4-res-sm/cfg.json
CHANGED
@@ -1,21 +1 @@
|
|
1 |
-
{
|
2 |
-
"model_name": "pythia-70m-deduped",
|
3 |
-
"hook_name": "blocks.4.hook_resid_post",
|
4 |
-
"hook_layer": 4,
|
5 |
-
"hook_head_index": null,
|
6 |
-
"d_in": 512,
|
7 |
-
"d_sae": 32768,
|
8 |
-
"dtype": "torch.float32",
|
9 |
-
"device": "mps",
|
10 |
-
"dataset_path": "EleutherAI/the_pile_deduplicated",
|
11 |
-
"context_size": 128,
|
12 |
-
"architecture": "standard",
|
13 |
-
"activation_fn_str": "relu",
|
14 |
-
"activation_fn_kwargs": null,
|
15 |
-
"apply_b_dec_to_input": true,
|
16 |
-
"finetuning_scaling_factor": false,
|
17 |
-
"sae_lens_training_version": null,
|
18 |
-
"prepend_bos": false,
|
19 |
-
"dataset_trust_remote_code": true,
|
20 |
-
"normalize_activations": "none"
|
21 |
-
}
|
|
|
1 |
+
{"architecture": "standard", "d_in": 512, "d_sae": 32768, "dtype": "torch.float32", "device": "cuda", "model_name": "pythia-70m-deduped", "hook_name": "blocks.4.hook_resid_post", "hook_layer": 4, "hook_head_index": null, "activation_fn_str": "relu", "activation_fn_kwargs": {}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "EleutherAI/the_pile_deduplicated", "dataset_trust_remote_code": true, "context_size": 128, "normalize_activations": "none"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4-res-sm/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3de19ed9e6a674abe9afd61ef2851926212cb87105241ec32409ced3ed4676dc
|
3 |
+
size 131152
|
5-res-sm/cfg.json
CHANGED
@@ -1,21 +1 @@
|
|
1 |
-
{
|
2 |
-
"model_name": "pythia-70m-deduped",
|
3 |
-
"hook_name": "blocks.5.hook_resid_post",
|
4 |
-
"hook_layer": 5,
|
5 |
-
"hook_head_index": null,
|
6 |
-
"d_in": 512,
|
7 |
-
"d_sae": 32768,
|
8 |
-
"dtype": "torch.float32",
|
9 |
-
"device": "mps",
|
10 |
-
"dataset_path": "EleutherAI/the_pile_deduplicated",
|
11 |
-
"context_size": 128,
|
12 |
-
"architecture": "standard",
|
13 |
-
"activation_fn_str": "relu",
|
14 |
-
"activation_fn_kwargs": null,
|
15 |
-
"apply_b_dec_to_input": true,
|
16 |
-
"finetuning_scaling_factor": false,
|
17 |
-
"sae_lens_training_version": null,
|
18 |
-
"prepend_bos": false,
|
19 |
-
"dataset_trust_remote_code": true,
|
20 |
-
"normalize_activations": "none"
|
21 |
-
}
|
|
|
1 |
+
{"architecture": "standard", "d_in": 512, "d_sae": 32768, "dtype": "torch.float32", "device": "cuda", "model_name": "pythia-70m-deduped", "hook_name": "blocks.5.hook_resid_post", "hook_layer": 5, "hook_head_index": null, "activation_fn_str": "relu", "activation_fn_kwargs": {}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "EleutherAI/the_pile_deduplicated", "dataset_trust_remote_code": true, "context_size": 128, "normalize_activations": "none"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5-res-sm/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:94356d7e0bc1268bcbf3fb432f2bf5a3be98187b81069b65b83652834163d393
|
3 |
+
size 131152
|
e-res-sm/cfg.json
CHANGED
@@ -1,21 +1 @@
|
|
1 |
-
{
|
2 |
-
"model_name": "pythia-70m-deduped",
|
3 |
-
"hook_name": "blocks.0.hook_resid_pre",
|
4 |
-
"hook_layer": 0,
|
5 |
-
"hook_head_index": null,
|
6 |
-
"d_in": 512,
|
7 |
-
"d_sae": 32768,
|
8 |
-
"dtype": "torch.float32",
|
9 |
-
"device": "mps",
|
10 |
-
"dataset_path": "EleutherAI/the_pile_deduplicated",
|
11 |
-
"context_size": 128,
|
12 |
-
"architecture": "standard",
|
13 |
-
"activation_fn_str": "relu",
|
14 |
-
"activation_fn_kwargs": null,
|
15 |
-
"apply_b_dec_to_input": true,
|
16 |
-
"finetuning_scaling_factor": false,
|
17 |
-
"sae_lens_training_version": null,
|
18 |
-
"prepend_bos": false,
|
19 |
-
"dataset_trust_remote_code": true,
|
20 |
-
"normalize_activations": "none"
|
21 |
-
}
|
|
|
1 |
+
{"architecture": "standard", "d_in": 512, "d_sae": 32768, "dtype": "torch.float32", "device": "cuda", "model_name": "pythia-70m-deduped", "hook_name": "blocks.0.hook_resid_pre", "hook_layer": 0, "hook_head_index": null, "activation_fn_str": "relu", "activation_fn_kwargs": {}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "EleutherAI/the_pile_deduplicated", "dataset_trust_remote_code": true, "context_size": 128, "normalize_activations": "none"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
e-res-sm/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:91075cc05f2b2962ab89ac30ba55f9deb606a79fe50c67301fa82dfad86e5e8c
|
3 |
+
size 131152
|