Upload folder using huggingface_hub
Browse files- baseline-10/cfg.json +1 -0
- baseline-10/sae.safetensors +3 -0
- baseline-6/cfg.json +1 -0
- baseline-6/sae.safetensors +3 -0
- baseline-7/cfg.json +1 -0
- baseline-7/sae.safetensors +3 -0
- baseline-8/cfg.json +1 -0
- baseline-8/sae.safetensors +3 -0
- baseline-9/cfg.json +1 -0
- baseline-9/sae.safetensors +3 -0
- config.json +1 -0
- k6-c2/cfg.json +1 -0
- k6-c2/sae.safetensors +3 -0
- l1_scheduler.pt +3 -0
- scaling_factors.pt +3 -0
- state.pt +3 -0
baseline-10/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"expansion_factor": 16, "normalize_decoder": true, "num_latents": 0, "k": -1, "multi_topk": false, "jumprelu": true, "jumprelu_init_threshold": 0.001, "jumprelu_bandwidth": 0.001, "jumprelu_target_l0": 128, "jumprelu_per_layer_l0": false, "init_enc_as_dec_transpose": true, "init_b_dec_as_zeros": false, "d_in": 2304}
|
baseline-10/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:10f4d6bcb0127df8946d485da8d51b58c3ba25979d4d9dda930d73fc1a26b63e
|
3 |
+
size 679781800
|
baseline-6/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"expansion_factor": 16, "normalize_decoder": true, "num_latents": 0, "k": -1, "multi_topk": false, "jumprelu": true, "jumprelu_init_threshold": 0.001, "jumprelu_bandwidth": 0.001, "jumprelu_target_l0": 128, "jumprelu_per_layer_l0": false, "init_enc_as_dec_transpose": true, "init_b_dec_as_zeros": false, "d_in": 2304}
|
baseline-6/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cd009f558bfc4ed8f24187202e7ecc0a4f265f7f38bf935ff5d24b6833e8af36
|
3 |
+
size 679781800
|
baseline-7/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"expansion_factor": 16, "normalize_decoder": true, "num_latents": 0, "k": -1, "multi_topk": false, "jumprelu": true, "jumprelu_init_threshold": 0.001, "jumprelu_bandwidth": 0.001, "jumprelu_target_l0": 128, "jumprelu_per_layer_l0": false, "init_enc_as_dec_transpose": true, "init_b_dec_as_zeros": false, "d_in": 2304}
|
baseline-7/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:364985c785cd47971bb9bddfcedf2f16dd0fc5b962dc7835dbb9a99d90e15035
|
3 |
+
size 679781800
|
baseline-8/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"expansion_factor": 16, "normalize_decoder": true, "num_latents": 0, "k": -1, "multi_topk": false, "jumprelu": true, "jumprelu_init_threshold": 0.001, "jumprelu_bandwidth": 0.001, "jumprelu_target_l0": 128, "jumprelu_per_layer_l0": false, "init_enc_as_dec_transpose": true, "init_b_dec_as_zeros": false, "d_in": 2304}
|
baseline-8/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d28ecd7f1eca0c53e4d18335705a7b8e827091bdf148c7089f69f3518d52a13c
|
3 |
+
size 679781800
|
baseline-9/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"expansion_factor": 16, "normalize_decoder": true, "num_latents": 0, "k": -1, "multi_topk": false, "jumprelu": true, "jumprelu_init_threshold": 0.001, "jumprelu_bandwidth": 0.001, "jumprelu_target_l0": 128, "jumprelu_per_layer_l0": false, "init_enc_as_dec_transpose": true, "init_b_dec_as_zeros": false, "d_in": 2304}
|
baseline-9/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e43db186c456978e1264611d7ca9af935d0146a1fd4bc964750f669db4c43091
|
3 |
+
size 679781800
|
config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"sae": {"expansion_factor": 16, "normalize_decoder": true, "num_latents": 0, "k": -1, "multi_topk": false, "jumprelu": true, "jumprelu_init_threshold": 0.001, "jumprelu_bandwidth": 0.001, "jumprelu_target_l0": 128, "jumprelu_per_layer_l0": false, "init_enc_as_dec_transpose": true, "init_b_dec_as_zeros": false}, "batch_size": 4, "max_seq_len": 1024, "num_training_tokens": 1000000000, "cycle_iterator": true, "grad_acc_steps": 1, "micro_acc_steps": 1, "adam_8bit": false, "adam_epsilon": 1e-08, "adam_betas": [0.0, 0.999], "lr": 0.0007, "lr_scheduler_name": "constant", "lr_warmup_steps": 0.01, "l1_coefficient": 0.5, "l1_warmup_steps": 0.1, "use_l2_loss": true, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["layers.0", "layers.1", "layers.2", "layers.3", "layers.4", "layers.5", "layers.6", "layers.7", "layers.8", "layers.9", "layers.10"], "layers": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], "layer_stride": 1, "distribute_modules": false, "save_every": 10000, "normalize_activations": 1, "num_norm_estimation_tokens": 1000000, "clusters": {"k6-c2": [6, 7, 8, 9, 10], "baseline-6": [6], "baseline-7": [7], "baseline-8": [8], "baseline-9": [9], "baseline-10": [10]}, "cluster_hookpoints": {"k6-c2": ["layers.6", "layers.7", "layers.8", "layers.9", "layers.10"], "baseline-6": ["layers.6"], "baseline-7": ["layers.7"], "baseline-8": ["layers.8"], "baseline-9": ["layers.9"], "baseline-10": ["layers.10"]}, "hook": null, "resume_from": null, "keep_last_n": 1, "log_to_wandb": true, "run_name": "checkpoints-clusters/google/gemma-2-2b-1024-jr-lambda-0.5-target-L0-128-lr-0.0007", "wandb_log_frequency": 1}
|
k6-c2/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"expansion_factor": 16, "normalize_decoder": true, "num_latents": 0, "k": -1, "multi_topk": false, "jumprelu": true, "jumprelu_init_threshold": 0.001, "jumprelu_bandwidth": 0.001, "jumprelu_target_l0": 128, "jumprelu_per_layer_l0": false, "init_enc_as_dec_transpose": true, "init_b_dec_as_zeros": false, "d_in": 2304}
|
k6-c2/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bf16a940ecf90b12dfd566b2841b6dc290b779da2a1e5e8d99f0e9710ec388be
|
3 |
+
size 679781800
|
l1_scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b575e18d10cb62165304d5a761c43c2dde3e6998b3bd2143b6d806359b0cbb38
|
3 |
+
size 1012
|
scaling_factors.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f621a6ae7439d1b1b67edf4947f85261333b1046b968c499ec736e1a2c5507e7
|
3 |
+
size 1152
|
state.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:16190db71dff79627997af579d6894c29bc703b494690b4fcf6caa0535f7ac90
|
3 |
+
size 1771636
|