istvanj committed on
Commit
95d0749
·
verified ·
1 Parent(s): 2e0b671

Training in progress, step 56

Browse files
README.md CHANGED
@@ -7,14 +7,14 @@ tags:
7
  - dnb
8
  - generated_from_trainer
9
  model-index:
10
- - name: musicgen-melody-lora-dnb-colab-gs-8-lr-1e-05
11
  results: []
12
  ---
13
 
14
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
15
  should probably proofread and complete it, then remove this comment. -->
16
 
17
- # musicgen-melody-lora-dnb-colab-gs-8-lr-1e-05
18
 
19
  This model is a fine-tuned version of [facebook/musicgen-melody](https://huggingface.co/facebook/musicgen-melody) on the istvanj/ncs-dnb dataset.
20
 
@@ -35,7 +35,7 @@ More information needed
35
  ### Training hyperparameters
36
 
37
  The following hyperparameters were used during training:
38
- - learning_rate: 1e-05
39
  - train_batch_size: 2
40
  - eval_batch_size: 8
41
  - seed: 42
@@ -43,7 +43,7 @@ The following hyperparameters were used during training:
43
  - total_train_batch_size: 16
44
  - optimizer: Use adamw_torch with betas=(0.9,0.99) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
45
  - lr_scheduler_type: linear
46
- - num_epochs: 8
47
  - mixed_precision_training: Native AMP
48
 
49
  ### Training results
 
7
  - dnb
8
  - generated_from_trainer
9
  model-index:
10
+ - name: musicgen-melody-lora-dnb-colab-gs-3-lr-0.0001
11
  results: []
12
  ---
13
 
14
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
15
  should probably proofread and complete it, then remove this comment. -->
16
 
17
+ # musicgen-melody-lora-dnb-colab-gs-3-lr-0.0001
18
 
19
  This model is a fine-tuned version of [facebook/musicgen-melody](https://huggingface.co/facebook/musicgen-melody) on the istvanj/ncs-dnb dataset.
20
 
 
35
  ### Training hyperparameters
36
 
37
  The following hyperparameters were used during training:
38
+ - learning_rate: 0.0001
39
  - train_batch_size: 2
40
  - eval_batch_size: 8
41
  - seed: 42
 
43
  - total_train_batch_size: 16
44
  - optimizer: Use adamw_torch with betas=(0.9,0.99) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
45
  - lr_scheduler_type: linear
46
+ - num_epochs: 3
47
  - mixed_precision_training: Native AMP
48
 
49
  ### Training results
adapter_config.json CHANGED
@@ -23,22 +23,22 @@
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
26
- "v_proj",
27
- "lm_heads.2",
28
- "lm_heads.3",
29
- "lm_heads.1",
30
  "embed_tokens.0",
31
  "q_proj",
32
- "embed_tokens.2",
33
- "embed_tokens.3",
34
- "out_proj",
35
- "fc1",
36
  "k_proj",
37
- "embed_tokens.1",
 
38
  "audio_enc_to_dec_proj",
 
39
  "fc2",
40
  "enc_to_dec_proj",
41
- "lm_heads.0"
 
 
42
  ],
43
  "task_type": null,
44
  "use_dora": false,
 
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
26
+ "embed_tokens.1",
27
+ "fc1",
28
+ "lm_heads.0",
29
+ "out_proj",
30
  "embed_tokens.0",
31
  "q_proj",
 
 
 
 
32
  "k_proj",
33
+ "v_proj",
34
+ "lm_heads.2",
35
  "audio_enc_to_dec_proj",
36
+ "lm_heads.3",
37
  "fc2",
38
  "enc_to_dec_proj",
39
+ "embed_tokens.2",
40
+ "embed_tokens.3",
41
+ "lm_heads.1"
42
  ],
43
  "task_type": null,
44
  "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f4ed8c891f22c4c9a7974cf2ad6e1bb84abbf140fe84f1372c0bd25752e57452
3
  size 87103456
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b3f8b7b1bbcdc6dc3f718146d672eb904d5c9c37afb3e9e0cf606c6eddfcdef
3
  size 87103456
trainer_state.json CHANGED
@@ -1,223 +1,97 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 7.0,
5
  "eval_steps": 500,
6
- "global_step": 56,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.26666666666666666,
13
- "grad_norm": 7.9753594398498535,
14
- "learning_rate": 9.642857142857144e-06,
15
- "loss": 9.7008,
16
  "step": 2
17
  },
18
  {
19
  "epoch": 0.5333333333333333,
20
- "grad_norm": 8.494783401489258,
21
- "learning_rate": 9.285714285714288e-06,
22
- "loss": 9.6852,
23
  "step": 4
24
  },
25
  {
26
  "epoch": 0.8,
27
- "grad_norm": 8.483810424804688,
28
- "learning_rate": 8.92857142857143e-06,
29
- "loss": 9.6854,
30
  "step": 6
31
  },
32
  {
33
  "epoch": 1.0,
34
- "grad_norm": 4.711551666259766,
35
- "learning_rate": 8.571428571428571e-06,
36
- "loss": 7.2231,
37
  "step": 8
38
  },
39
  {
40
  "epoch": 1.2666666666666666,
41
- "grad_norm": 8.524971961975098,
42
- "learning_rate": 8.214285714285714e-06,
43
- "loss": 9.6463,
44
  "step": 10
45
  },
46
  {
47
  "epoch": 1.5333333333333332,
48
- "grad_norm": 8.587374687194824,
49
- "learning_rate": 8.035714285714286e-06,
50
- "loss": 9.627,
51
  "step": 12
52
  },
53
  {
54
  "epoch": 1.8,
55
- "grad_norm": 9.312500953674316,
56
- "learning_rate": 7.67857142857143e-06,
57
- "loss": 9.5752,
58
  "step": 14
59
  },
60
  {
61
  "epoch": 2.0,
62
- "grad_norm": 4.863954067230225,
63
- "learning_rate": 7.321428571428572e-06,
64
- "loss": 7.2151,
65
  "step": 16
66
  },
67
  {
68
  "epoch": 2.2666666666666666,
69
- "grad_norm": 8.715095520019531,
70
- "learning_rate": 7.1428571428571436e-06,
71
- "loss": 9.5969,
72
  "step": 18
73
  },
74
  {
75
  "epoch": 2.533333333333333,
76
- "grad_norm": 9.27696704864502,
77
- "learning_rate": 6.785714285714287e-06,
78
- "loss": 9.548,
79
  "step": 20
80
  },
81
  {
82
- "epoch": 2.8,
83
- "grad_norm": 9.659817695617676,
84
- "learning_rate": 6.4285714285714295e-06,
85
- "loss": 9.5663,
86
- "step": 22
87
- },
88
- {
89
- "epoch": 3.0,
90
- "grad_norm": 5.58745813369751,
91
- "learning_rate": 6.071428571428571e-06,
92
- "loss": 7.1335,
93
- "step": 24
94
- },
95
- {
96
- "epoch": 3.2666666666666666,
97
- "grad_norm": 10.51404094696045,
98
- "learning_rate": 5.7142857142857145e-06,
99
- "loss": 9.5266,
100
- "step": 26
101
- },
102
- {
103
- "epoch": 3.533333333333333,
104
- "grad_norm": 10.135522842407227,
105
- "learning_rate": 5.357142857142857e-06,
106
- "loss": 9.5168,
107
- "step": 28
108
- },
109
- {
110
- "epoch": 3.8,
111
- "grad_norm": 10.956225395202637,
112
- "learning_rate": 5e-06,
113
- "loss": 9.5157,
114
- "step": 30
115
- },
116
- {
117
- "epoch": 4.0,
118
- "grad_norm": 5.393774032592773,
119
- "learning_rate": 4.642857142857144e-06,
120
- "loss": 7.0646,
121
- "step": 32
122
- },
123
- {
124
- "epoch": 4.266666666666667,
125
- "grad_norm": 10.676840782165527,
126
- "learning_rate": 4.2857142857142855e-06,
127
- "loss": 9.4731,
128
- "step": 34
129
- },
130
- {
131
- "epoch": 4.533333333333333,
132
- "grad_norm": 10.430346488952637,
133
- "learning_rate": 3.928571428571429e-06,
134
- "loss": 9.4459,
135
- "step": 36
136
- },
137
- {
138
- "epoch": 4.8,
139
- "grad_norm": 10.816131591796875,
140
- "learning_rate": 3.5714285714285718e-06,
141
- "loss": 9.4447,
142
- "step": 38
143
- },
144
- {
145
- "epoch": 5.0,
146
- "grad_norm": 6.065415859222412,
147
- "learning_rate": 3.2142857142857147e-06,
148
- "loss": 7.0511,
149
- "step": 40
150
- },
151
- {
152
- "epoch": 5.266666666666667,
153
- "grad_norm": 13.455761909484863,
154
- "learning_rate": 2.8571428571428573e-06,
155
- "loss": 9.4171,
156
- "step": 42
157
- },
158
- {
159
- "epoch": 5.533333333333333,
160
- "grad_norm": 11.359918594360352,
161
- "learning_rate": 2.5e-06,
162
- "loss": 9.4188,
163
- "step": 44
164
- },
165
- {
166
- "epoch": 5.8,
167
- "grad_norm": 13.576141357421875,
168
- "learning_rate": 2.1428571428571427e-06,
169
- "loss": 9.3739,
170
- "step": 46
171
- },
172
- {
173
- "epoch": 6.0,
174
- "grad_norm": 6.167062759399414,
175
- "learning_rate": 1.7857142857142859e-06,
176
- "loss": 7.0424,
177
- "step": 48
178
- },
179
- {
180
- "epoch": 6.266666666666667,
181
- "grad_norm": 13.524323463439941,
182
- "learning_rate": 1.4285714285714286e-06,
183
- "loss": 9.3775,
184
- "step": 50
185
- },
186
- {
187
- "epoch": 6.533333333333333,
188
- "grad_norm": 20.678451538085938,
189
- "learning_rate": 1.0714285714285714e-06,
190
- "loss": 9.3592,
191
- "step": 52
192
- },
193
- {
194
- "epoch": 6.8,
195
- "grad_norm": 12.042393684387207,
196
- "learning_rate": 7.142857142857143e-07,
197
- "loss": 9.3678,
198
- "step": 54
199
- },
200
- {
201
- "epoch": 7.0,
202
- "grad_norm": 6.3625993728637695,
203
- "learning_rate": 3.5714285714285716e-07,
204
- "loss": 7.0451,
205
- "step": 56
206
- },
207
- {
208
- "epoch": 7.0,
209
- "step": 56,
210
- "total_flos": 216424602812184.0,
211
- "train_loss": 8.915825281824384,
212
- "train_runtime": 1374.5108,
213
- "train_samples_per_second": 0.698,
214
- "train_steps_per_second": 0.041
215
  }
216
  ],
217
  "logging_steps": 2,
218
- "max_steps": 56,
219
  "num_input_tokens_seen": 0,
220
- "num_train_epochs": 8,
221
  "save_steps": 500,
222
  "stateful_callbacks": {
223
  "TrainerControl": {
@@ -231,7 +105,7 @@
231
  "attributes": {}
232
  }
233
  },
234
- "total_flos": 216424602812184.0,
235
  "train_batch_size": 2,
236
  "trial_name": null,
237
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.6666666666666665,
5
  "eval_steps": 500,
6
+ "global_step": 21,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.26666666666666666,
13
+ "grad_norm": 7.614686012268066,
14
+ "learning_rate": 9.047619047619048e-05,
15
+ "loss": 9.6721,
16
  "step": 2
17
  },
18
  {
19
  "epoch": 0.5333333333333333,
20
+ "grad_norm": 10.107391357421875,
21
+ "learning_rate": 8.095238095238096e-05,
22
+ "loss": 9.4969,
23
  "step": 4
24
  },
25
  {
26
  "epoch": 0.8,
27
+ "grad_norm": 11.758378028869629,
28
+ "learning_rate": 7.142857142857143e-05,
29
+ "loss": 9.3036,
30
  "step": 6
31
  },
32
  {
33
  "epoch": 1.0,
34
+ "grad_norm": 8.324995994567871,
35
+ "learning_rate": 6.19047619047619e-05,
36
+ "loss": 6.8191,
37
  "step": 8
38
  },
39
  {
40
  "epoch": 1.2666666666666666,
41
+ "grad_norm": 15.543440818786621,
42
+ "learning_rate": 5.2380952380952384e-05,
43
+ "loss": 8.8411,
44
  "step": 10
45
  },
46
  {
47
  "epoch": 1.5333333333333332,
48
+ "grad_norm": 16.783220291137695,
49
+ "learning_rate": 4.2857142857142856e-05,
50
+ "loss": 8.5329,
51
  "step": 12
52
  },
53
  {
54
  "epoch": 1.8,
55
+ "grad_norm": 19.108684539794922,
56
+ "learning_rate": 3.3333333333333335e-05,
57
+ "loss": 8.2567,
58
  "step": 14
59
  },
60
  {
61
  "epoch": 2.0,
62
+ "grad_norm": 8.789897918701172,
63
+ "learning_rate": 2.380952380952381e-05,
64
+ "loss": 6.1492,
65
  "step": 16
66
  },
67
  {
68
  "epoch": 2.2666666666666666,
69
+ "grad_norm": 17.98725128173828,
70
+ "learning_rate": 1.4285714285714285e-05,
71
+ "loss": 8.0423,
72
  "step": 18
73
  },
74
  {
75
  "epoch": 2.533333333333333,
76
+ "grad_norm": 14.338889122009277,
77
+ "learning_rate": 4.7619047619047615e-06,
78
+ "loss": 7.9496,
79
  "step": 20
80
  },
81
  {
82
+ "epoch": 2.6666666666666665,
83
+ "step": 21,
84
+ "total_flos": 82187025809664.0,
85
+ "train_loss": 8.28742908296131,
86
+ "train_runtime": 577.7205,
87
+ "train_samples_per_second": 0.623,
88
+ "train_steps_per_second": 0.036
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  }
90
  ],
91
  "logging_steps": 2,
92
+ "max_steps": 21,
93
  "num_input_tokens_seen": 0,
94
+ "num_train_epochs": 3,
95
  "save_steps": 500,
96
  "stateful_callbacks": {
97
  "TrainerControl": {
 
105
  "attributes": {}
106
  }
107
  },
108
+ "total_flos": 82187025809664.0,
109
  "train_batch_size": 2,
110
  "trial_name": null,
111
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:778dc524f55afd59323688704b0726b11953ccd3eb2b83634379c146f19e65d7
3
  size 5560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c34c13b6fa5c21e9835c2ac37db1309b556f58be71c1278a44d7a2d2fd254c4
3
  size 5560