tyleryzhu committed on
Commit
05d49e3
·
verified ·
1 Parent(s): 5ac6c68

Delete merv-base

Browse files
merv-base/checkpoints/latest-checkpoint.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:4a7a574a43de24b167e5c8cf99a779bceb0b6036ceeb9cdaeae15d9ab81f54a7
3
- size 27190883468
 
 
 
 
merv-base/config.json DELETED
@@ -1,71 +0,0 @@
1
- {
2
- "dataset": {
3
- "align_stage_components": [
4
- "download/videollava/valley_llavaimage.json",
5
- "download/videollava"
6
- ],
7
- "dataset_root_dir": "data",
8
- "finetune_stage_components": [
9
- "download/videollava/videochatgpt_llavaimage_tune.json",
10
- "download/videollava"
11
- ],
12
- "type": "videollava"
13
- },
14
- "hf_token": ".hf_token",
15
- "model": {
16
- "feature_fusion": "cross_attention_avg_lq",
17
- "align_epochs": 1,
18
- "align_global_batch_size": 256,
19
- "align_learning_rate": 0.001,
20
- "align_lr_scheduler_type": "linear-warmup+cosine-decay",
21
- "align_max_grad_norm": 1.0,
22
- "align_max_steps": null,
23
- "align_per_device_batch_size": 16,
24
- "align_train_strategy": "fsdp-shard-grad-op",
25
- "align_warmup_ratio": 0.03,
26
- "align_weight_decay": 0.0,
27
- "arch_specifier": "no-align+3davg+linear",
28
- "enable_gradient_checkpointing": true,
29
- "enable_mixed_precision_training": true,
30
- "finetune_epochs": 1,
31
- "finetune_global_batch_size": 128,
32
- "finetune_learning_rate": 2e-05,
33
- "finetune_lr_scheduler_type": "linear-warmup+cosine-decay",
34
- "finetune_max_grad_norm": 1.0,
35
- "finetune_max_steps": null,
36
- "finetune_per_device_batch_size": 8,
37
- "finetune_train_strategy": "fsdp-full-shard",
38
- "finetune_warmup_ratio": 0.03,
39
- "finetune_weight_decay": 0.1,
40
- "image_resize_strategy": "resize-naive",
41
- "llm_backbone_id": "llama2-7b-pure",
42
- "llm_max_length": 2048,
43
- "model_id": "merv-base",
44
- "num_frames": [
45
- 16,
46
- 16,
47
- 32,
48
- 16
49
- ],
50
- "projector_token_length": 64,
51
- "reduce_in_full_precision": false,
52
- "type": "merv-base",
53
- "video_backbone_ids": [
54
- "languagebind-video-noclass",
55
- "dinov2-video-all-tokens",
56
- "vivit-google-b-all-no-cls-16frames",
57
- "siglip-vit-b16-224px-all-no-cls"
58
- ],
59
- "visual_feature_length": 1024
60
- },
61
- "pretrained_checkpoint": null,
62
- "run_id": "merv-base",
63
- "run_root_dir": "runs",
64
- "seed": 7,
65
- "stage": "finetune",
66
- "trackers": [
67
- "jsonl",
68
- "wandb"
69
- ],
70
- "type": "merv-base"
71
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
merv-base/config.yaml DELETED
@@ -1,62 +0,0 @@
1
- dataset:
2
- align_stage_components:
3
- - download/videollava/valley_llavaimage.json
4
- - download/videollava
5
- dataset_root_dir: data
6
- finetune_stage_components:
7
- - download/videollava/videochatgpt_llavaimage_tune.json
8
- - download/videollava
9
- type: videollava
10
- hf_token: .hf_token
11
- model:
12
- align_epochs: 1
13
- align_global_batch_size: 256
14
- align_learning_rate: 0.001
15
- align_lr_scheduler_type: linear-warmup+cosine-decay
16
- align_max_grad_norm: 1.0
17
- align_max_steps: null
18
- align_per_device_batch_size: 16
19
- align_train_strategy: fsdp-shard-grad-op
20
- align_warmup_ratio: 0.03
21
- align_weight_decay: 0.0
22
- arch_specifier: no-align+3davg+linear
23
- enable_gradient_checkpointing: true
24
- enable_mixed_precision_training: true
25
- feature_fusion: cross_attention_avg_lq
26
- finetune_epochs: 1
27
- finetune_global_batch_size: 128
28
- finetune_learning_rate: 2.0e-05
29
- finetune_lr_scheduler_type: linear-warmup+cosine-decay
30
- finetune_max_grad_norm: 1.0
31
- finetune_max_steps: null
32
- finetune_per_device_batch_size: 8
33
- finetune_train_strategy: fsdp-full-shard
34
- finetune_warmup_ratio: 0.03
35
- finetune_weight_decay: 0.1
36
- image_resize_strategy: resize-naive
37
- llm_backbone_id: llama2-7b-pure
38
- llm_max_length: 2048
39
- model_id: merv-base
40
- num_frames:
41
- - 16
42
- - 16
43
- - 32
44
- - 16
45
- projector_token_length: 64
46
- reduce_in_full_precision: false
47
- type: merv-base
48
- video_backbone_ids:
49
- - languagebind-video-noclass
50
- - dinov2-video-all-tokens
51
- - vivit-google-b-all-no-cls-16frames
52
- - siglip-vit-b16-224px-all-no-cls
53
- visual_feature_length: 1024
54
- pretrained_checkpoint: null
55
- run_id: merv-base
56
- run_root_dir: runs
57
- seed: 7
58
- stage: finetune
59
- trackers:
60
- - jsonl
61
- - wandb
62
- type: merv-base