chenz53 committed
Commit 9ad9547 · verified · 1 Parent(s): 5b636d2

Upload 3 files

Files changed (3):
  1. README.md +78 -3
  2. config.json +30 -0
  3. model.safetensors +3 -0
README.md CHANGED
@@ -1,3 +1,78 @@
- ---
- license: apache-2.0
- ---
+ ---
+ library_name: transformers
+ tags:
+ - masked-image-modeling
+ - generated_from_trainer
+ ---
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # smb-vision-base-1029
+
+ This model was trained from scratch using [VideoMAE](https://huggingface.co/docs/transformers/en/model_doc/videomae) on over 4.7k CT volumes.
+
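+ As a quick illustration of the pretraining objective, the toy-sized sketch below runs one masked-reconstruction step with the stock `transformers` VideoMAE classes and random tensors in place of CT data. It only demonstrates the API: the released checkpoint is single-channel and much larger (see `config.json`) and is loaded with `trust_remote_code=True` as shown under "How to use" below.
+
+ ```python
+ import torch
+ from transformers import VideoMAEConfig, VideoMAEForPreTraining
+
+ # Toy config purely for illustration. It keeps 3 channels so the stock VideoMAE
+ # loss, which assumes ImageNet-style RGB normalization, runs as-is; the real
+ # checkpoint uses the larger single-channel geometry in config.json.
+ config = VideoMAEConfig(
+     image_size=64, patch_size=16, num_channels=3, num_frames=16, tubelet_size=2,
+     hidden_size=128, num_hidden_layers=2, num_attention_heads=4, intermediate_size=256,
+     decoder_hidden_size=64, decoder_num_hidden_layers=1,
+     decoder_num_attention_heads=4, decoder_intermediate_size=128,
+ )
+ model = VideoMAEForPreTraining(config)  # randomly initialised
+
+ # Dummy clip: (batch, num_frames, channels, height, width)
+ pixel_values = torch.randn(1, config.num_frames, config.num_channels,
+                            config.image_size, config.image_size)
+
+ # One boolean entry per tubelet patch; True marks a patch the model must reconstruct
+ patches_per_frame = (config.image_size // config.patch_size) ** 2
+ seq_length = (config.num_frames // config.tubelet_size) * patches_per_frame
+ bool_masked_pos = torch.randint(0, 2, (1, seq_length)).bool()
+
+ outputs = model(pixel_values=pixel_values, bool_masked_pos=bool_masked_pos)
+ print(outputs.loss)  # MSE reconstruction loss on the masked patches
+ ```
+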
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 3e-04
+ - train_batch_size: 32
+ - eval_batch_size: 1
+ - seed: 42
+ - optimizer: AdamW (torch implementation) with betas=(0.9, 0.999) and epsilon=1e-08; no additional optimizer arguments
+ - lr_scheduler_type: cosine
+ - num_epochs: 30.0
+
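+ These hyperparameters map approximately onto `transformers.TrainingArguments` as sketched below (assumptions: single-device training, so `train_batch_size` becomes `per_device_train_batch_size`; `output_dir` is illustrative; the actual training script is not part of this commit):
+
+ ```python
+ from transformers import TrainingArguments
+
+ training_args = TrainingArguments(
+     output_dir="smb-vision-base-1029",  # illustrative output path
+     learning_rate=3e-4,
+     per_device_train_batch_size=32,
+     per_device_eval_batch_size=1,
+     seed=42,
+     optim="adamw_torch",                # AdamW, PyTorch implementation
+     adam_beta1=0.9,
+     adam_beta2=0.999,
+     adam_epsilon=1e-8,
+     lr_scheduler_type="cosine",
+     num_train_epochs=30.0,
+ )
+ ```
+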
+ ### Training results
+
+ {
+     "_runtime": 54805.860011105,
+     "_step": 4351,
+     "eval/runtime": 17.8428,
+     "eval/samples_per_second": 2.578,
+     "eval/steps_per_second": 2.578,
+     "total_flos": 3.8084565648770335e+21,
+     "train/epoch": 30,
+     "train/global_step": 4350,
+     "train/grad_norm": 0.0735374316573143,
+     "train/learning_rate": 0,
+     "train/loss": 0.5736,
+     "train_loss": 0.5022664608695041,
+     "train_runtime": 54785.1298,
+     "train_samples_per_second": 2.527,
+     "train_steps_per_second": 0.079
+ }
+
+ ### Framework versions
+
+ - Transformers 4.46.0
+ - PyTorch 2.5.0
+ - Datasets 3.0.2
+ - Tokenizers 0.20.1
+
+ ### How to use
+
+ ```python
+ from transformers import VideoMAEForPreTraining
+
+ # Load data with `dataload.py` so that batch["image"] holds a pixel-value tensor of
+ # shape (batch, num_frames, channels, height, width); per config.json this model
+ # expects (batch, 320, 1, 384, 384).
+ model = VideoMAEForPreTraining.from_pretrained(
+     "standardmodelbio/smb-vision-base",
+     trust_remote_code=True,
+ )
+
+ # Run only the encoder to obtain per-patch embeddings of shape (batch, num_patches, hidden_size)
+ embedding = model.videomae(batch["image"]).last_hidden_state
+ ```
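+
+ The snippet above returns one embedding per tubelet patch. One simple way (a sketch, not necessarily how downstream models consume these features) to reduce this to a single vector per CT volume is to mean-pool over the patch dimension:
+
+ ```python
+ import torch
+
+ with torch.no_grad():
+     patch_embeddings = model.videomae(batch["image"]).last_hidden_state  # (batch, num_patches, hidden_size)
+
+ volume_embedding = patch_embeddings.mean(dim=1)  # (batch, hidden_size)
+ ```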
config.json ADDED
@@ -0,0 +1,30 @@
+ {
+   "architectures": [
+     "VideoMAEForPreTraining"
+   ],
+   "attention_probs_dropout_prob": 0.0,
+   "attn_implementation": "flash_attention_2",
+   "decoder_hidden_size": 512,
+   "decoder_intermediate_size": 2048,
+   "decoder_num_attention_heads": 16,
+   "decoder_num_hidden_layers": 8,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.0,
+   "hidden_size": 1024,
+   "image_size": 384,
+   "initializer_range": 0.02,
+   "intermediate_size": 4096,
+   "layer_norm_eps": 1e-12,
+   "model_type": "videomae",
+   "norm_pix_loss": true,
+   "num_attention_heads": 16,
+   "num_channels": 1,
+   "num_frames": 320,
+   "num_hidden_layers": 24,
+   "patch_size": 16,
+   "qkv_bias": true,
+   "torch_dtype": "float32",
+   "transformers_version": "4.46.3",
+   "tubelet_size": 16,
+   "use_mean_pooling": true
+ }
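
For reference, the geometry above means each input is treated as a single-channel 384×384 volume of 320 slices (frames), split into 16×16 spatial patches over 16-slice tubelets. A small sketch of that arithmetic, using the model id from the README example above:

```python
from transformers import VideoMAEConfig

config = VideoMAEConfig.from_pretrained("standardmodelbio/smb-vision-base")

patches_per_slice_group = (config.image_size // config.patch_size) ** 2  # 24 * 24 = 576
slice_groups = config.num_frames // config.tubelet_size                  # 320 / 16 = 20
print(patches_per_slice_group * slice_groups)                            # 11520 tokens per volume
```
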
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d94debd4dd3e0946a93f500657d00485055676e6a5055d168d67f5213abd7c66
+ size 1337348480