Zhiding committed on
Commit
921f508
Β·
1 Parent(s): a5f8592
Files changed (3) hide show
  1. README.md +5 -4
  2. demo.py +1 -1
  3. modeling_eagle_chat.py +3 -0
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- license: mit
3
  pipeline_tag: image-text-to-text
4
  library_name: transformers
5
  base_model:
@@ -16,7 +16,8 @@ tags:
16
 
17
  # Eagle-2
18
 
19
- [\[πŸ“‚ GitHub\]](https://github.com/NVlabs/EAGLE) [\[πŸ“œ Eagle 2\]](TODO)
 
20
  [\[πŸ—¨οΈ Chat Demo\]](http://eagle-vlm.xyz/) [\[πŸ€— HF Demo\]](TODO)
21
  ## Introduction
22
 
@@ -57,7 +58,7 @@ We provide the following models:
57
  | AI2D<sub>test</sub> | 57.1 | 64.1 | 69.3 | 74.7 |70.9|
58
  | MMMU<sub>val</sub> | 31.4 | 36.7 | 40.9 |41.1|38.8|
59
  | MMVet<sub>GPT-4-Turbo</sub> | 32.2 | 32.7 | 48.8 | 49.5 | 40.9 |
  | HallBench<sub>avg</sub> | 27.9 | 34.0 | 39.0 | **41.7** | 35.3 |
60
- | MathVista<sub>testmini</sub> | 3.8 | 37.7 | 43.2 |43.0|45.3|
61
  | MMstar | 37.7 | 45.7 | 50.1|48.0|48.5|
62
 
63
 
@@ -66,7 +67,7 @@ We provide the following models:
66
 
67
 
68
 
69
- We provide a [demo inference script](./demo.py) to help you quickly start using the model. We support different input types:
70
  - pure text input
71
  - single image input
72
  - multiple image input
 
1
  ---
2
+ license: cc-by-nc-4.0
3
  pipeline_tag: image-text-to-text
4
  library_name: transformers
5
  base_model:
 
16
 
17
  # Eagle-2
18
 
19
+
20
+ [\[πŸ“‚ GitHub\]](https://github.com/NVlabs/EAGLE) [\[πŸ“œ Eagle2 Tech Report\]](TODO)
21
  [\[πŸ—¨οΈ Chat Demo\]](http://eagle-vlm.xyz/) [\[πŸ€— HF Demo\]](TODO)
22
  ## Introduction
23
 
 
58
  | AI2D<sub>test</sub> | 57.1 | 64.1 | 69.3 | 74.7 |70.9|
59
  | MMMU<sub>val</sub> | 31.4 | 36.7 | 40.9 |41.1|38.8|
60
  | MMVet<sub>GPT-4-Turbo</sub> | 32.2 | 32.7 | 48.8 | 49.5 | 40.9 |
  | HallBench<sub>avg</sub> | 27.9 | 34.0 | 39.0 | **41.7** | 35.3 |
61
+ | MathVista<sub>testmini</sub> | 33.8 | 37.7 | 43.2 |43.0|45.3|
62
  | MMstar | 37.7 | 45.7 | 50.1|48.0|48.5|
63
 
64
 
 
67
 
68
 
69
 
70
+ We provide an [inference script](./demo.py) to help you quickly start using the model. We support different input types:
71
  - pure text input
72
  - single image input
73
  - multiple image input
demo.py CHANGED
@@ -390,7 +390,7 @@ class ModelWorker:
390
 
391
  if __name__ == '__main__':
392
  parser = argparse.ArgumentParser()
393
- parser.add_argument('--model-path', type=str, default='/home/zhidingy/workspace/eagle-next/internvl_chat/work_dirs/release/Eagle2-1B')
394
  parser.add_argument('--model-name', type=str, default='Eagle2-1B')
395
  parser.add_argument('--device', type=str, default='cuda')
396
  parser.add_argument('--load-8bit', action='store_true')
 
390
 
391
  if __name__ == '__main__':
392
  parser = argparse.ArgumentParser()
393
+ parser.add_argument('--model-path', type=str, default='nvidia/Eagle2-1B')
394
  parser.add_argument('--model-name', type=str, default='Eagle2-1B')
395
  parser.add_argument('--device', type=str, default='cuda')
396
  parser.add_argument('--load-8bit', action='store_true')
modeling_eagle_chat.py CHANGED
@@ -25,6 +25,9 @@ from .flash_attention import *
25
  from .multi_backbone_channel_concatentation_model import MultiBackboneChannelConcatenationVisionModel
26
  from .multi_backbone_channel_concatenation_encoder import MultiBackboneChannelConcatenationVisionTower
27
  from .configuration_multi_backbone_channel_concatentation_model import MultiBackboneChannelConcatenationVisionModelConfig
 
 
 
28
 
29
  logger = logging.get_logger(__name__)
30
 
 
25
  from .multi_backbone_channel_concatentation_model import MultiBackboneChannelConcatenationVisionModel
26
  from .multi_backbone_channel_concatenation_encoder import MultiBackboneChannelConcatenationVisionTower
27
  from .configuration_multi_backbone_channel_concatentation_model import MultiBackboneChannelConcatenationVisionModelConfig
28
+ from .siglip_vision_tower import SiglipVisionTower
29
+ from .convnext_encoder import ConvNextVisionTower
30
+ from .convnext import ConvNeXt
31
 
32
  logger = logging.get_logger(__name__)
33