init
Browse files- README.md +5 -4
- demo.py +1 -1
- modeling_eagle_chat.py +3 -0
README.md
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
---
|
2 |
-
license:
|
3 |
pipeline_tag: image-text-to-text
|
4 |
library_name: transformers
|
5 |
base_model:
|
@@ -16,7 +16,8 @@ tags:
|
|
16 |
|
17 |
# Eagle-2
|
18 |
|
19 |
-
|
|
|
20 |
[\[🗨️ Chat Demo\]](http://eagle-vlm.xyz/) [\[🤗 HF Demo\]](TODO)
|
21 |
## Introduction
|
22 |
|
@@ -57,7 +58,7 @@ We provide the following models:
|
|
57 |
| AI2D<sub>test</sub> | 57.1 | 64.1 | 69.3 | 74.7 |70.9|
|
58 |
| MMMU<sub>val</sub> | 31.4 | 36.7 | 40.9 |41.1|38.8|
|
59 |
| MMVet<sub>GPT-4-Turbo</sub> | 32.2 | 32.7 | 48.8 | 49.5|40.9|
| HallBench<sub>avg</sub> | 27.9 | 34.0 | 39.0 |**41.7**|35.3|
|
60 |
-
| MathVista<sub>testmini</sub> |
|
61 |
| MMstar | 37.7 | 45.7 | 50.1|48.0|48.5|
|
62 |
|
63 |
|
@@ -66,7 +67,7 @@ We provide the following models:
|
|
66 |
|
67 |
|
68 |
|
69 |
-
We provide a [
|
70 |
- pure text input
|
71 |
- single image input
|
72 |
- multiple image input
|
|
|
1 |
---
|
2 |
+
license: cc-by-nc-4.0
|
3 |
pipeline_tag: image-text-to-text
|
4 |
library_name: transformers
|
5 |
base_model:
|
|
|
16 |
|
17 |
# Eagle-2
|
18 |
|
19 |
+
|
20 |
+
[\[💻 GitHub\]](https://github.com/NVlabs/EAGLE) [\[📜 Eagle2 Tech Report\]](TODO)
|
21 |
[\[🗨️ Chat Demo\]](http://eagle-vlm.xyz/) [\[🤗 HF Demo\]](TODO)
|
22 |
## Introduction
|
23 |
|
|
|
58 |
| AI2D<sub>test</sub> | 57.1 | 64.1 | 69.3 | 74.7 |70.9|
|
59 |
| MMMU<sub>val</sub> | 31.4 | 36.7 | 40.9 |41.1|38.8|
|
60 |
| MMVet<sub>GPT-4-Turbo</sub> | 32.2 | 32.7 | 48.8 | 49.5|40.9|
| HallBench<sub>avg</sub> | 27.9 | 34.0 | 39.0 |**41.7**|35.3|
|
61 |
+
| MathVista<sub>testmini</sub> | 33.8 | 37.7 | 43.2 |43.0|45.3|
|
62 |
| MMstar | 37.7 | 45.7 | 50.1|48.0|48.5|
|
63 |
|
64 |
|
|
|
67 |
|
68 |
|
69 |
|
70 |
+
We provide a [inference script](./demo.py) to help you quickly start using the model. We support different input types:
|
71 |
- pure text input
|
72 |
- single image input
|
73 |
- multiple image input
|
demo.py
CHANGED
@@ -390,7 +390,7 @@ class ModelWorker:
|
|
390 |
|
391 |
if __name__ == '__main__':
|
392 |
parser = argparse.ArgumentParser()
|
393 |
-
parser.add_argument('--model-path', type=str, default='/
|
394 |
parser.add_argument('--model-name', type=str, default='Eagle2-1B')
|
395 |
parser.add_argument('--device', type=str, default='cuda')
|
396 |
parser.add_argument('--load-8bit', action='store_true')
|
|
|
390 |
|
391 |
if __name__ == '__main__':
|
392 |
parser = argparse.ArgumentParser()
|
393 |
+
parser.add_argument('--model-path', type=str, default='nvidia/Eagle2-1B')
|
394 |
parser.add_argument('--model-name', type=str, default='Eagle2-1B')
|
395 |
parser.add_argument('--device', type=str, default='cuda')
|
396 |
parser.add_argument('--load-8bit', action='store_true')
|
modeling_eagle_chat.py
CHANGED
@@ -25,6 +25,9 @@ from .flash_attention import *
|
|
25 |
from .multi_backbone_channel_concatentation_model import MultiBackboneChannelConcatenationVisionModel
|
26 |
from .multi_backbone_channel_concatenation_encoder import MultiBackboneChannelConcatenationVisionTower
|
27 |
from .configuration_multi_backbone_channel_concatentation_model import MultiBackboneChannelConcatenationVisionModelConfig
|
|
|
|
|
|
|
28 |
|
29 |
logger = logging.get_logger(__name__)
|
30 |
|
|
|
25 |
from .multi_backbone_channel_concatentation_model import MultiBackboneChannelConcatenationVisionModel
|
26 |
from .multi_backbone_channel_concatenation_encoder import MultiBackboneChannelConcatenationVisionTower
|
27 |
from .configuration_multi_backbone_channel_concatentation_model import MultiBackboneChannelConcatenationVisionModelConfig
|
28 |
+
from .siglip_vision_tower import SiglipVisionTower
|
29 |
+
from .convnext_encoder import ConvNextVisionTower
|
30 |
+
from .convnext import ConvNeXt
|
31 |
|
32 |
logger = logging.get_logger(__name__)
|
33 |
|