Update README.md
Browse files
README.md
CHANGED
@@ -58,11 +58,11 @@ You are Dolphin, a helpful AI assistant.<|im_end|>
|
|
58 |
```python
|
59 |
from huggingface_hub import hf_hub_download
|
60 |
|
61 |
-
hf_hub_download(repo_id="visheratin/LLaVA-3b", filename="configuration_llava.py", local_dir="./", force_download=True)
|
62 |
-
hf_hub_download(repo_id="visheratin/LLaVA-3b", filename="configuration_phi.py", local_dir="./", force_download=True)
|
63 |
-
hf_hub_download(repo_id="visheratin/LLaVA-3b", filename="modeling_llava.py", local_dir="./", force_download=True)
|
64 |
-
hf_hub_download(repo_id="visheratin/LLaVA-3b", filename="modeling_phi.py", local_dir="./", force_download=True)
|
65 |
-
hf_hub_download(repo_id="visheratin/LLaVA-3b", filename="processing_llava.py", local_dir="./", force_download=True)
|
66 |
```
|
67 |
|
68 |
**Create a model**
|
@@ -71,7 +71,7 @@ hf_hub_download(repo_id="visheratin/LLaVA-3b", filename="processing_llava.py", l
|
|
71 |
from modeling_llava import LlavaForConditionalGeneration
|
72 |
import torch
|
73 |
|
74 |
-
model = LlavaForConditionalGeneration.from_pretrained("visheratin/LLaVA-3b", torch_dtype=torch.float16)
|
75 |
model = model.to("cuda")
|
76 |
```
|
77 |
|
@@ -81,7 +81,7 @@ model = model.to("cuda")
|
|
81 |
from transformers import AutoTokenizer
|
82 |
from processing_llava import LlavaProcessor, OpenCLIPImageProcessor
|
83 |
|
84 |
-
tokenizer = AutoTokenizer.from_pretrained("visheratin/LLaVA-3b")
|
85 |
image_processor = OpenCLIPImageProcessor(model.config.preprocess_config)
|
86 |
processor = LlavaProcessor(image_processor, tokenizer)
|
87 |
```
|
@@ -123,27 +123,3 @@ import torch
|
|
123 |
with torch.inference_mode():
|
124 |
    output = model.generate(**inputs, max_new_tokens=200, do_sample=True, temperature=0.4, pad_token_id=tokenizer.eos_token_id, eos_token_id=tokenizer.eos_token_id)
|
125 |
```
|
126 |
-
|
127 |
-
## Benchmarks
|
128 |
-
|
129 |
-
- TextVQA - 38.59%
|
130 |
-
- GQA - 49.6%
|
131 |
-
- VQAv2 - 64.24%
|
132 |
-
- VizWiz - 24.88%
|
133 |
-
- POPE - 80.59%
|
134 |
-
- V*-bench - 52.25% (OCR - 46.66%, GPT4V-hard - 41.17%, direct attributes - 43.48%, relative position - 65.79%)
|
135 |
-
|
136 |
-
## Examples
|
137 |
-
|
138 |
-
<a target="_blank" href="https://colab.research.google.com/drive/1sXDvVl5s9fTcE0N2bQGOlXhnNlKEdeun">
|
139 |
-
<img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
|
140 |
-
</a>
|
141 |
-
|
142 |
-
## License
|
143 |
-
|
144 |
-
The model is licensed under the MIT license, but since the data used for model training is largely synthetic, you should also follow the OpenAI and Google Gemini terms of service.
|
145 |
-
In practice, this means you should not use the model to create models that compete with theirs.
|
146 |
-
|
147 |
-
## Acknowledgments
|
148 |
-
|
149 |
-
Thanks to [ML Collective](https://mlcollective.org/) for providing credits for computing resources.
|
|
|
58 |
```python
|
59 |
from huggingface_hub import hf_hub_download
|
60 |
|
61 |
+
hf_hub_download(repo_id="OEvortex/HelpingAI-Vision", filename="configuration_llava.py", local_dir="./", force_download=True)
|
62 |
+
hf_hub_download(repo_id="OEvortex/HelpingAI-Vision", filename="configuration_phi.py", local_dir="./", force_download=True)
|
63 |
+
hf_hub_download(repo_id="OEvortex/HelpingAI-Vision", filename="modeling_llava.py", local_dir="./", force_download=True)
|
64 |
+
hf_hub_download(repo_id="OEvortex/HelpingAI-Vision", filename="modeling_phi.py", local_dir="./", force_download=True)
|
65 |
+
hf_hub_download(repo_id="OEvortex/HelpingAI-Vision", filename="processing_llava.py", local_dir="./", force_download=True)
|
66 |
```
|
67 |
|
68 |
**Create a model**
|
|
|
71 |
from modeling_llava import LlavaForConditionalGeneration
|
72 |
import torch
|
73 |
|
74 |
+
model = LlavaForConditionalGeneration.from_pretrained("OEvortex/HelpingAI-Vision", torch_dtype=torch.float16)
|
75 |
model = model.to("cuda")
|
76 |
```
|
77 |
|
|
|
81 |
from transformers import AutoTokenizer
|
82 |
from processing_llava import LlavaProcessor, OpenCLIPImageProcessor
|
83 |
|
84 |
+
tokenizer = AutoTokenizer.from_pretrained("OEvortex/HelpingAI-Vision")
|
85 |
image_processor = OpenCLIPImageProcessor(model.config.preprocess_config)
|
86 |
processor = LlavaProcessor(image_processor, tokenizer)
|
87 |
```
|
|
|
123 |
with torch.inference_mode():
|
124 |
    output = model.generate(**inputs, max_new_tokens=200, do_sample=True, temperature=0.4, pad_token_id=tokenizer.eos_token_id, eos_token_id=tokenizer.eos_token_id)
|
125 |
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|