OEvortex
/

HelpingAI-Vision

@@ -58,11 +58,11 @@ You are Dolphin, a helpful AI assistant.<|im_end|>
 ```python
 from huggingface_hub import hf_hub_download
-hf_hub_download(repo_id="visheratin/LLaVA-3b", filename="configuration_llava.py", local_dir="./", force_download=True)
-hf_hub_download(repo_id="visheratin/LLaVA-3b", filename="configuration_phi.py", local_dir="./", force_download=True)
-hf_hub_download(repo_id="visheratin/LLaVA-3b", filename="modeling_llava.py", local_dir="./", force_download=True)
-hf_hub_download(repo_id="visheratin/LLaVA-3b", filename="modeling_phi.py", local_dir="./", force_download=True)
-hf_hub_download(repo_id="visheratin/LLaVA-3b", filename="processing_llava.py", local_dir="./", force_download=True)
 ```
 **Create a model**
@@ -71,7 +71,7 @@ hf_hub_download(repo_id="visheratin/LLaVA-3b", filename="processing_llava.py", l
 from modeling_llava import LlavaForConditionalGeneration
 import torch
-model = LlavaForConditionalGeneration.from_pretrained("visheratin/LLaVA-3b", torch_dtype=torch.float16)
 model = model.to("cuda")
 ```
@@ -81,7 +81,7 @@ model = model.to("cuda")
 from transformers import AutoTokenizer
 from processing_llava import LlavaProcessor, OpenCLIPImageProcessor
-tokenizer = AutoTokenizer.from_pretrained("visheratin/LLaVA-3b")
 image_processor = OpenCLIPImageProcessor(model.config.preprocess_config)
 processor = LlavaProcessor(image_processor, tokenizer)
 ```
@@ -123,27 +123,3 @@ import torch
 with torch.inference_mode():
   output = model.generate(**inputs, max_new_tokens=200, do_sample=True, temperature=0.4, pad_token_id=tokenizer.eos_token_id, eos_token_id=tokenizer.eos_token_id)
 ```
-## Benchmarks
-- TextVQA - 38.59%
-- GQA - 49.6%
-- VQAv2 - 64.24%
-- VizWiz - 24.88%
-- POPE - 80.59%
-- V*-bench - 52.25% (OCR - 46.66%, GPT4V-hard - 41.17%, direct attributes - 43.48%, relative position - 65.79%)
-## Examples
-<a target="_blank" href="https://colab.research.google.com/drive/1sXDvVl5s9fTcE0N2bQGOlXhnNlKEdeun">
-  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
-</a>
-## License
-The model is licensed under the MIT license, but since the data used for model training is largely synthetic, you should also follow the OpenAI and Google Gemini terms of service.
-In practice, this means you must not use the model to create models that compete with theirs.
-## Acknowledgments
-Thanks to [ML Collective](https://mlcollective.org/) for providing credits for computing resources.

 ```python
 from huggingface_hub import hf_hub_download
+hf_hub_download(repo_id="OEvortex/HelpingAI-Vision", filename="configuration_llava.py", local_dir="./", force_download=True)
+hf_hub_download(repo_id="OEvortex/HelpingAI-Vision", filename="configuration_phi.py", local_dir="./", force_download=True)
+hf_hub_download(repo_id="OEvortex/HelpingAI-Vision", filename="modeling_llava.py", local_dir="./", force_download=True)
+hf_hub_download(repo_id="OEvortex/HelpingAI-Vision", filename="modeling_phi.py", local_dir="./", force_download=True)
+hf_hub_download(repo_id="OEvortex/HelpingAI-Vision", filename="processing_llava.py", local_dir="./", force_download=True)
 ```
 **Create a model**
 from modeling_llava import LlavaForConditionalGeneration
 import torch
+model = LlavaForConditionalGeneration.from_pretrained("OEvortex/HelpingAI-Vision", torch_dtype=torch.float16)
 model = model.to("cuda")
 ```
 from transformers import AutoTokenizer
 from processing_llava import LlavaProcessor, OpenCLIPImageProcessor
+tokenizer = AutoTokenizer.from_pretrained("OEvortex/HelpingAI-Vision")
 image_processor = OpenCLIPImageProcessor(model.config.preprocess_config)
 processor = LlavaProcessor(image_processor, tokenizer)
 ```
 with torch.inference_mode():
   output = model.generate(**inputs, max_new_tokens=200, do_sample=True, temperature=0.4, pad_token_id=tokenizer.eos_token_id, eos_token_id=tokenizer.eos_token_id)
 ```