import base64
import json
import os

import requests


def vision_inference(image_name):
    """Describe a local image using the NVIDIA-hosted Llama 3.2 11B Vision Instruct model."""
    try:
        invoke_url = "https://ai.api.nvidia.com/v1/gr/meta/llama-3.2-11b-vision-instruct/chat/completions"
        stream = False

        # Read the image and embed it in the prompt as a base64 data URI.
        with open(image_name, "rb") as f:
            image_b64 = base64.b64encode(f.read()).decode()
        # assert len(image_b64) < 180_000, \
        #     "To upload larger images, use the assets API (see docs)"

        api_key = os.environ["NIM_API_KEY"]
        headers = {
            "Authorization": f"Bearer {api_key}",
            "Accept": "text/event-stream" if stream else "application/json",
        }
        payload = {
            "model": "meta/llama-3.2-11b-vision-instruct",
            "messages": [
                {
                    "role": "user",
                    "content": f'What is in this image? <img src="data:image/png;base64,{image_b64}" />',
                }
            ],
            "max_tokens": 512,
            "temperature": 1.00,
            "top_p": 1.00,
            "stream": stream,
        }

        response = requests.post(invoke_url, headers=headers, json=payload)

        if stream:
            # Streamed responses arrive as server-sent events: each non-empty line
            # carries a JSON chunk prefixed with "data: " and the stream ends with
            # "data: [DONE]". Concatenate the per-chunk deltas into the full reply.
            content = ""
            for line in response.iter_lines():
                if not line:
                    continue
                data = line.decode("utf-8")
                if data.startswith("data: "):
                    data = data[len("data: "):]
                if data.strip() == "[DONE]":
                    break
                content += json.loads(data)["choices"][0]["delta"].get("content", "")
            return content
        else:
            data = response.json()
            content = data["choices"][0]["message"]["content"]
            return content
    except Exception as e:  # Covers file, network, and response-parsing errors.
        print(f"Error: {e}")
        return None
# image_name = "/home/gaganyatri/Pictures/hackathon/eat-health/fruit-stall-1.jpg"
# content = vision_inference(image_name)
# print(content)
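
# Minimal usage sketch: "example.png" is a placeholder path, and NIM_API_KEY must
# be set in the environment before this script is run directly.
if __name__ == "__main__":
    description = vision_inference("example.png")  # swap in a real image path
    if description is not None:
        print(description)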