huseinzol05 committed on
Commit
70f9c3d
·
verified ·
1 Parent(s): 8e7ddae

Upload inference.ipynb

Browse files
Files changed (1) hide show
  1. inference.ipynb +131 -0
inference.ipynb ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "953dfc98",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "from awq import AutoAWQForCausalLM\n",
11
+ "from transformers import AutoTokenizer, AutoProcessor, TextStreamer\n",
12
+ "from PIL import Image\n",
13
+ "import requests\n",
14
+ "import torch"
15
+ ]
16
+ },
17
+ {
18
+ "cell_type": "code",
19
+ "execution_count": 2,
20
+ "id": "c667ce7b",
21
+ "metadata": {},
22
+ "outputs": [],
23
+ "source": [
24
+ "quant_path = './llava-v1.6-34b-awq'"
25
+ ]
26
+ },
27
+ {
28
+ "cell_type": "code",
29
+ "execution_count": 3,
30
+ "id": "6adbf36e",
31
+ "metadata": {},
32
+ "outputs": [
33
+ {
34
+ "name": "stderr",
35
+ "output_type": "stream",
36
+ "text": [
37
+ "Replacing layers...: 100%|██████████| 60/60 [00:06<00:00, 9.86it/s]\n",
38
+ "Fusing layers...: 100%|██████████| 60/60 [00:00<00:00, 166.86it/s]\n"
39
+ ]
40
+ }
41
+ ],
42
+ "source": [
43
+ "model = AutoAWQForCausalLM.from_quantized(quant_path, safetensors=True, device_map=\"auto\")\n",
44
+ "processor = AutoProcessor.from_pretrained(quant_path)"
45
+ ]
46
+ },
47
+ {
48
+ "cell_type": "code",
49
+ "execution_count": 6,
50
+ "id": "d8ab8031",
51
+ "metadata": {},
52
+ "outputs": [],
53
+ "source": [
54
+ "url = \"https://github.com/haotian-liu/LLaVA/blob/1a91fc274d7c35a9b50b3cb29c4247ae5837ce39/images/llava_v1_5_radar.jpg?raw=true\"\n",
55
+ "image = Image.open(requests.get(url, stream=True).raw)\n",
56
+ "prompt = \"<|im_start|>system\\nAnswer the questions.<|im_end|><|im_start|>user\\n<image>\\nWhat is shown in this image?<|im_end|><|im_start|>assistant\\n\""
57
+ ]
58
+ },
59
+ {
60
+ "cell_type": "code",
61
+ "execution_count": 7,
62
+ "id": "350d6426",
63
+ "metadata": {},
64
+ "outputs": [],
65
+ "source": [
66
+ "inputs = processor(prompt, image, return_tensors='pt').to(0, torch.float16)"
67
+ ]
68
+ },
69
+ {
70
+ "cell_type": "code",
71
+ "execution_count": 8,
72
+ "id": "24c9ac90",
73
+ "metadata": {},
74
+ "outputs": [],
75
+ "source": [
76
+ "streamer = TextStreamer(processor)"
77
+ ]
78
+ },
79
+ {
80
+ "cell_type": "code",
81
+ "execution_count": null,
82
+ "id": "74e964aa",
83
+ "metadata": {},
84
+ "outputs": [
85
+ {
86
+ "name": "stdout",
87
+ "output_type": "stream",
88
+ "text": [
89
+ "<|im_start|> system\n",
90
+ "Answer the questions.<|im_end|><|im_start|> user\n",
91
+ "<image> \n",
92
+ "What is shown in this image?<|im_end|><|im_start|> assistant\n",
93
+ "The image shows a radar chart with various data points. The chart is a polar plot with concentric "
94
+ ]
95
+ }
96
+ ],
97
+ "source": [
98
+ "generation_output = model.generate(**inputs,max_new_tokens=1024, streamer = streamer)"
99
+ ]
100
+ },
101
+ {
102
+ "cell_type": "code",
103
+ "execution_count": null,
104
+ "id": "4ae4ca43",
105
+ "metadata": {},
106
+ "outputs": [],
107
+ "source": []
108
+ }
109
+ ],
110
+ "metadata": {
111
+ "kernelspec": {
112
+ "display_name": "Python 3 (ipykernel)",
113
+ "language": "python",
114
+ "name": "python3"
115
+ },
116
+ "language_info": {
117
+ "codemirror_mode": {
118
+ "name": "ipython",
119
+ "version": 3
120
+ },
121
+ "file_extension": ".py",
122
+ "mimetype": "text/x-python",
123
+ "name": "python",
124
+ "nbconvert_exporter": "python",
125
+ "pygments_lexer": "ipython3",
126
+ "version": "3.10.12"
127
+ }
128
+ },
129
+ "nbformat": 4,
130
+ "nbformat_minor": 5
131
+ }