huseinzol05 committed on
Commit
c52cfc0
·
verified ·
1 Parent(s): eaf69c4

Upload awq-llava-v1.6-34b-hf.ipynb

Browse files
Files changed (1) hide show
  1. awq-llava-v1.6-34b-hf.ipynb +441 -0
awq-llava-v1.6-34b-hf.ipynb ADDED
@@ -0,0 +1,441 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "d00601c9",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "import logging\n",
11
+ "\n",
12
+ "logging.basicConfig(level=logging.DEBUG)"
13
+ ]
14
+ },
15
+ {
16
+ "cell_type": "code",
17
+ "execution_count": 2,
18
+ "id": "a8d52aa0",
19
+ "metadata": {},
20
+ "outputs": [
21
+ {
22
+ "name": "stderr",
23
+ "output_type": "stream",
24
+ "text": [
25
+ "INFO:numexpr.utils:Note: NumExpr detected 24 cores but \"NUMEXPR_MAX_THREADS\" not set, so enforcing safe limit of 8.\n",
26
+ "INFO:numexpr.utils:NumExpr defaulting to 8 threads.\n",
27
+ "INFO:datasets:PyTorch version 2.2.1+cu118 available.\n"
28
+ ]
29
+ }
30
+ ],
31
+ "source": [
32
+ "from transformers import AutoModelForCausalLM, AutoTokenizer\n",
33
+ "from datasets import load_dataset\n",
34
+ "import torch"
35
+ ]
36
+ },
37
+ {
38
+ "cell_type": "code",
39
+ "execution_count": 4,
40
+ "id": "ab513a4e",
41
+ "metadata": {},
42
+ "outputs": [],
43
+ "source": [
44
+ "from awq import AutoAWQForCausalLM\n",
45
+ "from transformers import AutoTokenizer\n",
46
+ "\n",
47
+ "model_path = 'llava-hf/llava-v1.6-34b-hf'\n",
48
+ "quant_path = './llava-v1.6-34b-awq'\n",
49
+ "quant_config = { \"zero_point\": True, \"q_group_size\": 128, \"w_bit\": 4, \"version\": \"GEMM\" }"
50
+ ]
51
+ },
52
+ {
53
+ "cell_type": "code",
54
+ "execution_count": 5,
55
+ "id": "41d1869f",
56
+ "metadata": {
57
+ "scrolled": true
58
+ },
59
+ "outputs": [
60
+ {
61
+ "name": "stderr",
62
+ "output_type": "stream",
63
+ "text": [
64
+ "/home/ubuntu/.local/lib/python3.10/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n",
65
+ " warnings.warn(\n",
66
+ "DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443\n",
67
+ "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"HEAD /llava-hf/llava-v1.6-34b-hf/resolve/main/config.json HTTP/1.1\" 200 0\n",
68
+ "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"GET /api/models/llava-hf/llava-v1.6-34b-hf/revision/main HTTP/1.1\" 200 2489\n"
69
+ ]
70
+ },
71
+ {
72
+ "data": {
73
+ "application/vnd.jupyter.widget-view+json": {
74
+ "model_id": "4783694b39234334bc03f32c5c451b8d",
75
+ "version_major": 2,
76
+ "version_minor": 0
77
+ },
78
+ "text/plain": [
79
+ "Fetching 25 files: 0%| | 0/25 [00:00<?, ?it/s]"
80
+ ]
81
+ },
82
+ "metadata": {},
83
+ "output_type": "display_data"
84
+ },
85
+ {
86
+ "data": {
87
+ "application/vnd.jupyter.widget-view+json": {
88
+ "model_id": "fc2129edb44842588ff59d6e7512d2b6",
89
+ "version_major": 2,
90
+ "version_minor": 0
91
+ },
92
+ "text/plain": [
93
+ "Loading checkpoint shards: 0%| | 0/15 [00:00<?, ?it/s]"
94
+ ]
95
+ },
96
+ "metadata": {},
97
+ "output_type": "display_data"
98
+ },
99
+ {
100
+ "name": "stderr",
101
+ "output_type": "stream",
102
+ "text": [
103
+ "DEBUG:urllib3.connectionpool:Resetting dropped connection: huggingface.co\n",
104
+ "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"HEAD /llava-hf/llava-v1.6-34b-hf/resolve/main/tokenizer_config.json HTTP/1.1\" 200 0\n"
105
+ ]
106
+ }
107
+ ],
108
+ "source": [
109
+ "model = AutoAWQForCausalLM.from_pretrained(\n",
110
+ " model_path, torch_dtype = torch.bfloat16,\n",
111
+ ")\n",
112
+ "_ = model.cuda()\n",
113
+ "tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)"
114
+ ]
115
+ },
116
+ {
117
+ "cell_type": "code",
118
+ "execution_count": 6,
119
+ "id": "f9ddb7f5",
120
+ "metadata": {},
121
+ "outputs": [
122
+ {
123
+ "name": "stdout",
124
+ "output_type": "stream",
125
+ "text": [
126
+ "Tue May 28 04:52:08 2024 \r\n",
127
+ "+---------------------------------------------------------------------------------------+\r\n",
128
+ "| NVIDIA-SMI 535.54.03 Driver Version: 535.54.03 CUDA Version: 12.2 |\r\n",
129
+ "|-----------------------------------------+----------------------+----------------------+\r\n",
130
+ "| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |\r\n",
131
+ "| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |\r\n",
132
+ "| | | MIG M. |\r\n",
133
+ "|=========================================+======================+======================|\r\n",
134
+ "| 0 NVIDIA A100 80GB PCIe On | 00000001:00:00.0 Off | 0 |\r\n",
135
+ "| N/A 32C P0 65W / 300W | 66718MiB / 81920MiB | 0% Default |\r\n",
136
+ "| | | Disabled |\r\n",
137
+ "+-----------------------------------------+----------------------+----------------------+\r\n",
138
+ " \r\n",
139
+ "+---------------------------------------------------------------------------------------+\r\n",
140
+ "| Processes: |\r\n",
141
+ "| GPU GI CI PID Type Process name GPU Memory |\r\n",
142
+ "| ID ID Usage |\r\n",
143
+ "|=======================================================================================|\r\n",
144
+ "+---------------------------------------------------------------------------------------+\r\n"
145
+ ]
146
+ }
147
+ ],
148
+ "source": [
149
+ "!nvidia-smi"
150
+ ]
151
+ },
152
+ {
153
+ "cell_type": "code",
154
+ "execution_count": 7,
155
+ "id": "23d8a658",
156
+ "metadata": {},
157
+ "outputs": [],
158
+ "source": [
159
+ "def load_wikitext():\n",
160
+ " data = load_dataset('wikitext', 'wikitext-2-raw-v1', split=\"train\")\n",
161
+ " return [text for text in data[\"text\"] if text.strip() != '' and len(text.split(' ')) > 30]"
162
+ ]
163
+ },
164
+ {
165
+ "cell_type": "code",
166
+ "execution_count": 8,
167
+ "id": "5dcf2167",
168
+ "metadata": {},
169
+ "outputs": [
170
+ {
171
+ "name": "stderr",
172
+ "output_type": "stream",
173
+ "text": [
174
+ "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"GET /api/datasets/wikitext HTTP/1.1\" 200 4846\n",
175
+ "DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): s3.amazonaws.com:443\n",
176
+ "DEBUG:urllib3.connectionpool:https://s3.amazonaws.com:443 \"HEAD /datasets.huggingface.co/datasets/datasets/wikitext/wikitext.py HTTP/1.1\" 200 0\n",
177
+ "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"GET /api/datasets/wikitext HTTP/1.1\" 200 4846\n",
178
+ "DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443\n",
179
+ "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"HEAD /datasets/wikitext/resolve/b08601e04326c79dfdd32d625aee71d232d685c3/README.md HTTP/1.1\" 200 0\n",
180
+ "DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443\n",
181
+ "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"HEAD /datasets/wikitext/resolve/b08601e04326c79dfdd32d625aee71d232d685c3/.huggingface.yaml HTTP/1.1\" 404 0\n",
182
+ "DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): datasets-server.huggingface.co:443\n",
183
+ "DEBUG:urllib3.connectionpool:https://datasets-server.huggingface.co:443 \"GET /info?dataset=wikitext HTTP/1.1\" 200 None\n",
184
+ "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"GET /api/datasets/wikitext/revision/b08601e04326c79dfdd32d625aee71d232d685c3 HTTP/1.1\" 200 4846\n",
185
+ "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"GET /api/datasets/wikitext/tree/b08601e04326c79dfdd32d625aee71d232d685c3/wikitext-103-raw-v1?recursive=False&expand=False HTTP/1.1\" 200 1017\n",
186
+ "DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443\n",
187
+ "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"GET /api/datasets/wikitext/revision/b08601e04326c79dfdd32d625aee71d232d685c3 HTTP/1.1\" 200 4846\n",
188
+ "DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443\n",
189
+ "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"HEAD /datasets/wikitext/resolve/b08601e04326c79dfdd32d625aee71d232d685c3/dataset_infos.json HTTP/1.1\" 404 0\n",
190
+ "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"GET /api/datasets/wikitext/tree/b08601e04326c79dfdd32d625aee71d232d685c3/wikitext-2-raw-v1?recursive=False&expand=False HTTP/1.1\" 200 751\n",
191
+ "DEBUG:filelock:Attempting to acquire lock 140631544568352 on /home/ubuntu/.cache/huggingface/datasets/_home_ubuntu_.cache_huggingface_datasets_wikitext_wikitext-2-raw-v1_0.0.0_b08601e04326c79dfdd32d625aee71d232d685c3.lock\n",
192
+ "DEBUG:filelock:Lock 140631544568352 acquired on /home/ubuntu/.cache/huggingface/datasets/_home_ubuntu_.cache_huggingface_datasets_wikitext_wikitext-2-raw-v1_0.0.0_b08601e04326c79dfdd32d625aee71d232d685c3.lock\n",
193
+ "DEBUG:fsspec.local:open file: /home/ubuntu/.cache/huggingface/datasets/wikitext/wikitext-2-raw-v1/0.0.0/b08601e04326c79dfdd32d625aee71d232d685c3/dataset_info.json\n",
194
+ "DEBUG:filelock:Attempting to release lock 140631544568352 on /home/ubuntu/.cache/huggingface/datasets/_home_ubuntu_.cache_huggingface_datasets_wikitext_wikitext-2-raw-v1_0.0.0_b08601e04326c79dfdd32d625aee71d232d685c3.lock\n",
195
+ "DEBUG:filelock:Lock 140631544568352 released on /home/ubuntu/.cache/huggingface/datasets/_home_ubuntu_.cache_huggingface_datasets_wikitext_wikitext-2-raw-v1_0.0.0_b08601e04326c79dfdd32d625aee71d232d685c3.lock\n",
196
+ "DEBUG:filelock:Attempting to acquire lock 140635360447648 on /home/ubuntu/.cache/huggingface/datasets/wikitext/wikitext-2-raw-v1/0.0.0/b08601e04326c79dfdd32d625aee71d232d685c3_builder.lock\n",
197
+ "DEBUG:filelock:Lock 140635360447648 acquired on /home/ubuntu/.cache/huggingface/datasets/wikitext/wikitext-2-raw-v1/0.0.0/b08601e04326c79dfdd32d625aee71d232d685c3_builder.lock\n",
198
+ "DEBUG:fsspec.local:open file: /home/ubuntu/.cache/huggingface/datasets/wikitext/wikitext-2-raw-v1/0.0.0/b08601e04326c79dfdd32d625aee71d232d685c3/dataset_info.json\n",
199
+ "DEBUG:filelock:Attempting to release lock 140635360447648 on /home/ubuntu/.cache/huggingface/datasets/wikitext/wikitext-2-raw-v1/0.0.0/b08601e04326c79dfdd32d625aee71d232d685c3_builder.lock\n",
200
+ "DEBUG:filelock:Lock 140635360447648 released on /home/ubuntu/.cache/huggingface/datasets/wikitext/wikitext-2-raw-v1/0.0.0/b08601e04326c79dfdd32d625aee71d232d685c3_builder.lock\n",
201
+ "DEBUG:root: * Split into 46 blocks\n",
202
+ "AWQ: 100%|██████████| 60/60 [40:08<00:00, 40.14s/it]\n"
203
+ ]
204
+ }
205
+ ],
206
+ "source": [
207
+ "model.quantize(tokenizer, quant_config=quant_config, calib_data=load_wikitext())"
208
+ ]
209
+ },
210
+ {
211
+ "cell_type": "code",
212
+ "execution_count": 9,
213
+ "id": "fa16f58f",
214
+ "metadata": {},
215
+ "outputs": [
216
+ {
217
+ "name": "stdout",
218
+ "output_type": "stream",
219
+ "text": [
220
+ "[2024-05-28 06:02:42,856] [INFO] [real_accelerator.py:191:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n"
221
+ ]
222
+ }
223
+ ],
224
+ "source": [
225
+ "model.save_quantized(quant_path)"
226
+ ]
227
+ },
228
+ {
229
+ "cell_type": "code",
230
+ "execution_count": 10,
231
+ "id": "7f8083da",
232
+ "metadata": {
233
+ "scrolled": true
234
+ },
235
+ "outputs": [
236
+ {
237
+ "data": {
238
+ "text/plain": [
239
+ "('./llava-v1.6-34b-awq/tokenizer_config.json',\n",
240
+ " './llava-v1.6-34b-awq/special_tokens_map.json',\n",
241
+ " './llava-v1.6-34b-awq/tokenizer.model',\n",
242
+ " './llava-v1.6-34b-awq/added_tokens.json',\n",
243
+ " './llava-v1.6-34b-awq/tokenizer.json')"
244
+ ]
245
+ },
246
+ "execution_count": 10,
247
+ "metadata": {},
248
+ "output_type": "execute_result"
249
+ }
250
+ ],
251
+ "source": [
252
+ "tokenizer.save_pretrained(quant_path)"
253
+ ]
254
+ },
255
+ {
256
+ "cell_type": "code",
257
+ "execution_count": 17,
258
+ "id": "840e775b",
259
+ "metadata": {
260
+ "scrolled": true
261
+ },
262
+ "outputs": [
263
+ {
264
+ "name": "stderr",
265
+ "output_type": "stream",
266
+ "text": [
267
+ "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"POST /api/repos/create HTTP/1.1\" 409 108\n",
268
+ "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"HEAD /mesolitica/llava-v1.6-34b-awq/resolve/main/README.md HTTP/1.1\" 404 0\n",
269
+ "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"POST /api/validate-yaml HTTP/1.1\" 200 27\n",
270
+ "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"POST /api/models/mesolitica/llava-v1.6-34b-awq/preupload/main HTTP/1.1\" 200 442\n",
271
+ "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"POST /mesolitica/llava-v1.6-34b-awq.git/info/lfs/objects/batch HTTP/1.1\" 200 908\n"
272
+ ]
273
+ },
274
+ {
275
+ "data": {
276
+ "application/vnd.jupyter.widget-view+json": {
277
+ "model_id": "c19ee02a86294d509578c5d11de7723b",
278
+ "version_major": 2,
279
+ "version_minor": 0
280
+ },
281
+ "text/plain": [
282
+ "tokenizer.model: 0%| | 0.00/1.03M [00:00<?, ?B/s]"
283
+ ]
284
+ },
285
+ "metadata": {},
286
+ "output_type": "display_data"
287
+ },
288
+ {
289
+ "name": "stderr",
290
+ "output_type": "stream",
291
+ "text": [
292
+ "DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): hf-hub-lfs-us-east-1.s3-accelerate.amazonaws.com:443\n",
293
+ "DEBUG:urllib3.connectionpool:https://hf-hub-lfs-us-east-1.s3-accelerate.amazonaws.com:443 \"PUT /repos/59/d4/59d45338b6a6ddb440f61ec405842ef87dffed6ec946242daa5c9bfe59de941a/386c49cf943d71aa110361135338c50e38beeff0a66593480421f37b319e1a39?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credential=AKIA2JU7TKAQFN2FTF47%2F20240528%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20240528T071302Z&X-Amz-Expires=900&X-Amz-Signature=c521b865377a68a968cad9bef88b199a7ce8f5967af74d7a4d2c2197b35da6c5&X-Amz-SignedHeaders=host&x-amz-storage-class=INTELLIGENT_TIERING&x-id=PutObject HTTP/1.1\" 200 0\n",
294
+ "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"POST /mesolitica/llava-v1.6-34b-awq.git/info/lfs/objects/verify HTTP/1.1\" 200 2\n",
295
+ "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"POST /api/models/mesolitica/llava-v1.6-34b-awq/commit/main HTTP/1.1\" 200 202\n"
296
+ ]
297
+ },
298
+ {
299
+ "data": {
300
+ "text/plain": [
301
+ "CommitInfo(commit_url='https://huggingface.co/mesolitica/llava-v1.6-34b-awq/commit/03d9749ace4afe673620749b66ac77093bac742d', commit_message='Upload tokenizer', commit_description='', oid='03d9749ace4afe673620749b66ac77093bac742d', pr_url=None, pr_revision=None, pr_num=None)"
302
+ ]
303
+ },
304
+ "execution_count": 17,
305
+ "metadata": {},
306
+ "output_type": "execute_result"
307
+ }
308
+ ],
309
+ "source": [
310
+ "tokenizer.push_to_hub('mesolitica/llava-v1.6-34b-awq')"
311
+ ]
312
+ },
313
+ {
314
+ "cell_type": "code",
315
+ "execution_count": 19,
316
+ "id": "1af2adcf",
317
+ "metadata": {},
318
+ "outputs": [
319
+ {
320
+ "name": "stderr",
321
+ "output_type": "stream",
322
+ "text": [
323
+ "/home/ubuntu/.local/lib/python3.10/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n",
324
+ " warnings.warn(\n",
325
+ "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"HEAD /llava-hf/llava-v1.6-34b-hf/resolve/main/config.json HTTP/1.1\" 200 0\n",
326
+ "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"POST /api/repos/create HTTP/1.1\" 409 108\n",
327
+ "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"HEAD /mesolitica/llava-v1.6-34b-awq/resolve/main/README.md HTTP/1.1\" 200 0\n",
328
+ "DEBUG:filelock:Attempting to acquire lock 140629439752000 on /home/ubuntu/.cache/huggingface/hub/.locks/models--mesolitica--llava-v1.6-34b-awq/bc5f30d6632ac0efdc7be2e9095e9e9579af2e33.lock\n",
329
+ "DEBUG:filelock:Lock 140629439752000 acquired on /home/ubuntu/.cache/huggingface/hub/.locks/models--mesolitica--llava-v1.6-34b-awq/bc5f30d6632ac0efdc7be2e9095e9e9579af2e33.lock\n",
330
+ "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"GET /mesolitica/llava-v1.6-34b-awq/resolve/main/README.md HTTP/1.1\" 200 5174\n"
331
+ ]
332
+ },
333
+ {
334
+ "data": {
335
+ "application/vnd.jupyter.widget-view+json": {
336
+ "model_id": "58af5cd027394e428e29dcc420e41565",
337
+ "version_major": 2,
338
+ "version_minor": 0
339
+ },
340
+ "text/plain": [
341
+ "README.md: 0%| | 0.00/5.17k [00:00<?, ?B/s]"
342
+ ]
343
+ },
344
+ "metadata": {},
345
+ "output_type": "display_data"
346
+ },
347
+ {
348
+ "name": "stderr",
349
+ "output_type": "stream",
350
+ "text": [
351
+ "DEBUG:filelock:Attempting to release lock 140629439752000 on /home/ubuntu/.cache/huggingface/hub/.locks/models--mesolitica--llava-v1.6-34b-awq/bc5f30d6632ac0efdc7be2e9095e9e9579af2e33.lock\n",
352
+ "DEBUG:filelock:Lock 140629439752000 released on /home/ubuntu/.cache/huggingface/hub/.locks/models--mesolitica--llava-v1.6-34b-awq/bc5f30d6632ac0efdc7be2e9095e9e9579af2e33.lock\n",
353
+ "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"POST /api/validate-yaml HTTP/1.1\" 200 27\n",
354
+ "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"POST /api/models/mesolitica/llava-v1.6-34b-awq/preupload/main HTTP/1.1\" 200 143\n",
355
+ "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"POST /api/models/mesolitica/llava-v1.6-34b-awq/commit/main HTTP/1.1\" 200 202\n"
356
+ ]
357
+ },
358
+ {
359
+ "data": {
360
+ "text/plain": [
361
+ "CommitInfo(commit_url='https://huggingface.co/mesolitica/llava-v1.6-34b-awq/commit/7f9ea6a51b95b743229de158f5bef5c5a33335db', commit_message='Upload config', commit_description='', oid='7f9ea6a51b95b743229de158f5bef5c5a33335db', pr_url=None, pr_revision=None, pr_num=None)"
362
+ ]
363
+ },
364
+ "execution_count": 19,
365
+ "metadata": {},
366
+ "output_type": "execute_result"
367
+ }
368
+ ],
369
+ "source": [
370
+ "from transformers import AutoConfig, AwqConfig\n",
371
+ "\n",
372
+ "quantization_config = AwqConfig(\n",
373
+ " bits=quant_config['w_bit'],\n",
374
+ " group_size=quant_config['q_group_size'],\n",
375
+ " zero_point=quant_config['zero_point'],\n",
376
+ " backend='autoawq',\n",
377
+ " version=quant_config['version'].lower(),\n",
378
+ ")\n",
379
+ "\n",
380
+ "config = AutoConfig.from_pretrained(model_path)\n",
381
+ "config.quantization_config = quantization_config\n",
382
+ "\n",
383
+ "config.push_to_hub('mesolitica/llava-v1.6-34b-awq')"
384
+ ]
385
+ },
386
+ {
387
+ "cell_type": "code",
388
+ "execution_count": 20,
389
+ "id": "4546e2f1",
390
+ "metadata": {},
391
+ "outputs": [],
392
+ "source": [
393
+ "from huggingface_hub import HfApi\n",
394
+ "\n",
395
+ "api = HfApi()"
396
+ ]
397
+ },
398
+ {
399
+ "cell_type": "code",
400
+ "execution_count": 24,
401
+ "id": "4d6dc901",
402
+ "metadata": {},
403
+ "outputs": [],
404
+ "source": [
405
+ "api.upload_folder(\n",
406
+ " folder_path='llava-v1.6-34b-awq',\n",
407
+ " repo_id='mesolitica/llava-v1.6-34b-awq',\n",
408
+ ")"
409
+ ]
410
+ },
411
+ {
412
+ "cell_type": "code",
413
+ "execution_count": null,
414
+ "id": "b231844d",
415
+ "metadata": {},
416
+ "outputs": [],
417
+ "source": []
418
+ }
419
+ ],
420
+ "metadata": {
421
+ "kernelspec": {
422
+ "display_name": "Python 3 (ipykernel)",
423
+ "language": "python",
424
+ "name": "python3"
425
+ },
426
+ "language_info": {
427
+ "codemirror_mode": {
428
+ "name": "ipython",
429
+ "version": 3
430
+ },
431
+ "file_extension": ".py",
432
+ "mimetype": "text/x-python",
433
+ "name": "python",
434
+ "nbconvert_exporter": "python",
435
+ "pygments_lexer": "ipython3",
436
+ "version": "3.10.12"
437
+ }
438
+ },
439
+ "nbformat": 4,
440
+ "nbformat_minor": 5
441
+ }