mesolitica
/

llava-v1.6-34b-hf-awq

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "d00601c9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import logging\n",
+    "\n",
+    "# Keep the root logger at DEBUG so root-level progress messages stay visible,\n",
+    "# but quieten the HTTP / file-lock / filesystem loggers. At DEBUG, urllib3 logs\n",
+    "# every request URL in full, including presigned upload URLs with their query\n",
+    "# parameters, and those lines end up saved in the notebook outputs.\n",
+    "logging.basicConfig(level=logging.DEBUG)\n",
+    "for noisy_logger in ('urllib3', 'filelock', 'fsspec'):\n",
+    "    logging.getLogger(noisy_logger).setLevel(logging.WARNING)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "a8d52aa0",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:numexpr.utils:Note: NumExpr detected 24 cores but \"NUMEXPR_MAX_THREADS\" not set, so enforcing safe limit of 8.\n",
+      "INFO:numexpr.utils:NumExpr defaulting to 8 threads.\n",
+      "INFO:datasets:PyTorch version 2.2.1+cu118 available.\n"
+     ]
+    }
+   ],
+   "source": [
+    "from transformers import AutoModelForCausalLM, AutoTokenizer\n",
+    "from datasets import load_dataset\n",
+    "import torch"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "ab513a4e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from awq import AutoAWQForCausalLM\n",
+    "from transformers import AutoTokenizer\n",
+    "\n",
+    "# Hub id of the checkpoint to quantize, and the local directory the result is written to.\n",
+    "model_path = 'llava-hf/llava-v1.6-34b-hf'\n",
+    "quant_path = './llava-v1.6-34b-awq'\n",
+    "\n",
+    "# Settings handed to model.quantize(); the same values are reused later to build\n",
+    "# the AwqConfig that is pushed with the model config.\n",
+    "quant_config = {\n",
+    "    \"zero_point\": True,\n",
+    "    \"q_group_size\": 128,\n",
+    "    \"w_bit\": 4,\n",
+    "    \"version\": \"GEMM\",\n",
+    "}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "41d1869f",
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/ubuntu/.local/lib/python3.10/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n",
+      "  warnings.warn(\n",
+      "DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443\n",
+      "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"HEAD /llava-hf/llava-v1.6-34b-hf/resolve/main/config.json HTTP/1.1\" 200 0\n",
+      "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"GET /api/models/llava-hf/llava-v1.6-34b-hf/revision/main HTTP/1.1\" 200 2489\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "4783694b39234334bc03f32c5c451b8d",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Fetching 25 files:   0%|          | 0/25 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "fc2129edb44842588ff59d6e7512d2b6",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Loading checkpoint shards:   0%|          | 0/15 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "DEBUG:urllib3.connectionpool:Resetting dropped connection: huggingface.co\n",
+      "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"HEAD /llava-hf/llava-v1.6-34b-hf/resolve/main/tokenizer_config.json HTTP/1.1\" 200 0\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Load the source checkpoint in bfloat16 through the AutoAWQ wrapper, then move it to the GPU.\n",
+    "model = AutoAWQForCausalLM.from_pretrained(model_path, torch_dtype=torch.bfloat16)\n",
+    "_ = model.cuda()  # bind the return value so the cell does not echo the module repr\n",
+    "\n",
+    "# Tokenizer comes from the same source repo as the weights.\n",
+    "tokenizer = AutoTokenizer.from_pretrained(\n",
+    "    model_path,\n",
+    "    trust_remote_code=True,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "f9ddb7f5",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Tue May 28 04:52:08 2024       \r\n",
+      "+---------------------------------------------------------------------------------------+\r\n",
+      "| NVIDIA-SMI 535.54.03              Driver Version: 535.54.03    CUDA Version: 12.2     |\r\n",
+      "|-----------------------------------------+----------------------+----------------------+\r\n",
+      "| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |\r\n",
+      "| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |\r\n",
+      "|                                         |                      |               MIG M. |\r\n",
+      "|=========================================+======================+======================|\r\n",
+      "|   0  NVIDIA A100 80GB PCIe          On  | 00000001:00:00.0 Off |                    0 |\r\n",
+      "| N/A   32C    P0              65W / 300W |  66718MiB / 81920MiB |      0%      Default |\r\n",
+      "|                                         |                      |             Disabled |\r\n",
+      "+-----------------------------------------+----------------------+----------------------+\r\n",
+      "                                                                                         \r\n",
+      "+---------------------------------------------------------------------------------------+\r\n",
+      "| Processes:                                                                            |\r\n",
+      "|  GPU   GI   CI        PID   Type   Process name                            GPU Memory |\r\n",
+      "|        ID   ID                                                             Usage      |\r\n",
+      "|=======================================================================================|\r\n",
+      "+---------------------------------------------------------------------------------------+\r\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Shell escape: show the GPU state (memory in use) after the model was moved to the device.\n",
+    "!nvidia-smi"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "23d8a658",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def load_wikitext(min_words=30, split=\"train\"):\n",
+    "    \"\"\"Return WikiText-2 (raw) passages used as calibration text for quantization.\n",
+    "\n",
+    "    Args:\n",
+    "        min_words: a passage is kept only when splitting it on single spaces\n",
+    "            yields more than this many pieces (default 30, the original threshold).\n",
+    "        split: dataset split to read (default 'train', the original split).\n",
+    "\n",
+    "    Returns:\n",
+    "        A list of the passages that are not blank and pass the length filter,\n",
+    "        in dataset order.\n",
+    "    \"\"\"\n",
+    "    data = load_dataset('wikitext', 'wikitext-2-raw-v1', split=split)\n",
+    "    return [\n",
+    "        text\n",
+    "        for text in data[\"text\"]\n",
+    "        if text.strip() != '' and len(text.split(' ')) > min_words\n",
+    "    ]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "5dcf2167",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"GET /api/datasets/wikitext HTTP/1.1\" 200 4846\n",
+      "DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): s3.amazonaws.com:443\n",
+      "DEBUG:urllib3.connectionpool:https://s3.amazonaws.com:443 \"HEAD /datasets.huggingface.co/datasets/datasets/wikitext/wikitext.py HTTP/1.1\" 200 0\n",
+      "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"GET /api/datasets/wikitext HTTP/1.1\" 200 4846\n",
+      "DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443\n",
+      "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"HEAD /datasets/wikitext/resolve/b08601e04326c79dfdd32d625aee71d232d685c3/README.md HTTP/1.1\" 200 0\n",
+      "DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443\n",
+      "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"HEAD /datasets/wikitext/resolve/b08601e04326c79dfdd32d625aee71d232d685c3/.huggingface.yaml HTTP/1.1\" 404 0\n",
+      "DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): datasets-server.huggingface.co:443\n",
+      "DEBUG:urllib3.connectionpool:https://datasets-server.huggingface.co:443 \"GET /info?dataset=wikitext HTTP/1.1\" 200 None\n",
+      "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"GET /api/datasets/wikitext/revision/b08601e04326c79dfdd32d625aee71d232d685c3 HTTP/1.1\" 200 4846\n",
+      "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"GET /api/datasets/wikitext/tree/b08601e04326c79dfdd32d625aee71d232d685c3/wikitext-103-raw-v1?recursive=False&expand=False HTTP/1.1\" 200 1017\n",
+      "DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443\n",
+      "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"GET /api/datasets/wikitext/revision/b08601e04326c79dfdd32d625aee71d232d685c3 HTTP/1.1\" 200 4846\n",
+      "DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443\n",
+      "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"HEAD /datasets/wikitext/resolve/b08601e04326c79dfdd32d625aee71d232d685c3/dataset_infos.json HTTP/1.1\" 404 0\n",
+      "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"GET /api/datasets/wikitext/tree/b08601e04326c79dfdd32d625aee71d232d685c3/wikitext-2-raw-v1?recursive=False&expand=False HTTP/1.1\" 200 751\n",
+      "DEBUG:filelock:Attempting to acquire lock 140631544568352 on /home/ubuntu/.cache/huggingface/datasets/_home_ubuntu_.cache_huggingface_datasets_wikitext_wikitext-2-raw-v1_0.0.0_b08601e04326c79dfdd32d625aee71d232d685c3.lock\n",
+      "DEBUG:filelock:Lock 140631544568352 acquired on /home/ubuntu/.cache/huggingface/datasets/_home_ubuntu_.cache_huggingface_datasets_wikitext_wikitext-2-raw-v1_0.0.0_b08601e04326c79dfdd32d625aee71d232d685c3.lock\n",
+      "DEBUG:fsspec.local:open file: /home/ubuntu/.cache/huggingface/datasets/wikitext/wikitext-2-raw-v1/0.0.0/b08601e04326c79dfdd32d625aee71d232d685c3/dataset_info.json\n",
+      "DEBUG:filelock:Attempting to release lock 140631544568352 on /home/ubuntu/.cache/huggingface/datasets/_home_ubuntu_.cache_huggingface_datasets_wikitext_wikitext-2-raw-v1_0.0.0_b08601e04326c79dfdd32d625aee71d232d685c3.lock\n",
+      "DEBUG:filelock:Lock 140631544568352 released on /home/ubuntu/.cache/huggingface/datasets/_home_ubuntu_.cache_huggingface_datasets_wikitext_wikitext-2-raw-v1_0.0.0_b08601e04326c79dfdd32d625aee71d232d685c3.lock\n",
+      "DEBUG:filelock:Attempting to acquire lock 140635360447648 on /home/ubuntu/.cache/huggingface/datasets/wikitext/wikitext-2-raw-v1/0.0.0/b08601e04326c79dfdd32d625aee71d232d685c3_builder.lock\n",
+      "DEBUG:filelock:Lock 140635360447648 acquired on /home/ubuntu/.cache/huggingface/datasets/wikitext/wikitext-2-raw-v1/0.0.0/b08601e04326c79dfdd32d625aee71d232d685c3_builder.lock\n",
+      "DEBUG:fsspec.local:open file: /home/ubuntu/.cache/huggingface/datasets/wikitext/wikitext-2-raw-v1/0.0.0/b08601e04326c79dfdd32d625aee71d232d685c3/dataset_info.json\n",
+      "DEBUG:filelock:Attempting to release lock 140635360447648 on /home/ubuntu/.cache/huggingface/datasets/wikitext/wikitext-2-raw-v1/0.0.0/b08601e04326c79dfdd32d625aee71d232d685c3_builder.lock\n",
+      "DEBUG:filelock:Lock 140635360447648 released on /home/ubuntu/.cache/huggingface/datasets/wikitext/wikitext-2-raw-v1/0.0.0/b08601e04326c79dfdd32d625aee71d232d685c3_builder.lock\n",
+      "DEBUG:root: * Split into 46 blocks\n",
+      "AWQ: 100%|██████████| 60/60 [40:08<00:00, 40.14s/it]\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Quantize the loaded model with the settings in quant_config, using the\n",
+    "# filtered WikiText-2 passages from load_wikitext() as calibration data.\n",
+    "model.quantize(tokenizer, quant_config=quant_config, calib_data=load_wikitext())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "fa16f58f",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[2024-05-28 06:02:42,856] [INFO] [real_accelerator.py:191:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Write the quantized model to the local quant_path directory.\n",
+    "model.save_quantized(quant_path)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "7f8083da",
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "('./llava-v1.6-34b-awq/tokenizer_config.json',\n",
+       " './llava-v1.6-34b-awq/special_tokens_map.json',\n",
+       " './llava-v1.6-34b-awq/tokenizer.model',\n",
+       " './llava-v1.6-34b-awq/added_tokens.json',\n",
+       " './llava-v1.6-34b-awq/tokenizer.json')"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Save the tokenizer files into the same quant_path directory as the quantized model.\n",
+    "tokenizer.save_pretrained(quant_path)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "id": "840e775b",
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"POST /api/repos/create HTTP/1.1\" 409 108\n",
+      "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"HEAD /mesolitica/llava-v1.6-34b-awq/resolve/main/README.md HTTP/1.1\" 404 0\n",
+      "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"POST /api/validate-yaml HTTP/1.1\" 200 27\n",
+      "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"POST /api/models/mesolitica/llava-v1.6-34b-awq/preupload/main HTTP/1.1\" 200 442\n",
+      "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"POST /mesolitica/llava-v1.6-34b-awq.git/info/lfs/objects/batch HTTP/1.1\" 200 908\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "c19ee02a86294d509578c5d11de7723b",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "tokenizer.model:   0%|          | 0.00/1.03M [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): hf-hub-lfs-us-east-1.s3-accelerate.amazonaws.com:443\n",
+      "DEBUG:urllib3.connectionpool:https://hf-hub-lfs-us-east-1.s3-accelerate.amazonaws.com:443 \"PUT /repos/59/d4/59d45338b6a6ddb440f61ec405842ef87dffed6ec946242daa5c9bfe59de941a/386c49cf943d71aa110361135338c50e38beeff0a66593480421f37b319e1a39?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credential=AKIA2JU7TKAQFN2FTF47%2F20240528%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20240528T071302Z&X-Amz-Expires=900&X-Amz-Signature=c521b865377a68a968cad9bef88b199a7ce8f5967af74d7a4d2c2197b35da6c5&X-Amz-SignedHeaders=host&x-amz-storage-class=INTELLIGENT_TIERING&x-id=PutObject HTTP/1.1\" 200 0\n",
+      "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"POST /mesolitica/llava-v1.6-34b-awq.git/info/lfs/objects/verify HTTP/1.1\" 200 2\n",
+      "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"POST /api/models/mesolitica/llava-v1.6-34b-awq/commit/main HTTP/1.1\" 200 202\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "CommitInfo(commit_url='https://huggingface.co/mesolitica/llava-v1.6-34b-awq/commit/03d9749ace4afe673620749b66ac77093bac742d', commit_message='Upload tokenizer', commit_description='', oid='03d9749ace4afe673620749b66ac77093bac742d', pr_url=None, pr_revision=None, pr_num=None)"
+      ]
+     },
+     "execution_count": 17,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Upload the tokenizer files to the Hub repo.\n",
+    "tokenizer.push_to_hub('mesolitica/llava-v1.6-34b-awq')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "id": "1af2adcf",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/ubuntu/.local/lib/python3.10/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n",
+      "  warnings.warn(\n",
+      "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"HEAD /llava-hf/llava-v1.6-34b-hf/resolve/main/config.json HTTP/1.1\" 200 0\n",
+      "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"POST /api/repos/create HTTP/1.1\" 409 108\n",
+      "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"HEAD /mesolitica/llava-v1.6-34b-awq/resolve/main/README.md HTTP/1.1\" 200 0\n",
+      "DEBUG:filelock:Attempting to acquire lock 140629439752000 on /home/ubuntu/.cache/huggingface/hub/.locks/models--mesolitica--llava-v1.6-34b-awq/bc5f30d6632ac0efdc7be2e9095e9e9579af2e33.lock\n",
+      "DEBUG:filelock:Lock 140629439752000 acquired on /home/ubuntu/.cache/huggingface/hub/.locks/models--mesolitica--llava-v1.6-34b-awq/bc5f30d6632ac0efdc7be2e9095e9e9579af2e33.lock\n",
+      "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"GET /mesolitica/llava-v1.6-34b-awq/resolve/main/README.md HTTP/1.1\" 200 5174\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "58af5cd027394e428e29dcc420e41565",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "DEBUG:filelock:Attempting to release lock 140629439752000 on /home/ubuntu/.cache/huggingface/hub/.locks/models--mesolitica--llava-v1.6-34b-awq/bc5f30d6632ac0efdc7be2e9095e9e9579af2e33.lock\n",
+      "DEBUG:filelock:Lock 140629439752000 released on /home/ubuntu/.cache/huggingface/hub/.locks/models--mesolitica--llava-v1.6-34b-awq/bc5f30d6632ac0efdc7be2e9095e9e9579af2e33.lock\n",
+      "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"POST /api/validate-yaml HTTP/1.1\" 200 27\n",
+      "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"POST /api/models/mesolitica/llava-v1.6-34b-awq/preupload/main HTTP/1.1\" 200 143\n",
+      "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"POST /api/models/mesolitica/llava-v1.6-34b-awq/commit/main HTTP/1.1\" 200 202\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "CommitInfo(commit_url='https://huggingface.co/mesolitica/llava-v1.6-34b-awq/commit/7f9ea6a51b95b743229de158f5bef5c5a33335db', commit_message='Upload config', commit_description='', oid='7f9ea6a51b95b743229de158f5bef5c5a33335db', pr_url=None, pr_revision=None, pr_num=None)"
+      ]
+     },
+     "execution_count": 19,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from transformers import AutoConfig, AwqConfig\n",
+    "\n",
+    "# Start from the source checkpoint's config ...\n",
+    "config = AutoConfig.from_pretrained(model_path)\n",
+    "\n",
+    "# ... and attach an AwqConfig built from the same quant_config values that were\n",
+    "# passed to model.quantize(), so the published settings match what was run.\n",
+    "quantization_config = AwqConfig(\n",
+    "    backend='autoawq',\n",
+    "    version=quant_config['version'].lower(),\n",
+    "    bits=quant_config['w_bit'],\n",
+    "    group_size=quant_config['q_group_size'],\n",
+    "    zero_point=quant_config['zero_point'],\n",
+    ")\n",
+    "config.quantization_config = quantization_config\n",
+    "\n",
+    "# Publish the resulting config to the Hub repo.\n",
+    "config.push_to_hub('mesolitica/llava-v1.6-34b-awq')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "id": "4546e2f1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from huggingface_hub import HfApi\n",
+    "\n",
+    "# Hub API client; used in the next cell to upload the quantized model folder.\n",
+    "api = HfApi()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "id": "4d6dc901",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Upload the contents of the local quantized-model directory to the Hub repo.\n",
+    "# folder_path reuses quant_path (the directory save_quantized / save_pretrained\n",
+    "# wrote to) instead of re-typing the folder name, so the upload cannot drift\n",
+    "# from the save location if quant_path is changed.\n",
+    "api.upload_folder(\n",
+    "    folder_path=quant_path,\n",
+    "    repo_id='mesolitica/llava-v1.6-34b-awq',\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b231844d",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}