byroneverson committed on
Commit
8430d09
·
verified ·
1 Parent(s): c4fced5

Upload abliterate-gemma-2-27b-it.ipynb

Browse files
Files changed (1) hide show
  1. abliterate-gemma-2-27b-it.ipynb +1 -0
abliterate-gemma-2-27b-it.ipynb ADDED
@@ -0,0 +1 @@
 
 
1
+ {"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"name":"python","version":"3.10.14","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"},"kaggle":{"accelerator":"none","dataSources":[],"dockerImageVersionId":30761,"isInternetEnabled":true,"language":"python","sourceType":"notebook","isGpuEnabled":false}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"markdown","source":"# LLM Abliterate v1.2 script, adapted for google/gemma-2-27b-it (uses bartowski's gguf for llama.cpp)\n\nAuthor: byroneverson\n\nThis script was run on kaggle.com, accelerator: None, persistence: Files only","metadata":{}},{"cell_type":"markdown","source":"# Download bartowski/gemma-2-27b-it-GGUF gemma-2-27b-it-Q4_K_M.gguf locally\n\nUsing a smaller quant (Q4_K_M) for now to test the method; once the method works, will try Q8 to see if Kaggle has the memory for it","metadata":{}},{"cell_type":"code","source":"%cd /kaggle/working\n\nfrom huggingface_hub import hf_hub_download\n\nhf_hub_download(repo_id=\"bartowski/gemma-2-27b-it-GGUF\", filename=\"gemma-2-27b-it-Q4_K_M.gguf\", local_dir=\"/kaggle/working\")","metadata":{"execution":{"iopub.status.busy":"2024-08-26T08:20:58.868406Z","iopub.execute_input":"2024-08-26T08:20:58.868946Z","iopub.status.idle":"2024-08-26T08:24:14.872866Z","shell.execute_reply.started":"2024-08-26T08:20:58.868898Z","shell.execute_reply":"2024-08-26T08:24:14.871710Z"},"trusted":true},"execution_count":6,"outputs":[{"name":"stdout","text":"/kaggle/working\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"gemma-2-27b-it-Q4_K_M.gguf: 0%| | 0.00/16.6G [00:00<?, 
?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"7575c00dda99497baff9bb87d4741b31"}},"metadata":{}},{"execution_count":6,"output_type":"execute_result","data":{"text/plain":"'/kaggle/working/gemma-2-27b-it-Q4_K_M.gguf'"},"metadata":{}}]},{"cell_type":"markdown","source":"# Download original abliterator script for harmful and harmless instructions txt files\nCredit: https://github.com/Sumandora/remove-refusals-with-transformers\n\nTemporary: We need my fork of ggml-python because the official abetlen is out of date and ggml_tensor is incompatible at the moment ","metadata":{}},{"cell_type":"code","source":"%cd /kaggle/working\n!git clone https://github.com/Sumandora/remove-refusals-with-transformers.git\n!git clone --recurse-submodules https://github.com/byroneverson/ggml-python.git","metadata":{"_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19","execution":{"iopub.status.busy":"2024-08-24T23:50:02.664732Z","iopub.execute_input":"2024-08-24T23:50:02.665226Z","iopub.status.idle":"2024-08-24T23:50:16.842944Z","shell.execute_reply.started":"2024-08-24T23:50:02.665173Z","shell.execute_reply":"2024-08-24T23:50:16.841274Z"},"trusted":true},"execution_count":10,"outputs":[{"name":"stdout","text":"/kaggle/working\nfatal: destination path 'remove-refusals-with-transformers' already exists and is not an empty directory.\nCloning into 'llama-cpp-python'...\nremote: Enumerating objects: 8183, done.\u001b[K\nremote: Counting objects: 100% (2406/2406), done.\u001b[K\nremote: Compressing objects: 100% (302/302), done.\u001b[K\nremote: Total 8183 (delta 2294), reused 2118 (delta 2099), pack-reused 5777 (from 1)\u001b[K\nReceiving objects: 100% (8183/8183), 2.00 MiB | 10.17 MiB/s, done.\nResolving deltas: 100% (5349/5349), done.\nSubmodule 'vendor/llama.cpp' (https://github.com/ggerganov/llama.cpp.git) registered for path 'vendor/llama.cpp'\nCloning into 
'/kaggle/working/llama-cpp-python/vendor/llama.cpp'...\nremote: Enumerating objects: 32639, done. \nremote: Counting objects: 100% (11531/11531), done. \nremote: Compressing objects: 100% (805/805), done. \nremote: Total 32639 (delta 11183), reused 10777 (delta 10724), pack-reused 21108 (from 1) \nReceiving objects: 100% (32639/32639), 56.23 MiB | 22.56 MiB/s, done.\nResolving deltas: 100% (23594/23594), done.\nSubmodule path 'vendor/llama.cpp': checked out '1731d4238f9e4f925a750810e7f5480827c66dcf'\nSubmodule 'kompute' (https://github.com/nomic-ai/kompute.git) registered for path 'vendor/llama.cpp/ggml/src/kompute'\nCloning into '/kaggle/working/llama-cpp-python/vendor/llama.cpp/ggml/src/kompute'...\nremote: Enumerating objects: 9111, done. \nremote: Counting objects: 100% (246/246), done. \nremote: Compressing objects: 100% (142/142), done. \nremote: Total 9111 (delta 114), reused 180 (delta 94), pack-reused 8865 (from 1) \nReceiving objects: 100% (9111/9111), 17.58 MiB | 27.41 MiB/s, done.\nResolving deltas: 100% (5721/5721), done.\nSubmodule path 'vendor/llama.cpp/ggml/src/kompute': checked out '4565194ed7c32d1d2efa32ceab4d3c6cae006306'\n","output_type":"stream"}]},{"cell_type":"markdown","source":"# Install my ggml-python and normal llama-cpp-python","metadata":{}},{"cell_type":"code","source":"%cd /kaggle/working\n\n!pip install ./ggml-python\n!pip install llama-cpp-python\n!pip install jaxtyping\n!pip install einops","metadata":{"execution":{"iopub.status.busy":"2024-08-28T12:31:55.353968Z","iopub.execute_input":"2024-08-28T12:31:55.356198Z","iopub.status.idle":"2024-08-28T12:36:22.486789Z","shell.execute_reply.started":"2024-08-28T12:31:55.356091Z","shell.execute_reply":"2024-08-28T12:36:22.485117Z"},"trusted":true},"execution_count":1,"outputs":[{"name":"stdout","text":"/kaggle/working\nProcessing ./ggml-python\n Installing build dependencies ... \u001b[?25ldone\n\u001b[?25h Getting requirements to build wheel ... 
\u001b[?25ldone\n\u001b[?25h Installing backend dependencies ... \u001b[?25ldone\n\u001b[?25h Preparing metadata (pyproject.toml) ... \u001b[?25ldone\n\u001b[?25hRequirement already satisfied: numpy>=1.20.0 in /opt/conda/lib/python3.10/site-packages (from ggml_python==0.0.37) (1.26.4)\nRequirement already satisfied: typing_extensions>=4.6.3 in /opt/conda/lib/python3.10/site-packages (from ggml_python==0.0.37) (4.12.2)\nBuilding wheels for collected packages: ggml_python\n Building wheel for ggml_python (pyproject.toml) ... \u001b[?25ldone\n\u001b[?25h Created wheel for ggml_python: filename=ggml_python-0.0.37-cp310-cp310-linux_x86_64.whl size=702614 sha256=ef43a72a325d5a8510261b569a33f5daefd9089ffab3dde08c0d76bfcf5780e4\n Stored in directory: /root/.cache/pip/wheels/2c/e9/79/52a1e26e8ea183251d8785ae5d126df25b8aa30fddf1e13f32\nSuccessfully built ggml_python\nInstalling collected packages: ggml_python\nSuccessfully installed ggml_python-0.0.37\nCollecting llama-cpp-python\n Downloading llama_cpp_python-0.2.89.tar.gz (64.3 MB)\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m64.3/64.3 MB\u001b[0m \u001b[31m20.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n\u001b[?25h Installing build dependencies ... \u001b[?25ldone\n\u001b[?25h Getting requirements to build wheel ... \u001b[?25ldone\n\u001b[?25h Installing backend dependencies ... \u001b[?25ldone\n\u001b[?25h Preparing metadata (pyproject.toml) ... 
\u001b[?25ldone\n\u001b[?25hRequirement already satisfied: typing-extensions>=4.5.0 in /opt/conda/lib/python3.10/site-packages (from llama-cpp-python) (4.12.2)\nRequirement already satisfied: numpy>=1.20.0 in /opt/conda/lib/python3.10/site-packages (from llama-cpp-python) (1.26.4)\nCollecting diskcache>=5.6.1 (from llama-cpp-python)\n Downloading diskcache-5.6.3-py3-none-any.whl.metadata (20 kB)\nRequirement already satisfied: jinja2>=2.11.3 in /opt/conda/lib/python3.10/site-packages (from llama-cpp-python) (3.1.4)\nRequirement already satisfied: MarkupSafe>=2.0 in /opt/conda/lib/python3.10/site-packages (from jinja2>=2.11.3->llama-cpp-python) (2.1.5)\nDownloading diskcache-5.6.3-py3-none-any.whl (45 kB)\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.5/45.5 kB\u001b[0m \u001b[31m1.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25hBuilding wheels for collected packages: llama-cpp-python\n Building wheel for llama-cpp-python (pyproject.toml) ... 
\u001b[?25ldone\n\u001b[?25h Created wheel for llama-cpp-python: filename=llama_cpp_python-0.2.89-cp310-cp310-linux_x86_64.whl size=3292280 sha256=4c1a6e70e2e85a256f69018eb392224e0b2a9997680f8127ff10dd42f5f399c2\n Stored in directory: /root/.cache/pip/wheels/51/aa/62/15368ae9ce6bc6037b8f6648ac27339822f2a76751232e4166\nSuccessfully built llama-cpp-python\nInstalling collected packages: diskcache, llama-cpp-python\nSuccessfully installed diskcache-5.6.3 llama-cpp-python-0.2.89\nCollecting jaxtyping\n Downloading jaxtyping-0.2.33-py3-none-any.whl.metadata (6.4 kB)\nCollecting typeguard==2.13.3 (from jaxtyping)\n Downloading typeguard-2.13.3-py3-none-any.whl.metadata (3.6 kB)\nDownloading jaxtyping-0.2.33-py3-none-any.whl (42 kB)\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m42.4/42.4 kB\u001b[0m \u001b[31m1.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25hDownloading typeguard-2.13.3-py3-none-any.whl (17 kB)\nInstalling collected packages: typeguard, jaxtyping\n Attempting uninstall: typeguard\n Found existing installation: typeguard 4.3.0\n Uninstalling typeguard-4.3.0:\n Successfully uninstalled typeguard-4.3.0\n\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\nydata-profiling 4.9.0 requires scipy<1.14,>=1.4.1, but you have scipy 1.14.0 which is incompatible.\nydata-profiling 4.9.0 requires typeguard<5,>=3, but you have typeguard 2.13.3 which is incompatible.\u001b[0m\u001b[31m\n\u001b[0mSuccessfully installed jaxtyping-0.2.33 typeguard-2.13.3\nCollecting einops\n Downloading einops-0.8.0-py3-none-any.whl.metadata (12 kB)\nDownloading einops-0.8.0-py3-none-any.whl (43 kB)\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m43.2/43.2 kB\u001b[0m \u001b[31m1.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25hInstalling collected packages: einops\nSuccessfully installed einops-0.8.0\n","output_type":"stream"}]},{"cell_type":"markdown","source":"# Obtain estimated refusal direction vector\n\nDetermine the layer where the model has sufficiently developed a sense of difference between the harmful and harmless.\n\nIt seems that larger models still should have this done at a similar layer as smaller ones.\n\nFor models with hidden_size of 4096 and intermediate size ~14k, this is roughly layer 19 or 20 regardless of layer count.\n\nTODO: Perform PCA (Principal component analysis) in a separate step to help detect which layer(s) are ideal.","metadata":{}},{"cell_type":"code","source":"%cd /kaggle/working\n\nimport ctypes\nimport os\nimport multiprocessing\nimport random\nimport gc\nimport sys\n\n# llama.cpp/GGML library\nimport llama_cpp\nimport ggml\n\n# Easy tensor handling\nimport torch\nfrom math import prod\n\nfrom tqdm import tqdm\n\n# Number of total layers in your model\ntotal_layers = 40\ntarget_percent = 0.5 # 50% through the layers\ntarget_index = int(total_layers * target_percent)\n\n# Number of instructions to average for our feature estimation (e.g. 
512 for harmful and 512 for harmless)\ninstructions = 64 #512\n\n# Our local gguf model\n# TODO: Load model with only num_layers we actually need for this step\nlocal_repo_dir = \"/kaggle/working\"\nmodel_path = local_repo_dir + \"/\" + \"gemma-2-27b-it-Q4_K_M.gguf\"\n\n# Init llama backend\nllama_cpp.llama_backend_init(numa=False)\n\n# llama.cpp custom model code\n\ndef c_array_to_tensor(pointer, shape, torch_type):\n arr = (pointer._type_ * prod(shape)).from_address(\n ctypes.addressof(pointer.contents))\n return torch.frombuffer(arr, dtype=torch_type).view(*shape)\n\ndef model_load(model_path):\n # TODO: Attempt to hook num_layers\n model_params = llama_cpp.llama_model_default_params()\n model_params.n_gpu_layers = 0\n model_params.use_mmap = True\n model = llama_cpp.llama_load_model_from_file(model_path.encode(\"utf-8\"), model_params)\n \n return model\n\ndef model_free(model):\n llama_cpp.llama_free(model)\n\ndef model_apply_chat_template(model, role, content, add_assistant=True):\n chat_message = llama_cpp.llama_chat_message(role=role.encode(\"utf-8\"), content=content.encode(\"utf-8\"))\n buffer_length = len(content) * 2\n buffer = ctypes.create_string_buffer(buffer_length)\n result = llama_cpp.llama_chat_apply_template(model, None, ctypes.pointer(chat_message), 1, add_assistant, buffer, ctypes.c_int32(buffer_length))\n if result <= 0:\n return input_str\n elif result >= buffer_length:\n buffer_length = result + 1\n buffer = ctypes.create_string_buffer(buffer_length)\n result = llama_cpp.llama_chat_apply_template(model, None, ctypes.pointer(chat_message), 1, add_assistant, buffer, ctypes.c_int32(buffer_length))\n if result > 0:\n return buffer.value.decode(\"utf-8\")\n else:\n return content\n \ndef model_tokenize(model, prompt):\n prompt_count = len(prompt.encode('utf-8'))\n if prompt_count == 0:\n return []\n\n tokens = (ctypes.c_int32 * prompt_count)()\n count = llama_cpp.llama_tokenize(model, \n prompt.encode('utf-8'), \n ctypes.c_int32(prompt_count), 
\n tokens, \n ctypes.c_int32(prompt_count), \n True, \n True)\n if prompt_count > count:\n tokens = tokens[:count]\n return tokens\n\n# Callback should fill this as the model runs\n# 2 tensors for input embedding\n# 40 tensors per layer\n\ndef print_tensor_info(t_ptr):\n #: contiguous: {ggml.ggml_is_contiguous(t)}, permuted: {ggml.ggml_is_permuted(t)}, transposed: {ggml.ggml_is_transposed(t)}\"\n t = t_ptr.contents\n print(f\"{ggml.ggml_type_name(t.type)} {ggml.ggml_op_desc(t_ptr)} {t.name}\")\n print(f\" n_elements = {ggml.ggml_nelements(t)}\")\n print(f\" ne = ({t.ne[0]}, {t.ne[1]}, {t.ne[2]}, {t.ne[3]})\")\n print(f\" nb = ({t.nb[0]}, {t.nb[1]}, {t.nb[2]}, {t.nb[3]})\")\n is_host = ggml.ggml_backend_buffer_is_host(t.buffer)\n print(f\" is_host = {is_host}\")\n print(f\" buffer = {t.buffer}\")\n print(f\" data = {t.data}\")\n if ctypes.c_void_p.from_buffer(t.src[0]).value != None:\n print(f\" src[0] = {ggml.ggml_op_desc(t.src[0])}\")\n if ctypes.c_void_p.from_buffer(t.src[1]).value != None:\n print(f\" src[1] = {ggml.ggml_op_desc(t.src[1])}\")\n\nc_abort_callback = ctypes.CFUNCTYPE(\n ctypes.c_bool, ctypes.c_void_p\n)(abort_callback)\n\nclass CallbackDataStruct(ctypes.Structure):\n _fields_ = [\n (\"layer_tensor_count\", ctypes.c_int),\n (\"layer_index\", ctypes.c_int),\n (\"target_index\", ctypes.c_int),\n (\"tensor_index\", ctypes.c_int),\n (\"tensor\", ctypes.c_void_p)\n ]\n\ncallback_data = CallbackDataStruct()\ncallback_data.target_index = 20 #target_index\ncallback_data.layer_tensor_count = 40\ncallback_data.layer_index = -1\ncallback_data.tensor_index = 0\ncallback_data.tensor = 0\n\ndef hidden_states_eval_callback(t_void_p, ask, user_data):\n cb_data_ptr = ctypes.cast(user_data, ctypes.POINTER(CallbackDataStruct))\n cb_data = cb_data_ptr.contents\n t_ptr = ctypes.cast(t_void_p, ctypes.POINTER(ggml.ggml_tensor))\n t = t_ptr.contents\n if ask:\n #print(f\"{ggml.ggml_type_name(t.type)} {ggml.ggml_op_desc(t_ptr)} {t.name} ({t.ne[0]}, {t.ne[1]})\")\n index = 
cb_data.tensor_index\n cb_data.tensor_index += 1\n if index % cb_data.layer_tensor_count == 1: #1\n layer_index = cb_data.layer_index\n cb_data.layer_index += 1\n if layer_index >= -1:\n sys.stdout.flush()\n if layer_index == cb_data.target_index:\n #print(f\"Target layer {layer_index}, tensor {index}\")\n # Request data next callback\n return True\n else:\n cb_data.tensor = t_void_p\n #print_tensor_info(t_ptr)\n sys.stdout.flush()\n \n # Returning false should stop graph in it's tracks without error, this may let us get the current progress in embeddings?\n return False #True # Continue graph\n # return True to request data next callback, false to skip, ask will be False when returning data from a request\n return False\n\nc_hidden_states_eval_callback = ctypes.CFUNCTYPE(\n ctypes.c_bool, ctypes.c_void_p, ctypes.c_bool, ctypes.c_void_p\n)(hidden_states_eval_callback) \n\ndef model_generate_hidden_states(model, prompt, n_predict=1):\n # Reset callbacks count\n callback_data.layer_index = -1\n callback_data.tensor_index = 0\n \n # Start with no past\n n_past = 0\n \n # Reset hidden_states\n hidden_states = []\n \n #DEBUG\n #prompt = \"Test?\"\n \n # Chat template\n prompt = model_apply_chat_template(model, \n role=\"user\", \n content=prompt, \n add_assistant=True)\n \n # Add space for llama only, check model params for add space var\n add_space = False # FIX\n if add_space:\n prompt = b\" \" + prompt\n \n toks = model_tokenize(model, prompt)\n n_tokens = len(toks)\n #print(prompt)\n #print(n_tokens)\n #print(toks)\n\n # Clear cache per example\n llama_cpp.llama_kv_cache_clear(context)\n \n # Fill batch\n batch.n_tokens = n_tokens\n for i in range(n_tokens):\n batch.token[i] = toks[i]\n batch.pos[i] = i\n batch.seq_id[i][0] = 0\n batch.n_seq_id[i] = 1\n batch.logits[i] = False\n batch.logits[n_tokens - 1] = True\n \n # Decode batch\n result = llama_cpp.llama_decode(context, batch)\n if result == 1:\n print(\"decode warning\")\n elif result < 0:\n print(\"decode 
error\")\n sys.stdout.flush()\n\n # Get data from tensor\n t_ptr = ctypes.cast(callback_data.tensor, ctypes.POINTER(ggml.ggml_tensor))\n #print_tensor_info(t_ptr)\n data = ctypes.cast(t_ptr.contents.data, ctypes.POINTER(ctypes.c_float))\n n_elements = ggml.ggml_nelements(t_ptr)\n n_embd = llama_cpp.llama_n_embd(model)\n \n # Convert float buffer to torch array for easy handling\n hidden_state = c_array_to_tensor(data, (n_elements // n_embd, n_embd), torch.float32)\n #print(type(hidden_state))\n #print(hidden_state.shape)\n #print(hidden_state)\n #print(hidden_state[0])\n #sys.stdout.flush()\n return hidden_state[-1]\n\n# Clear memory of past model usage\nmodel = None\ngc.collect()\n\n# Load model\nmodel = model_load(model_path)\n\nprint(\"Instruction count: \" + str(instructions))\nprint(\"Target layer index: \" + str(target_index))\n\nwith open(\"./remove-refusals-with-transformers/harmful.txt\", \"r\") as f:\n harmful = f.readlines()\n\nwith open(\"./remove-refusals-with-transformers/harmless.txt\", \"r\") as f:\n harmless = f.readlines()\n\nharmful_instructions = random.sample(harmful, instructions)\nharmless_instructions = random.sample(harmless, instructions)\n\ngc.collect()\n\n# Generate target layer hidden state files for harmful and harmless features\ndef save_target_hidden_states(prompt, index, feature):\n bar.update(n=1)\n \n # Generates using each example, cache is disables so it doesn't keep previous examples in it's context, obviously we need to output the full states\n # It would be ideal if we could have it output the states for only the layer we want\n output = model_generate_hidden_states(model, prompt)\n # We still select the target layers, then only keep the hidden state of the last token (-1 part)\n hidden = output #output.hidden_states[0][target_index][:, -1, :]\n # Save each hidden state to disk to keep memory usage at a minimum\n dir_path = local_repo_dir + \"/\" + feature + \"_states\"\n file_path = dir_path + \"/\" + str(index) + \".pt\"\n 
if not os.path.exists(dir_path):\n os.makedirs(dir_path)\n torch.save(hidden, file_path)\n\n# Create context\ncontext_params = llama_cpp.llama_context_default_params()\nn_threads = multiprocessing.cpu_count()\ncontext_params.n_threads = n_threads\ncontext_params.n_threads_batch = n_threads\ncontext_params.seed = 1234\ncontext_params.cb_eval = c_hidden_states_eval_callback\ncontext_params.cb_eval_user_data = ctypes.cast(ctypes.pointer(callback_data), ctypes.c_void_p)\ncontext = llama_cpp.llama_new_context_with_model(model, context_params)\n\n# Create batch\nbatch = llama_cpp.llama_batch_init(context_params.n_batch, 0, context_params.n_ctx)\n\n# Progress bar\n\nimport time\ntime.sleep(5) # Let model finish printing before start\nsys.stdout.flush()\nmax_its = instructions * 2\nbar = tqdm(total=max_its)\n\n# Save harmful states\nfor index, instruction in enumerate(harmful_instructions):\n save_target_hidden_states(instruction, index, \"harmful\")\n\n# Save harmless states\nfor index, instruction in enumerate(harmless_instructions):\n save_target_hidden_states(instruction, index, \"harmless\")\n\nbar.close()\n\n# Clear memory of model usage\n# Free batch, model, context, and backend\nllama_cpp.llama_batch_free(batch)\nllama_cpp.llama_free(context)\nllama_cpp.llama_free_model(model)\nllama_cpp.llama_backend_free()\n\nmodel = None\ncontext = None\nharmful_instructions = None\nharmless_instructions = None\ngc.collect()\n\n# Load the hidden state of an instruction for a specific feature\ndef load_target_hidden_state(feature, index):\n file_path = local_repo_dir + \"/\" + feature + \"_states\" + \"/\" + str(index) + \".pt\"\n return torch.load(file_path)\n\n# Get the means of harmful states\nharmful_hidden = [load_target_hidden_state(\"harmful\", i) for i in range(instructions)]\nharmful_mean = torch.stack(harmful_hidden).mean(dim=0) \n\nharmful_hidden = None\ngc.collect()\n\n# Get the means of harmless states\nharmless_hidden = [load_target_hidden_state(\"harmless\", i) for 
i in range(instructions)]\nharmless_mean = torch.stack(harmless_hidden).mean(dim=0) \n\nharmful_hidden = None\ngc.collect()\n \n# Get refusal direction tensor and save it to disk\nrefusal_direction = harmful_mean - harmless_mean\nrefusal_direction = refusal_direction / refusal_direction.norm()\nprint(refusal_direction)\nlocal_repo_dir = \"/kaggle/working/gemma-2-27b-it\"\nif not os.path.exists(local_repo_dir):\n os.makedirs(local_repo_dir)\ntorch.save(refusal_direction, local_repo_dir + \"/\" + \"refusal_direction.pt\")\n\n# Clean-up\nharmful_hidden = None\nharmless_hidden = None\ngc.collect()","metadata":{"execution":{"iopub.status.busy":"2024-08-28T10:32:42.654236Z","iopub.execute_input":"2024-08-28T10:32:42.654774Z","iopub.status.idle":"2024-08-28T10:50:14.228447Z","shell.execute_reply.started":"2024-08-28T10:32:42.654717Z","shell.execute_reply":"2024-08-28T10:50:14.227268Z"},"trusted":true},"execution_count":28,"outputs":[{"name":"stderr","text":"llama_model_loader: loaded meta data with 33 key-value pairs and 508 tensors from /kaggle/working/gemma-2-27b-it-Q4_K_M.gguf (version GGUF V3 (latest))\nllama_model_loader: Dumping metadata keys/values. 
Note: KV overrides do not apply in this output.\nllama_model_loader: - kv 0: general.architecture str = gemma2\nllama_model_loader: - kv 1: general.name str = gemma-2-27b-it\nllama_model_loader: - kv 2: gemma2.context_length u32 = 8192\nllama_model_loader: - kv 3: gemma2.embedding_length u32 = 4608\nllama_model_loader: - kv 4: gemma2.block_count u32 = 46\nllama_model_loader: - kv 5: gemma2.feed_forward_length u32 = 36864\nllama_model_loader: - kv 6: gemma2.attention.head_count u32 = 32\nllama_model_loader: - kv 7: gemma2.attention.head_count_kv u32 = 16\nllama_model_loader: - kv 8: gemma2.attention.layer_norm_rms_epsilon f32 = 0.000001\nllama_model_loader: - kv 9: gemma2.attention.key_length u32 = 128\nllama_model_loader: - kv 10: gemma2.attention.value_length u32 = 128\nllama_model_loader: - kv 11: general.file_type u32 = 15\nllama_model_loader: - kv 12: gemma2.attn_logit_softcapping f32 = 50.000000\nllama_model_loader: - kv 13: gemma2.final_logit_softcapping f32 = 30.000000\nllama_model_loader: - kv 14: gemma2.attention.sliding_window u32 = 4096\nllama_model_loader: - kv 15: tokenizer.ggml.model str = llama\nllama_model_loader: - kv 16: tokenizer.ggml.pre str = default\nllama_model_loader: - kv 17: tokenizer.ggml.tokens arr[str,256000] = [\"<pad>\", \"<eos>\", \"<bos>\", \"<unk>\", ...\n","output_type":"stream"},{"name":"stdout","text":"/kaggle/working\n","output_type":"stream"},{"name":"stderr","text":"llama_model_loader: - kv 18: tokenizer.ggml.scores arr[f32,256000] = [-1000.000000, -1000.000000, -1000.00...\nllama_model_loader: - kv 19: tokenizer.ggml.token_type arr[i32,256000] = [3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, ...\nllama_model_loader: - kv 20: tokenizer.ggml.bos_token_id u32 = 2\nllama_model_loader: - kv 21: tokenizer.ggml.eos_token_id u32 = 1\nllama_model_loader: - kv 22: tokenizer.ggml.unknown_token_id u32 = 3\nllama_model_loader: - kv 23: tokenizer.ggml.padding_token_id u32 = 0\nllama_model_loader: - kv 24: tokenizer.ggml.add_bos_token bool = 
true\nllama_model_loader: - kv 25: tokenizer.ggml.add_eos_token bool = false\nllama_model_loader: - kv 26: tokenizer.chat_template str = {{ bos_token }}{% if messages[0]['rol...\nllama_model_loader: - kv 27: tokenizer.ggml.add_space_prefix bool = false\nllama_model_loader: - kv 28: general.quantization_version u32 = 2\nllama_model_loader: - kv 29: quantize.imatrix.file str = /models_out/gemma-2-27b-it-GGUF/gemma...\nllama_model_loader: - kv 30: quantize.imatrix.dataset str = /training_dir/calibration_datav3.txt\nllama_model_loader: - kv 31: quantize.imatrix.entries_count i32 = 322\nllama_model_loader: - kv 32: quantize.imatrix.chunks_count i32 = 128\nllama_model_loader: - type f32: 185 tensors\nllama_model_loader: - type q4_K: 278 tensors\nllama_model_loader: - type q6_K: 45 tensors\nllm_load_vocab: special tokens cache size = 217\nllm_load_vocab: token to piece cache size = 1.6014 MB\nllm_load_print_meta: format = GGUF V3 (latest)\nllm_load_print_meta: arch = gemma2\nllm_load_print_meta: vocab type = SPM\nllm_load_print_meta: n_vocab = 256000\nllm_load_print_meta: n_merges = 0\nllm_load_print_meta: vocab_only = 0\nllm_load_print_meta: n_ctx_train = 8192\nllm_load_print_meta: n_embd = 4608\nllm_load_print_meta: n_layer = 46\nllm_load_print_meta: n_head = 32\nllm_load_print_meta: n_head_kv = 16\nllm_load_print_meta: n_rot = 128\nllm_load_print_meta: n_swa = 4096\nllm_load_print_meta: n_embd_head_k = 128\nllm_load_print_meta: n_embd_head_v = 128\nllm_load_print_meta: n_gqa = 2\nllm_load_print_meta: n_embd_k_gqa = 2048\nllm_load_print_meta: n_embd_v_gqa = 2048\nllm_load_print_meta: f_norm_eps = 0.0e+00\nllm_load_print_meta: f_norm_rms_eps = 1.0e-06\nllm_load_print_meta: f_clamp_kqv = 0.0e+00\nllm_load_print_meta: f_max_alibi_bias = 0.0e+00\nllm_load_print_meta: f_logit_scale = 0.0e+00\nllm_load_print_meta: n_ff = 36864\nllm_load_print_meta: n_expert = 0\nllm_load_print_meta: n_expert_used = 0\nllm_load_print_meta: causal attn = 1\nllm_load_print_meta: pooling type = 
0\nllm_load_print_meta: rope type = 2\nllm_load_print_meta: rope scaling = linear\nllm_load_print_meta: freq_base_train = 10000.0\nllm_load_print_meta: freq_scale_train = 1\nllm_load_print_meta: n_ctx_orig_yarn = 8192\nllm_load_print_meta: rope_finetuned = unknown\nllm_load_print_meta: ssm_d_conv = 0\nllm_load_print_meta: ssm_d_inner = 0\nllm_load_print_meta: ssm_d_state = 0\nllm_load_print_meta: ssm_dt_rank = 0\nllm_load_print_meta: ssm_dt_b_c_rms = 0\nllm_load_print_meta: model type = 27B\nllm_load_print_meta: model ftype = Q4_K - Medium\nllm_load_print_meta: model params = 27.23 B\nllm_load_print_meta: model size = 15.50 GiB (4.89 BPW) \nllm_load_print_meta: general.name = gemma-2-27b-it\nllm_load_print_meta: BOS token = 2 '<bos>'\nllm_load_print_meta: EOS token = 1 '<eos>'\nllm_load_print_meta: UNK token = 3 '<unk>'\nllm_load_print_meta: PAD token = 0 '<pad>'\nllm_load_print_meta: LF token = 227 '<0x0A>'\nllm_load_print_meta: EOT token = 107 '<end_of_turn>'\nllm_load_print_meta: max token length = 48\nllm_load_tensors: ggml ctx size = 0.23 MiB\nllm_load_tensors: CPU buffer size = 15868.49 MiB\n............................................................................................\n","output_type":"stream"},{"name":"stdout","text":"Instruction count: 64\nTarget layer index: 20\n","output_type":"stream"},{"name":"stderr","text":"llama_new_context_with_model: n_ctx = 512\nllama_new_context_with_model: n_batch = 512\nllama_new_context_with_model: n_ubatch = 512\nllama_new_context_with_model: flash_attn = 0\nllama_new_context_with_model: freq_base = 10000.0\nllama_new_context_with_model: freq_scale = 1\nllama_kv_cache_init: CPU KV buffer size = 184.00 MiB\nllama_new_context_with_model: KV self size = 184.00 MiB, K (f16): 92.00 MiB, V (f16): 92.00 MiB\nllama_new_context_with_model: CPU output buffer size = 0.98 MiB\nllama_new_context_with_model: CPU compute buffer size = 509.00 MiB\nllama_new_context_with_model: graph nodes = 1850\nllama_new_context_with_model: 
graph splits = 1\n100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 128/128 [16:51<00:00, 7.90s/it]\n/tmp/ipykernel_36/504046463.py:321: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.\n return torch.load(file_path)\n","output_type":"stream"},{"name":"stdout","text":"tensor([-1.1803e-02, -2.3992e-03, 6.0799e-03, ..., -3.0787e-05,\n -5.7875e-04, -1.0906e-02])\n","output_type":"stream"},{"execution_count":28,"output_type":"execute_result","data":{"text/plain":"0"},"metadata":{}}]},{"cell_type":"markdown","source":"# Remove temporary harmful and harmless hidden state files","metadata":{}},{"cell_type":"code","source":"%cd /kaggle/working/glm-4-9b-chat\n!rm -r ./harmless_states\n!rm -r 
./harmful_states","metadata":{"execution":{"iopub.status.busy":"2024-08-24T08:48:12.219253Z","iopub.execute_input":"2024-08-24T08:48:12.219689Z","iopub.status.idle":"2024-08-24T08:48:14.543226Z","shell.execute_reply.started":"2024-08-24T08:48:12.219649Z","shell.execute_reply":"2024-08-24T08:48:14.541831Z"},"trusted":true},"execution_count":10,"outputs":[{"name":"stdout","text":"/kaggle/working/glm-4-9b-chat\n","output_type":"stream"},{"name":"stderr","text":"/opt/conda/lib/python3.10/pty.py:89: RuntimeWarning: os.fork() was called. os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock.\n pid, fd = os.forkpty()\n","output_type":"stream"}]},{"cell_type":"markdown","source":"# Modify individual safetensors files separately to save memory\n\nTo save space in kaggle, I will download each split separately and patch it, then upload it to my own repo.\n\nAll of the smaller files will be uploaded as a folder.\n\nBe sure to change the repo to your newly created huggingface repo and set all your kaggle secrets for reading and writing to hf!","metadata":{}},{"cell_type":"code","source":"%cd /kaggle/working\n\nimport gc\ngc.collect()\n\nfrom safetensors import safe_open\nfrom safetensors.torch import save_file\nfrom typing import Optional, Tuple\n\nimport einops\nimport jaxtyping\nimport torch\n\nfrom huggingface_hub import hf_hub_download\nfrom huggingface_hub import upload_folder\nfrom huggingface_hub import upload_file\n\nfrom transformers import AutoConfig\n\nlocal_repo_dir = \"/kaggle/working/gemma-2-27b-it\"\n\nrepo_id = \"byroneverson/gemma-2-27b-it-abliterated\"\n\nfrom kaggle_secrets import UserSecretsClient\n\nuser_secrets = UserSecretsClient()\nread_token = user_secrets.get_secret(\"hf_read\")\nwrite_token = user_secrets.get_secret(\"hf_write\")\n\n# Download necessary files\ntry:\n for filename in [\"config.json\", \n \"generation_config.json\",\n \"model.safetensors.index.json\", \n 
\"special_tokens_map.json\", \n \"tokenizer.json\", \n \"tokenizer.model\", \n \"tokenizer_config.json\"]:\n hf_hub_download(repo_id=\"google/gemma-2-27b-it\", filename=filename, local_dir=local_repo_dir, use_auth_token=read_token)\nexcept Exception as e:\n print(f\"Error downloading {filename}: {e}\")\n\n# Upload smaller files first\ntry:\n upload_folder(folder_path=local_repo_dir, repo_id=repo_id, token=write_token)\nexcept Exception as e:\n print(f\"Error uploading folder: {e}\")\n\nconfig = AutoConfig.from_pretrained(local_repo_dir, local_files_only=True, trust_remote_code=True)\nrefusal_direction = torch.load(local_repo_dir + \"/\" + \"refusal_direction.pt\").to(torch.float32)\n\ndef orthogonalize_matrix(matrix: jaxtyping.Float[torch.Tensor, \"... d\"], \n direction: jaxtyping.Float[torch.Tensor, \"d\"]) -> jaxtyping.Float[torch.Tensor, \"... d\"]:\n proj = einops.einsum(matrix, direction.view(-1, 1), \"... d, d single -> ... single\") * direction\n return matrix - proj\n\ndef load_safetensors_file(file_path):\n \"\"\"Loads a single safetensors file into a dictionary of tensors.\n Args:\n file_path (str): Path to the safetensors file.\n Returns:\n dict: A dictionary containing the loaded tensors.\n \"\"\"\n tensors = {}\n with safe_open(file_path, framework=\"pt\", device=\"cpu\") as f:\n #print(f.metadata())\n for key in f.keys():\n tensors[key] = f.get_tensor(key)\n return tensors\n\n# Make sure safetensors count matches the actual count for the model you are modifying\nsafetensors_count = 12\ndevice = refusal_direction.device\n# TODO: Add in skip start and end layers logic\n# I forgot to in v1.0 but the abliterated output model still worked great so I didn't even notice\nfor idx in range(safetensors_count):\n gc.collect()\n \n filename = \"model-\" + str(idx + 1).zfill(5) + \"-of-\" + str(safetensors_count).zfill(5) + \".safetensors\"\n print(filename)\n \n # Download file \n temp_dir = \"/kaggle/temp\"\n hf_hub_download(repo_id=\"google/gemma-2-27b-it\", 
filename=filename, local_dir=temp_dir, use_auth_token=read_token)\n \n file_path = temp_dir + \"/\" + filename\n tensors = load_safetensors_file(file_path)\n \n for tensor in tensors:\n # tok_embeddings\n if \".embed_tokens.weight\" in tensor:\n print(\"β€’ \" + tensor)\n dtype = tensors[tensor].dtype\n t = tensors[tensor].to(torch.float32).to(device)\n tensors[tensor].copy_(orthogonalize_matrix(t, refusal_direction).to(dtype))\n t = []\n \n # attention.wo\n if \".self_attn.o_proj.weight\" in tensor:\n print(\"β€’ \" + tensor)\n dtype = tensors[tensor].dtype\n t = tensors[tensor].to(torch.float32).to(device)\n t_rearranged = einops.rearrange(t, \"m (n h) -> n h m\", n=config.num_attention_heads).to(device)\n t_orthogonalized = orthogonalize_matrix(t_rearranged, refusal_direction)\n t_rearranged = einops.rearrange(t_orthogonalized, \"n h m -> m (n h)\", n=config.num_attention_heads)\n tensors[tensor].copy_(t_rearranged.to(dtype))\n t = []\n t_rearranged = []\n t_orthogonalized = []\n \n # feed_forward.w2\n if \".mlp.down_proj.weight\" in tensor:\n print(\"β€’ \" + tensor)\n dtype = tensors[tensor].dtype\n t = tensors[tensor].to(torch.float32).to(device)\n t_transposed = t.T.to(device)\n t_orthogonalized = orthogonalize_matrix(t_transposed, refusal_direction)\n t_transposed = t_orthogonalized.T\n tensors[tensor].copy_(t_transposed.to(dtype))\n t = []\n t_transposed = []\n t_orthogonalized = []\n \n # Save file\n save_file(tensors, file_path, metadata={'format': 'pt'})\n \n # Upload file to your repo\n upload_file(path_or_fileobj=file_path, path_in_repo=filename, repo_id=repo_id, token=write_token)\n \n import os\n if os.path.exists(file_path):\n os.remove(file_path)\n else:\n print(\"Remove error: The file does not exist\")\n\n# Patching done\nprint(\"done!\")\n","metadata":{"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"# Use GGUF My Repo space on HF to convert abliterated model back to GGUF\n\n# Test in your favorite 
llama.cpp environment","metadata":{}}]}