import os
#from ctransformers import AutoModelForCausalLM
#from transformers import AutoTokenizer, pipeline
from bertopic.representation import LlamaCPP
from llama_cpp import Llama
from pydantic import BaseModel
import torch.cuda
from bertopic.representation import KeyBERTInspired, MaximalMarginalRelevance, TextGeneration
from funcs.prompts import capybara_prompt, capybara_start, open_hermes_prompt, open_hermes_start, stablelm_prompt, stablelm_start
#from huggingface_hub import hf_hub_download
#hf_hub_download(repo_id='second-state/stablelm-2-zephyr-1.6b-GGUF', filename='stablelm-2-zephyr-1_6b-Q5_K_M.gguf')
hf_model_name = 'TheBloke/phi-2-orange-GGUF' #'NousResearch/Nous-Capybara-7B-V1.9-GGUF' # 'second-state/stablelm-2-zephyr-1.6b-GGUF'
hf_model_file = 'phi-2-orange.Q5_K_M.gguf' #'Capybara-7B-V1.9-Q5_K_M.gguf' # 'stablelm-2-zephyr-1_6b-Q5_K_M.gguf'
chosen_prompt = open_hermes_prompt # stablelm_prompt
chosen_start_tag = open_hermes_start # stablelm_start
# Find model file
def find_model_file(hf_model_name, hf_model_file):
    """Locate a downloaded GGUF model file inside the local Hugging Face cache."""
    hf_loc = os.environ["HF_HOME"]
    hf_sub_loc = os.environ["HF_HOME"] + "/hub/"

    hf_model_name_path = hf_sub_loc + 'models--' + hf_model_name.replace("/", "--")
    print(hf_model_name_path)

    def find_file(root_folder, file_name):
        # Walk the folder tree until the requested file name is found
        for root, dirs, files in os.walk(root_folder):
            if file_name in files:
                return os.path.join(root, file_name)
        return None

    folder_path = hf_model_name_path
    file_to_find = hf_model_file

    found_file = find_file(folder_path, file_to_find)
    if found_file:
        print(f"File found: {found_file}")
        return found_file
    else:
        error = "File not found."
        print(error)
        return error
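# Resolve the local path of the chosen GGUF file in the Hugging Face cache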
found_file = find_model_file(hf_model_name, hf_model_file)
# Note: n_gpu_layers may need to be set to 0 even when CUDA is available, due to persistent bugs in the CUDA implementation
if torch.cuda.is_available():
    torch_device = "gpu"
    low_resource_mode = "No"
    n_gpu_layers = 100
else:
    torch_device = "cpu"
    low_resource_mode = "Yes"
    n_gpu_layers = 0
#low_resource_mode = "Yes"
#print("Running on device:", torch_device)
n_threads = torch.get_num_threads()
print("CPU n_threads:", n_threads)
# Default Model parameters
temperature: float = 0.1
top_k: int = 3
top_p: float = 1
repeat_penalty: float = 1.1
last_n_tokens_size: int = 128
max_tokens: int = 500
seed: int = 42
reset: bool = True
stream: bool = False
n_threads: int = n_threads
n_batch: int = 256
n_ctx: int = 4096
sample: bool = True
trust_remote_code: bool = True
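# Pydantic model grouping the llama.cpp initialisation parameters so they can be dumped as kwargs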
class LLamacppInitConfigGpu(BaseModel):
    last_n_tokens_size: int
    seed: int
    n_threads: int
    n_batch: int
    n_ctx: int
    n_gpu_layers: int
    temperature: float
    top_k: int
    top_p: float
    repeat_penalty: float
    max_tokens: int
    reset: bool
    stream: bool
    stop: str
    trust_remote_code: bool

    def update_gpu(self, new_value: int):
        self.n_gpu_layers = new_value
gpu_config = LLamacppInitConfigGpu(last_n_tokens_size=last_n_tokens_size,
                                   seed=seed,
                                   n_threads=n_threads,
                                   n_batch=n_batch,
                                   n_ctx=n_ctx,
                                   n_gpu_layers=n_gpu_layers,
                                   temperature=temperature,
                                   top_k=top_k,
                                   top_p=top_p,
                                   repeat_penalty=repeat_penalty,
                                   max_tokens=max_tokens,
                                   reset=reset,
                                   stream=stream,
                                   stop=chosen_start_tag,
                                   trust_remote_code=trust_remote_code)
cpu_config = gpu_config.model_copy()
cpu_config.update_gpu(0)
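# cpu_config mirrors gpu_config but keeps all layers on the CPU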
class LLamacppGenerateConfig(BaseModel):
    temperature: float
    top_k: int
    top_p: float
    repeat_penalty: float
    max_tokens: int
    reset: bool
    stream: bool
gen_config = LLamacppGenerateConfig(
    temperature=temperature,
    top_k=top_k,
    top_p=top_p,
    repeat_penalty=repeat_penalty,
    max_tokens=max_tokens,
    reset=reset,
    stream=stream)
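# Note: gen_config is not currently passed to LlamaCPP below (the **gen_config.model_dump() kwargs are left commented out)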
## Create representation model parameters ##
# KeyBERT
keybert = KeyBERTInspired()
if low_resource_mode == "No":
# Use llama.cpp to load in model
llm = Llama(model_path=found_file, stop=chosen_start_tag, n_gpu_layers=n_gpu_layers, n_ctx=n_ctx) #**gpu_config.model_dump())#
#print(llm.n_gpu_layers)
llm_model = LlamaCPP(llm, prompt=chosen_prompt)#, **gen_config.model_dump())
# All representation models
representation_model = {
"KeyBERT": keybert,
"Mistral": llm_model
}
elif low_resource_mode == "Yes":
representation_model = {"KeyBERT": keybert}
# Deprecated example using CTransformers. This package is not really used anymore
#model = AutoModelForCausalLM.from_pretrained('NousResearch/Nous-Capybara-7B-V1.9-GGUF', model_type='mistral', model_file='Capybara-7B-V1.9-Q5_K_M.gguf', hf=True, **vars(gpu_config))
#tokenizer = AutoTokenizer.from_pretrained("NousResearch/Nous-Capybara-7B-V1.9")
#generator = pipeline(task="text-generation", model=model, tokenizer=tokenizer)
# Text generation with Llama 2
#mistral_capybara = TextGeneration(generator, prompt=capybara_prompt)
#mistral_hermes = TextGeneration(generator, prompt=open_hermes_prompt)
# MMR (is rubbish, don't use)
#mmr = MaximalMarginalRelevance(diversity=0.3)