import os
#from ctransformers import AutoModelForCausalLM
#from transformers import AutoTokenizer, pipeline
from bertopic.representation import LlamaCPP
from llama_cpp import Llama
from pydantic import BaseModel
import torch.cuda
from huggingface_hub import hf_hub_download, HfFolder
from bertopic.representation import KeyBERTInspired, MaximalMarginalRelevance, TextGeneration
from funcs.prompts import capybara_prompt, capybara_start, open_hermes_prompt, open_hermes_start, stablelm_prompt, stablelm_start
random_seed = 42
chosen_prompt = open_hermes_prompt # stablelm_prompt
chosen_start_tag = open_hermes_start # stablelm_start
# Currently set n_gpu_layers to 0 even with cuda due to persistent bugs in implementation with cuda
if torch.cuda.is_available():
    torch_device = "gpu"
    low_resource_mode = "No"
    n_gpu_layers = 100
else:
    torch_device = "cpu"
    low_resource_mode = "Yes"
    n_gpu_layers = 0
low_resource_mode = "No" # Override for testing
#print("Running on device:", torch_device)
n_threads = torch.get_num_threads()
print("CPU n_threads:", n_threads)
# Default Model parameters
temperature: float = 0.1
top_k: int = 3
top_p: float = 1
repeat_penalty: float = 1.1
last_n_tokens_size: int = 128
max_tokens: int = 500
seed: int = 42
reset: bool = True
stream: bool = False
n_threads: int = n_threads
n_batch: int = 256
n_ctx: int = 4096
sample: bool = True
trust_remote_code: bool = True
class LLamacppInitConfigGpu(BaseModel):
    last_n_tokens_size: int
    seed: int
    n_threads: int
    n_batch: int
    n_ctx: int
    n_gpu_layers: int
    temperature: float
    top_k: int
    top_p: float
    repeat_penalty: float
    max_tokens: int
    reset: bool
    stream: bool
    stop: str
    trust_remote_code: bool

    def update_gpu(self, new_value: int):
        self.n_gpu_layers = new_value
llm_config = LLamacppInitConfigGpu(last_n_tokens_size=last_n_tokens_size,
                                   seed=seed,
                                   n_threads=n_threads,
                                   n_batch=n_batch,
                                   n_ctx=n_ctx,
                                   n_gpu_layers=n_gpu_layers,
                                   temperature=temperature,
                                   top_k=top_k,
                                   top_p=top_p,
                                   repeat_penalty=repeat_penalty,
                                   max_tokens=max_tokens,
                                   reset=reset,
                                   stream=stream,
                                   stop=chosen_start_tag,
                                   trust_remote_code=trust_remote_code)
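
# Illustrative note (not part of the original flow): the helper defined on the config
# class above can change GPU offloading at runtime, e.g. llm_config.update_gpu(0) to
# force CPU-only inference before the Llama object is created.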
## Create representation model parameters ##
# KeyBERT
keybert = KeyBERTInspired(random_state=random_seed)
# MMR
mmr = MaximalMarginalRelevance(diversity=0.2)
# Find model file
def find_model_file(hf_model_name, hf_model_file, search_folder):
    hf_loc = search_folder #os.environ["HF_HOME"]
    hf_sub_loc = search_folder + "/hub/" #os.environ["HF_HOME"]

    hf_model_name_path = hf_sub_loc + 'models--' + hf_model_name.replace("/","--")
    print(hf_model_name_path)

    def find_file(root_folder, file_name):
        # Walk the folder tree until the requested file name is found
        for root, dirs, files in os.walk(root_folder):
            if file_name in files:
                return os.path.join(root, file_name)
        return None

    # Search the local Hugging Face cache for the requested model file
    folder_path = hf_model_name_path
    file_to_find = hf_model_file

    found_file = find_file(folder_path, file_to_find)

    if found_file:
        print(f"Model file found: {found_file}")
        return found_file
    else:
        print("File not found. Downloading model from hub")

        # Download into the search folder, creating it first if it doesn't exist
        hf_home_value = search_folder
        if not os.path.exists(hf_home_value):
            os.makedirs(hf_home_value)

        print("Downloading model to: ", hf_home_value)

        # Download the requested file (rather than a hard-coded filename) so the
        # follow-up search below can actually find it
        hf_hub_download(repo_id=hf_model_name, filename=hf_model_file, cache_dir=hf_home_value)

        found_file = find_file(hf_home_value, file_to_find)
        return found_file
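
# Illustrative call (the repo id and cache path below are placeholders, not project defaults):
# find_model_file("example-org/phi-2-orange-GGUF", "phi-2-orange.Q5_K_M.gguf",
#                 os.path.expanduser("~/.cache/huggingface"))
# This returns the absolute path to the .gguf file, downloading it from the hub first if
# it is not already present in the local cache.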
def create_representation_model(create_llm_topic_labels, llm_config, hf_model_name, hf_model_file, chosen_start_tag, low_resource_mode):

    if create_llm_topic_labels == "Yes":
        # Use llama.cpp to load in the model

        # Remove HF_HOME for testing on systems without this environment variable set.
        # Note: os.unsetenv does not update os.environ, so delete the key directly.
        if "HF_HOME" in os.environ:
            del os.environ["HF_HOME"]

        # Get the HF_HOME environment variable, or default to "~/.cache/huggingface/hub"
        # (the typical location for Hugging Face models)
        hf_home_value = os.getenv("HF_HOME", "~/.cache/huggingface/hub")

        # Expand the user symbol '~' to the full home directory path
        hf_home_value = os.path.expanduser(hf_home_value)

        # Check if the directory exists, create it if it doesn't
        if not os.path.exists(hf_home_value):
            os.makedirs(hf_home_value)

        print(hf_home_value)

        found_file = find_model_file(hf_model_name, hf_model_file, hf_home_value)

        llm = Llama(model_path=found_file, stop=chosen_start_tag, n_gpu_layers=llm_config.n_gpu_layers, n_ctx=llm_config.n_ctx) #**llm_config.model_dump())
        #print(llm.n_gpu_layers)
        llm_model = LlamaCPP(llm, prompt=chosen_prompt)#, **gen_config.model_dump())

        # All representation models
        representation_model = {
            "KeyBERT": keybert,
            "Phi": llm_model
        }

    elif create_llm_topic_labels == "No":
        if low_resource_mode == "Yes":
            #representation_model = {"mmr": mmr}
            representation_model = {"KeyBERT": keybert}
        else:
            representation_model = {"KeyBERT": keybert}

    # Deprecated example using CTransformers. This package is not really used anymore
    #model = AutoModelForCausalLM.from_pretrained('NousResearch/Nous-Capybara-7B-V1.9-GGUF', model_type='mistral', model_file='Capybara-7B-V1.9-Q5_K_M.gguf', hf=True, **vars(llm_config))
    #tokenizer = AutoTokenizer.from_pretrained("NousResearch/Nous-Capybara-7B-V1.9")
    #generator = pipeline(task="text-generation", model=model, tokenizer=tokenizer)

    # Text generation with Llama 2
    #mistral_capybara = TextGeneration(generator, prompt=capybara_prompt)
    #mistral_hermes = TextGeneration(generator, prompt=open_hermes_prompt)

    return representation_model
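
# Minimal usage sketch (illustrative only; the repo id, filename and setup below are
# placeholders/assumptions rather than values taken from the rest of this project):
if __name__ == "__main__":
    from bertopic import BERTopic

    example_representation_model = create_representation_model(
        create_llm_topic_labels="No",  # "Yes" would also load the llama.cpp model
        llm_config=llm_config,
        hf_model_name="example-org/phi-2-orange-GGUF",  # placeholder repo id
        hf_model_file="phi-2-orange.Q5_K_M.gguf",
        chosen_start_tag=chosen_start_tag,
        low_resource_mode=low_resource_mode)

    # The returned dict plugs straight into BERTopic's representation_model argument
    topic_model = BERTopic(representation_model=example_representation_model)
    # topic_model.fit_transform(docs)  # would be called with a real list of documents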