Sean-Case committed
Commit 32cf9fb · 1 Parent(s): e09dd3b

Hopefully fixed install and load of the LLM model on systems without an HF_HOME environment variable

Files changed (1):
1. funcs/representation_model.py (+63 −33)
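In essence, the change makes the model-cache location explicit: instead of defaulting HF_HOME to the current folder and downloading into huggingface_hub's default cache, the code now defaults to ~/.cache/huggingface/hub, expands ~, creates the directory if needed, and passes it as cache_dir so the download and the subsequent file search use the same folder. A minimal standalone sketch of that flow, mirroring the diff below (the repo id and filename here are illustrative placeholders, not this project's configuration):

import os
from huggingface_hub import hf_hub_download

def resolve_hf_cache() -> str:
    # Same steps as in the commit: fall back to the standard Hugging Face
    # cache when HF_HOME is unset, expand "~", and create the folder.
    cache = os.getenv("HF_HOME", "~/.cache/huggingface/hub")
    cache = os.path.expanduser(cache)
    os.makedirs(cache, exist_ok=True)
    return cache

# Placeholder repo/file names, for illustration only.
local_path = hf_hub_download(repo_id="example-org/example-model-GGUF",
                             filename="example-model.Q5_K_M.gguf",
                             cache_dir=resolve_hf_cache())
print(local_path)

Since hf_hub_download returns the local file path, that return value could in principle be used directly; the diff instead re-runs its find_file walk over the cache folder, which now succeeds because download and search point at the same directory.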
funcs/representation_model.py CHANGED

@@ -5,6 +5,7 @@ from bertopic.representation import LlamaCPP
 from llama_cpp import Llama
 from pydantic import BaseModel
 import torch.cuda
+from huggingface_hub import hf_hub_download, HfFolder

 from bertopic.representation import KeyBERTInspired, MaximalMarginalRelevance, TextGeneration
 from funcs.prompts import capybara_prompt, capybara_start, open_hermes_prompt, open_hermes_start, stablelm_prompt, stablelm_start
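(Of the two new imports, only hf_hub_download is exercised in the code below; HfFolder appears unused in the hunks shown.)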
@@ -14,36 +15,6 @@ random_seed = 42
 chosen_prompt = open_hermes_prompt # stablelm_prompt
 chosen_start_tag = open_hermes_start # stablelm_start

-# Find model file
-def find_model_file(hf_model_name, hf_model_file, search_folder):
-    hf_loc = search_folder #os.environ["HF_HOME"]
-    hf_sub_loc = search_folder + "/hub/" #os.environ["HF_HOME"]
-
-    hf_model_name_path = hf_sub_loc + 'models--' + hf_model_name.replace("/","--")
-
-    print(hf_model_name_path)
-
-    def find_file(root_folder, file_name):
-        for root, dirs, files in os.walk(root_folder):
-            if file_name in files:
-                return os.path.join(root, file_name)
-        return None
-
-    # Example usage
-    folder_path = hf_model_name_path # Replace with your folder path
-    file_to_find = hf_model_file # Replace with the file name you're looking for
-
-    found_file = find_file(folder_path, file_to_find)
-    if found_file:
-        print(f"File found: {found_file}")
-        return found_file
-    else:
-        error = "File not found."
-        print(error, " Downloading model from hub")
-        from huggingface_hub import hf_hub_download
-        hf_hub_download(repo_id=hf_model_name, filename='phi-2-orange.Q5_K_M.gguf')
-        found_file = find_file(folder_path, file_to_find)
-        return found_file
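For context on the bug: this removed fallback called hf_hub_download without a cache_dir, so huggingface_hub wrote the file to its default cache (derived from HF_HOME when set, otherwise ~/.cache/huggingface/hub), while the retry of find_file kept walking folder_path under search_folder. On a system without HF_HOME, where the caller defaulted search_folder to the current directory (see the removed os.getenv below), the two locations never matched, so find_model_file could return None and the later Llama(model_path=...) call would fail.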
@@ -121,15 +92,74 @@ keybert = KeyBERTInspired(random_state=random_seed)
 # MMR
 mmr = MaximalMarginalRelevance(diversity=0.2)

+# Find model file
+def find_model_file(hf_model_name, hf_model_file, search_folder):
+    hf_loc = search_folder #os.environ["HF_HOME"]
+    hf_sub_loc = search_folder + "/hub/" #os.environ["HF_HOME"]
+
+    hf_model_name_path = hf_sub_loc + 'models--' + hf_model_name.replace("/","--")
+
+    print(hf_model_name_path)
+
+    def find_file(root_folder, file_name):
+        for root, dirs, files in os.walk(root_folder):
+            if file_name in files:
+                return os.path.join(root, file_name)
+        return None
+
+    # Example usage
+    folder_path = hf_model_name_path # Replace with your folder path
+    file_to_find = hf_model_file # Replace with the file name you're looking for
+
+    found_file = find_file(folder_path, file_to_find)
+    if found_file:
+        print(f"Model file found: {found_file}")
+        return found_file
+    else:
+        error = "File not found."
+        print(error, " Downloading model from hub")
+
+        # Specify your custom directory
+        # Get HF_HOME environment variable or default to "~/.cache/huggingface/hub"
+        hf_home_value = search_folder
+
+        # Check if the directory exists, create it if it doesn't
+        if not os.path.exists(hf_home_value):
+            os.makedirs(hf_home_value)
+
+        print("Downloading model to: ", hf_home_value)
+
+        hf_hub_download(repo_id=hf_model_name, filename='phi-2-orange.Q5_K_M.gguf', cache_dir=hf_home_value)
+
+        found_file = find_file(hf_home_value, file_to_find)
+        return found_file
+
+
 def create_representation_model(create_llm_topic_labels, llm_config, hf_model_name, hf_model_file, chosen_start_tag, low_resource_mode):

     if create_llm_topic_labels == "Yes":
         # Use llama.cpp to load in model

-        # Check for HF_HOME environment variable and supply a default value if it's not found (current folder)
-        hf_home_value = os.getenv("HF_HOME", '.')
+        # This was for testing on systems without a HF_HOME env variable
+        os.unsetenv("HF_HOME")
+
+        #if "HF_HOME" in os.environ:
+        #    del os.environ["HF_HOME"]
+
+        # Check for HF_HOME environment variable and supply a default value if it's not found (typical location for huggingface models)
+        # Get HF_HOME environment variable or default to "~/.cache/huggingface/hub"
+        hf_home_value = os.getenv("HF_HOME", "~/.cache/huggingface/hub")
+
+        # Expand the user symbol '~' to the full home directory path
+        hf_home_value = os.path.expanduser(hf_home_value)
+
+        # Check if the directory exists, create it if it doesn't
+        if not os.path.exists(hf_home_value):
+            os.makedirs(hf_home_value)
+
+        print(hf_home_value)

         found_file = find_model_file(hf_model_name, hf_model_file, hf_home_value)

         llm = Llama(model_path=found_file, stop=chosen_start_tag, n_gpu_layers=llm_config.n_gpu_layers, n_ctx=llm_config.n_ctx) #**llm_config.model_dump())#
         #print(llm.n_gpu_layers)
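One caveat on the leftover test line: os.unsetenv() does not update os.environ, so the os.getenv("HF_HOME", ...) call that follows will still see any value the process inherited at startup. The commented-out deletion is the reliable way to simulate a machine without the variable, e.g.:

import os

# Deleting the key from os.environ updates the mapping that os.getenv()
# reads, and also calls unsetenv() on platforms that support it.
if "HF_HOME" in os.environ:
    del os.environ["HF_HOME"]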