import os
#from ctransformers import AutoModelForCausalLM
#from transformers import AutoTokenizer, pipeline
from bertopic.representation import LlamaCPP
from llama_cpp import Llama
from pydantic import BaseModel
import torch.cuda
from huggingface_hub import hf_hub_download, HfFolder
from bertopic.representation import KeyBERTInspired, MaximalMarginalRelevance, TextGeneration
from funcs.prompts import capybara_prompt, capybara_start, open_hermes_prompt, open_hermes_start, stablelm_prompt, stablelm_start
random_seed = 42
chosen_prompt = open_hermes_prompt # stablelm_prompt
chosen_start_tag = open_hermes_start # stablelm_start
# Currently set n_gpu_layers to 0 even with cuda due to persistent bugs in implementation with cuda
if torch.cuda.is_available():
    torch_device = "gpu"
    low_resource_mode = "No"
    n_gpu_layers = 100
else:
    torch_device = "cpu"
    low_resource_mode = "Yes"
    n_gpu_layers = 0
low_resource_mode = "No" # Override for testing
#print("Running on device:", torch_device)
n_threads = torch.get_num_threads()
print("CPU n_threads:", n_threads)
# Default Model parameters
temperature: float = 0.1
top_k: int = 3
top_p: float = 1
repeat_penalty: float = 1.1
last_n_tokens_size: int = 128
max_tokens: int = 500
seed: int = 42
reset: bool = True
stream: bool = False
n_threads: int = n_threads
n_batch: int = 256
n_ctx: int = 4096
sample: bool = True
trust_remote_code: bool = True
class LLamacppInitConfigGpu(BaseModel):
    last_n_tokens_size: int
    seed: int
    n_threads: int
    n_batch: int
    n_ctx: int
    n_gpu_layers: int
    temperature: float
    top_k: int
    top_p: float
    repeat_penalty: float
    max_tokens: int
    reset: bool
    stream: bool
    stop: str
    trust_remote_code: bool

    def update_gpu(self, new_value: int):
        self.n_gpu_layers = new_value
llm_config = LLamacppInitConfigGpu(last_n_tokens_size=last_n_tokens_size,
                                   seed=seed,
                                   n_threads=n_threads,
                                   n_batch=n_batch,
                                   n_ctx=n_ctx,
                                   n_gpu_layers=n_gpu_layers,
                                   temperature=temperature,
                                   top_k=top_k,
                                   top_p=top_p,
                                   repeat_penalty=repeat_penalty,
                                   max_tokens=max_tokens,
                                   reset=reset,
                                   stream=stream,
                                   stop=chosen_start_tag,
                                   trust_remote_code=trust_remote_code)
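
# Illustrative note (not part of the original flow): the helper defined on the config
# class above can change GPU offloading at runtime, e.g. llm_config.update_gpu(0) to
# force CPU-only inference before the Llama object is created.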
## Create representation model parameters ##
# KeyBERT
keybert = KeyBERTInspired(random_state=random_seed)
# MMR
mmr = MaximalMarginalRelevance(diversity=0.2)
# Find model file
def find_model_file(hf_model_name, hf_model_file, search_folder):
    hf_loc = search_folder #os.environ["HF_HOME"]
    hf_sub_loc = search_folder + "/hub/" #os.environ["HF_HOME"]

    hf_model_name_path = hf_sub_loc + 'models--' + hf_model_name.replace("/","--")
    print(hf_model_name_path)

    def find_file(root_folder, file_name):
        # Walk the folder tree until the requested file name is found
        for root, dirs, files in os.walk(root_folder):
            if file_name in files:
                return os.path.join(root, file_name)
        return None

    # Search the local Hugging Face cache for the requested model file
    folder_path = hf_model_name_path
    file_to_find = hf_model_file

    found_file = find_file(folder_path, file_to_find)

    if found_file:
        print(f"Model file found: {found_file}")
        return found_file
    else:
        print("File not found. Downloading model from hub")

        # Download into the search folder, creating it first if it doesn't exist
        hf_home_value = search_folder
        if not os.path.exists(hf_home_value):
            os.makedirs(hf_home_value)

        print("Downloading model to: ", hf_home_value)

        # Download the requested file (rather than a hard-coded filename) so the
        # follow-up search below can actually find it
        hf_hub_download(repo_id=hf_model_name, filename=hf_model_file, cache_dir=hf_home_value)

        found_file = find_file(hf_home_value, file_to_find)
        return found_file
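
# Illustrative call (the repo id and cache path below are placeholders, not project defaults):
# find_model_file("example-org/phi-2-orange-GGUF", "phi-2-orange.Q5_K_M.gguf",
#                 os.path.expanduser("~/.cache/huggingface"))
# This returns the absolute path to the .gguf file, downloading it from the hub first if
# it is not already present in the local cache.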
def create_representation_model(create_llm_topic_labels, llm_config, hf_model_name, hf_model_file, chosen_start_tag, low_resource_mode):

    if create_llm_topic_labels == "Yes":
        # Use llama.cpp to load in the model

        # Remove HF_HOME for testing on systems without this environment variable set.
        # Note: os.unsetenv does not update os.environ, so delete the key directly.
        if "HF_HOME" in os.environ:
            del os.environ["HF_HOME"]

        # Get the HF_HOME environment variable, or default to "~/.cache/huggingface/hub"
        # (the typical location for Hugging Face models)
        hf_home_value = os.getenv("HF_HOME", "~/.cache/huggingface/hub")

        # Expand the user symbol '~' to the full home directory path
        hf_home_value = os.path.expanduser(hf_home_value)

        # Check if the directory exists, create it if it doesn't
        if not os.path.exists(hf_home_value):
            os.makedirs(hf_home_value)

        print(hf_home_value)

        found_file = find_model_file(hf_model_name, hf_model_file, hf_home_value)

        llm = Llama(model_path=found_file, stop=chosen_start_tag, n_gpu_layers=llm_config.n_gpu_layers, n_ctx=llm_config.n_ctx) #**llm_config.model_dump())
        #print(llm.n_gpu_layers)
        llm_model = LlamaCPP(llm, prompt=chosen_prompt)#, **gen_config.model_dump())

        # All representation models
        representation_model = {
            "KeyBERT": keybert,
            "Phi": llm_model
        }

    elif create_llm_topic_labels == "No":
        if low_resource_mode == "Yes":
            #representation_model = {"mmr": mmr}
            representation_model = {"KeyBERT": keybert}
        else:
            representation_model = {"KeyBERT": keybert}

    # Deprecated example using CTransformers. This package is not really used anymore
    #model = AutoModelForCausalLM.from_pretrained('NousResearch/Nous-Capybara-7B-V1.9-GGUF', model_type='mistral', model_file='Capybara-7B-V1.9-Q5_K_M.gguf', hf=True, **vars(llm_config))
    #tokenizer = AutoTokenizer.from_pretrained("NousResearch/Nous-Capybara-7B-V1.9")
    #generator = pipeline(task="text-generation", model=model, tokenizer=tokenizer)

    # Text generation with Llama 2
    #mistral_capybara = TextGeneration(generator, prompt=capybara_prompt)
    #mistral_hermes = TextGeneration(generator, prompt=open_hermes_prompt)

    return representation_model
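
# Minimal usage sketch (illustrative only; the repo id, filename and setup below are
# placeholders/assumptions rather than values taken from the rest of this project):
if __name__ == "__main__":
    from bertopic import BERTopic

    example_representation_model = create_representation_model(
        create_llm_topic_labels="No",  # "Yes" would also load the llama.cpp model
        llm_config=llm_config,
        hf_model_name="example-org/phi-2-orange-GGUF",  # placeholder repo id
        hf_model_file="phi-2-orange.Q5_K_M.gguf",
        chosen_start_tag=chosen_start_tag,
        low_resource_mode=low_resource_mode)

    # The returned dict plugs straight into BERTopic's representation_model argument
    topic_model = BERTopic(representation_model=example_representation_model)
    # topic_model.fit_transform(docs)  # would be called with a real list of documents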