"""Gradio app that extracts text from an uploaded PDF report and asks an LLM to redact personal data."""

import html
import os
import re
import subprocess

from pdfminer.high_level import extract_pages
from pdfminer.layout import LTTextContainer
from tqdm import tqdm
import gradio as gr
import accelerate
import spaces
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
from llama_cpp_agent.providers import LlamaCppPythonProvider
from llama_cpp_agent.chat_history import BasicChatHistory
from llama_cpp_agent.chat_history.messages import Roles
from vllm import LLM, SamplingParams

# subprocess.run('pip install llama-cpp-python==0.2.75 --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu124', shell=True)
# subprocess.run('pip install llama-cpp-agent==0.2.10', shell=True)
# hf_hub_download(
#     repo_id="QuantFactory/Meta-Llama-3-8B-Instruct-GGUF",
#     filename="Meta-Llama-3-8B-Instruct.Q8_0.gguf",
#     local_dir = "./models"
# )
# hf_hub_download(
#     repo_id="bartowski/Meta-Llama-3-70B-Instruct-GGUF",
#     filename="Meta-Llama-3-70B-Instruct-Q3_K_M.gguf",
#     local_dir = "./models"
# )

MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"

# First pass: names, addresses, date of birth, NHS number.
IDENTITY_PROMPT = (
    "In the following text replace any person name and any address with term [redacted], "
    "replace any Date of Birth and NHS number with term [redacted]. "
    "Output the modified text."
)

# Second pass: ages and calendar dates, leaving medical measurements alone.
AGE_DATE_PROMPT = (
    "Perform the following actions on given text: "
    "1. Replace any person age with term [redacted] "
    "2. DO NOT REPLACE ANY MEDICAL MEASUREMENTS "
    "3. Replace only the CALENDAR DATES of format 'day/month/year' with term [redacted]. "
    "Output the modified text."
)

INVALID_PDF_MESSAGE = 'Please provide a valid PDF'
CORRUPTED_PDF_MESSAGE = 'PDF file appears to be corrupted.'

# Compiled once; raw strings avoid the invalid-escape warnings of the plain literals.
_NEWLINES_RE = re.compile(r"\n+")
_WHITESPACE_RE = re.compile(r"\s+")
_INLINE_END_RE = re.compile(r"[\w\d,\-]")


def process_document(pdf_path, page_ids=None):
    """Extract normalized text for the requested pages of a PDF.

    Args:
        pdf_path: Path (or file object) handed to ``extract_pages``.
        page_ids: Optional page numbers forwarded as ``page_numbers``;
            ``None`` lets ``extract_pages`` use its default.

    Returns:
        dict mapping each extracted page's ``pageid`` to its text as produced
        by ``process_page``.
    """
    extracted_pages = extract_pages(pdf_path, page_numbers=page_ids)
    return {page.pageid: process_page(page) for page in tqdm(extracted_pages)}


def process_page(extracted_page):
    """Return the text of one layout page as a single newline-free string.

    Elements are visited in descending ``y1`` order, only ``LTTextContainer``
    elements contribute text, and every run of newlines in the joined result
    is replaced by one space.
    """
    elements = sorted(extracted_page, key=lambda element: element.y1, reverse=True)
    page_text = "".join(
        extract_text_and_normalize(element)
        for element in elements
        if isinstance(element, LTTextContainer)
    )
    return _NEWLINES_RE.sub(" ", page_text)


def extract_text_and_normalize(element):
    """Normalize the text of a single layout element line by line.

    Each line is stripped and its internal whitespace collapsed to single
    spaces. A blank line becomes a newline. A non-blank line is followed by a
    space when its last character is a word character, digit, comma or hyphen
    (so it joins up with the next line); otherwise it is followed by a newline.
    """
    pieces = []
    for raw_line in element.get_text().split("\n"):
        line = raw_line.strip()
        if not line:
            pieces.append("\n")
            continue
        line = _WHITESPACE_RE.sub(" ", line)
        separator = " " if _INLINE_END_RE.search(line[-1]) else "\n"
        pieces.append(line + separator)
    return "".join(pieces)


def txt_to_html(text):
    """Render plain text as an HTML document with one paragraph per input line.

    NOTE(review): the markup in the original string literals was lost (the
    literal was split across lines and empty), so the ``<html><body>`` wrapper
    and ``<p>`` tags are a reconstruction -- confirm against the intended
    output. Each line is HTML-escaped because the text comes from uploaded
    documents.
    """
    paragraphs = "".join(
        "<p>{}</p>".format(html.escape(line.strip())) for line in text.split("\n")
    )
    return "<html><body>" + paragraphs + "</body></html>"


def _run_redaction_pass(llm, instruction, text, sampling_params):
    """Generate one completion for *instruction* applied to *text* and return its text."""
    outputs = llm.generate([instruction + " : " + text], sampling_params)
    return outputs[0].outputs[0].text


@spaces.GPU(duration=120)
def deidentify_doc(pdftext, maxtokens, temperature, top_probability):
    """Redact personal data from *pdftext* with two consecutive LLM passes.

    The first pass applies ``IDENTITY_PROMPT`` to the document text; the
    second applies ``AGE_DATE_PROMPT`` to the output of the first.

    Args:
        pdftext: Text to de-identify.
        maxtokens: Maximum number of tokens generated per pass.
        temperature: Sampling temperature.
        top_probability: Nucleus-sampling ``top_p`` value.

    Returns:
        The text produced by the second pass.
    """
    # Honour the caller's settings; without max_tokens the library default
    # would cut the rewritten report short.
    sampling_params = SamplingParams(
        temperature=temperature,
        top_p=top_probability,
        max_tokens=int(maxtokens),
    )
    llm = LLM(model=MODEL_ID)

    # Each pass must carry both the instruction and the text it applies to.
    redacted = _run_redaction_pass(llm, IDENTITY_PROMPT, pdftext, sampling_params)
    return _run_redaction_pass(llm, AGE_DATE_PROMPT, redacted, sampling_params)


def pdf_to_text(files, maxtokens=2048, temperature=0, top_probability=0.95):
    """De-identify the first page of an uploaded PDF.

    Args:
        files: The uploaded file: a path string, or an object exposing the
            path as ``.name``.
        maxtokens: Forwarded to ``deidentify_doc``.
        temperature: Forwarded to ``deidentify_doc``.
        top_probability: Forwarded to ``deidentify_doc``.

    Returns:
        The de-identified text, or a user-facing message when no file was
        given, the file is not a ``.pdf``, or no text could be extracted.
    """
    if not files:
        return INVALID_PDF_MESSAGE

    file_path = files if isinstance(files, str) else getattr(files, "name", files)
    # splitext + lower() accepts names such as "report.v2.pdf" and "SCAN.PDF".
    if os.path.splitext(file_path)[1].lower() != ".pdf":
        return INVALID_PDF_MESSAGE

    page2content = process_document(file_path, page_ids=[0])
    # Only one page is requested; take whatever key it was stored under.
    pdftext = next(iter(page2content.values()), None)
    if not pdftext:
        return CORRUPTED_PDF_MESSAGE

    # The extracted text is deliberately not printed: it still contains the
    # personal data this app exists to remove.
    return deidentify_doc(pdftext, maxtokens, temperature, top_probability)


# model_id = "models/Meta-Llama-3-70B-Instruct-Q3_K_M.gguf"
# model = Llama(model_path=model_id, n_ctx=2048, n_threads=8, n_gpu_layers=-1, n_batch=128)

css = ".gradio-container {background: 'logo.png'}"
temp_slider = gr.Slider(minimum=0, maximum=2, value=0.9, label="Temperature Value")
prob_slider = gr.Slider(minimum=0, maximum=1, value=0.95, label="Max Probability Value")
max_tokens = gr.Number(value=600, label="Max Tokens")
input_folder = gr.File(file_count='multiple')
input_folder_text = gr.Textbox(label='Enter output folder path')
output_text = gr.Textbox()
output_path_component = gr.File(label="Select Output Path")

iface = gr.Interface(
    fn=pdf_to_text,
    inputs=['file'],
    # css = css,
    outputs=output_text,
    title='COBIx Endoscopy Report De-Identification',
    description="This application assists to remove personal information from the uploaded clinical report",
    theme=gr.themes.Soft(),
)
iface.launch()

# import spaces
# import json
# import subprocess
# from llama_cpp import Llama
# from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
# from llama_cpp_agent.providers import LlamaCppPythonProvider
# from llama_cpp_agent.chat_history import BasicChatHistory
# from llama_cpp_agent.chat_history.messages import Roles
# import gradio as gr
# from huggingface_hub import hf_hub_download
# hf_hub_download(
#     repo_id="bartowski/Meta-Llama-3-70B-Instruct-GGUF",
#     filename="Meta-Llama-3-70B-Instruct-Q3_K_M.gguf",
#     local_dir = "./models"
# )
# # hf_hub_download(
# #     repo_id="bartowski/Mistral-7B-Instruct-v0.3-GGUF",
# #     filename="Mistral-7B-Instruct-v0.3-f32.gguf",
# #     local_dir = "./models"
# # )
# css = """
# .message-row {
#     justify-content: space-evenly !important;
# }
# .message-bubble-border {
#     border-radius: 6px !important;
# }
# .message-buttons-bot, .message-buttons-user {
#     right: 10px !important;
#     left: auto !important;
#     bottom: 2px !important;
# }
# .dark.message-bubble-border {
#     border-color: #343140 !important;
# }
# .dark.user
{ # background: #1e1c26 !important; # } # .dark.assistant.dark, .dark.pending.dark { # background: #16141c !important; # } # """ # def get_messages_formatter_type(model_name): # if "Llama" in model_name: # return MessagesFormatterType.LLAMA_3 # elif "Mistral" in model_name: # return MessagesFormatterType.MISTRAL # else: # raise ValueError(f"Unsupported model: {model_name}") # @spaces.GPU(duration=60) # def respond( # message, # history: list[tuple[str, str]], # model, # system_message, # max_tokens, # temperature, # top_p, # top_k, # repeat_penalty, # ): # chat_template = get_messages_formatter_type(model) # llm = Llama( # model_path=f"models/{model}", # flash_attn=True, # n_gpu_layers=81, # n_batch=1024, # n_ctx=8192, # ) # provider = LlamaCppPythonProvider(llm) # agent = LlamaCppAgent( # provider, # system_prompt=f"{system_message}", # predefined_messages_formatter_type=chat_template, # debug_output=True # ) # settings = provider.get_provider_default_settings() # settings.temperature = temperature # settings.top_k = top_k # settings.top_p = top_p # settings.max_tokens = max_tokens # settings.repeat_penalty = repeat_penalty # settings.stream = True # messages = BasicChatHistory() # for msn in history: # user = { # 'role': Roles.user, # 'content': msn[0] # } # assistant = { # 'role': Roles.assistant, # 'content': msn[1] # } # messages.add_message(user) # messages.add_message(assistant) # stream = agent.get_chat_response( # message, # llm_sampling_settings=settings, # chat_history=messages, # returns_streaming_generator=True, # print_output=False # ) # outputs = "" # for output in stream: # outputs += output # yield outputs # PLACEHOLDER = """ #
#
# Logo #
#
#

llama-cpp-agent

#

The llama-cpp-agent framework simplifies interactions with Large Language Models (LLMs), providing an interface for chatting, executing function calls, generating structured output, performing retrieval augmented generation, and processing text using agentic chains with tools.

#
#
# # Mistral 7B Instruct v0.3 # # # Meta Llama 3 70B Instruct # #
#
# # # Discord # # # # # # GitHub # # # #
#
#
#
# """ # demo = gr.ChatInterface( # respond, # additional_inputs=[ # gr.Dropdown([ # 'Meta-Llama-3-70B-Instruct-Q3_K_M.gguf', # 'Mistral-7B-Instruct-v0.3-f32.gguf' # ], # value="Meta-Llama-3-70B-Instruct-Q3_K_M.gguf", # label="Model" # ), # gr.Textbox(value="You are a helpful assistant.", label="System message"), # gr.Slider(minimum=1, maximum=4096, value=2048, step=1, label="Max tokens"), # gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"), # gr.Slider( # minimum=0.1, # maximum=1.0, # value=0.95, # step=0.05, # label="Top-p", # ), # gr.Slider( # minimum=0, # maximum=100, # value=40, # step=1, # label="Top-k", # ), # gr.Slider( # minimum=0.0, # maximum=2.0, # value=1.1, # step=0.1, # label="Repetition penalty", # ), # ], # theme=gr.themes.Soft(primary_hue="violet", secondary_hue="violet", neutral_hue="gray",font=[gr.themes.GoogleFont("Exo"), "ui-sans-serif", "system-ui", "sans-serif"]).set( # body_background_fill_dark="#16141c", # block_background_fill_dark="#16141c", # block_border_width="1px", # block_title_background_fill_dark="#1e1c26", # input_background_fill_dark="#292733", # button_secondary_background_fill_dark="#24212b", # border_color_accent_dark="#343140", # border_color_primary_dark="#343140", # background_fill_secondary_dark="#16141c", # color_accent_soft_dark="transparent", # code_background_fill_dark="#292733", # ), # css=css, # retry_btn="Retry", # undo_btn="Undo", # clear_btn="Clear", # submit_btn="Send", # description="Llama-cpp-agent: Chat multi llm selection", # chatbot=gr.Chatbot( # scale=1, # placeholder=PLACEHOLDER, # likeable=False, # show_copy_button=True # ) # ) # # if __name__ == "__main__": # demo.launch()