# # -*- coding: utf-8 -*- # """fiver-app.ipynb # Automatically generated by Colaboratory. # Original file is located at # https://colab.research.google.com/drive/1YQm_fGxa2nfiV8pTN4oBrlzzfefGadaP # """ # !pip uninstall -y numpy # !pip install --ignore-installed numpy==1.22.0 # !pip install langchain # !pip install PyPDF2 # !pip install docx2txt # !pip install gradio # !pip install faiss-gpu # !pip install openai # !pip install tiktoken # !pip install python-docx # !pip install git+https://github.com/openai/whisper.git # !pip install sounddevice # import shutil # import os # def copy_files(source_folder, destination_folder): # # Create the destination folder if it doesn't exist # if not os.path.exists(destination_folder): # os.makedirs(destination_folder) # # Get a list of files in the source folder # files_to_copy = os.listdir(source_folder) # for file_name in files_to_copy: # source_file_path = os.path.join(source_folder, file_name) # destination_file_path = os.path.join(destination_folder, file_name) # # Copy the file to the destination folder # shutil.copy(source_file_path, destination_file_path) # print(f"Copied {file_name} to {destination_folder}") # # Specify the source folder and destination folder paths # source_folder = "/kaggle/input/fiver-app5210" # destination_folder = "/home/user/app/local_db" # copy_files(source_folder, destination_folder) # import shutil # import os # def copy_files(source_folder, destination_folder): # # Create the destination folder if it doesn't exist # if not os.path.exists(destination_folder): # os.makedirs(destination_folder) # # Get a list of files in the source folder # files_to_copy = os.listdir(source_folder) # for file_name in files_to_copy: # source_file_path = os.path.join(source_folder, file_name) # destination_file_path = os.path.join(destination_folder, file_name) # # Copy the file to the destination folder # shutil.copy(source_file_path, destination_file_path) # print(f"Copied {file_name} to {destination_folder}") # 
# Specify the source folder and destination folder paths
# source_folder = "/kaggle/input/fiver-app-docs"
# destination_folder = "/home/user/app/docs"
# copy_files(source_folder, destination_folder)

# Directory that receives uploaded files, and directory holding the FAISS
# index built from them.
_UPLOAD_DIR = "/home/user/app/file/"
_FILE_DB_DIR = "/home/user/app/file_db/"


def api_key(key):
    """Register the OpenAI API key for this process.

    The key is written to the ``OPENAI_API_KEY`` environment variable and to
    ``openai.api_key``. Surrounding whitespace (common when pasting) is
    removed first.

    Args:
        key: The API key entered by the user.

    Returns:
        A status message for the UI.
    """
    cleaned = (key or "").strip()
    if not cleaned:
        # Previously an empty key was stored and reported as a success.
        return "Please enter a valid API key."

    import os

    import openai

    os.environ["TOKENIZERS_PARALLELISM"] = "false"
    os.environ["OPENAI_API_KEY"] = cleaned
    openai.api_key = cleaned
    return "Successful!"


def save_file(input_file, destination_dir=_UPLOAD_DIR):
    """Copy uploaded files into the upload directory.

    Args:
        input_file: Iterable of objects exposing the source path as ``.name``
            (what the ``gr.Files`` component wired to this handler passes),
            or ``None``/empty when nothing was uploaded.
        destination_dir: Directory to copy into; created if missing.

    Returns:
        A status message for the UI.
    """
    import os
    import shutil

    if not input_file:
        # Clicking "Upload" with no selection used to raise TypeError.
        return "No file(s) selected."

    os.makedirs(destination_dir, exist_ok=True)
    for uploaded in input_file:
        shutil.copy(uploaded.name, destination_dir)
    return "File(s) saved successfully!"


def process_file():
    """Build and persist a FAISS index from the uploaded files.

    Loads every ``.pdf``, ``.txt`` and ``.docx`` file found directly in the
    upload directory, splits the text into overlapping chunks, embeds the
    chunks with OpenAI embeddings and saves the index to the file-db
    directory.

    Returns:
        A status message for the UI.
    """
    from langchain.document_loaders import (
        DirectoryLoader,
        Docx2txtLoader,
        PyPDFLoader,
        TextLoader,
    )
    from langchain.embeddings.openai import OpenAIEmbeddings
    from langchain.text_splitter import CharacterTextSplitter
    from langchain.vectorstores import FAISS

    # One loader class per supported extension, loaded in this order.
    loaders_by_glob = (
        ("./*.pdf", PyPDFLoader),
        ("./*.txt", TextLoader),
        ("./*.docx", Docx2txtLoader),
    )
    documents = []
    for pattern, loader_cls in loaders_by_glob:
        loader = DirectoryLoader(_UPLOAD_DIR, glob=pattern, loader_cls=loader_cls)
        documents.extend(loader.load())

    if not documents:
        # An empty document list cannot be turned into an index; report it
        # instead of failing inside the vector store.
        return "No supported files (.pdf, .txt, .docx) found to process."

    text_splitter = CharacterTextSplitter(
        separator="\n", chunk_size=1000, chunk_overlap=200, length_function=len
    )
    chunks = text_splitter.split_documents(documents)

    file_db = FAISS.from_documents(chunks, OpenAIEmbeddings())
    file_db.save_local(_FILE_DB_DIR)
    return "File(s) processed successfully!"
# Locations used by the search / delete / SOAP-question handlers below.
_UPLOADS_PATH = "/home/user/app/file/"
_FILE_INDEX_PATH = "/home/user/app/file_db/"
_LOCAL_INDEX_PATH = "/home/user/app/local_db/"
_DOCS_DIR = "/home/user/app/docs"


def formatted_response(docs, response):
    """Append a "Sources" section to an answer.

    Each document contributes one line made of the file name (last ``/``
    separated component of its ``source`` metadata) and, when ``page``
    metadata is present, the page number. Identical lines are listed once,
    in first-seen order, so several chunks from the same page do not repeat
    the same citation.

    Args:
        docs: Documents exposing a ``metadata`` mapping.
        response: The answer text.

    Returns:
        The answer followed by the source listing.
    """
    source_lines = []
    for doc in docs:
        source_info = doc.metadata.get("source", "Unknown source")
        page_info = doc.metadata.get("page")
        # Keep only the file name, without the directory path.
        file_name = source_info.split("/")[-1].strip()
        if page_info is not None:
            source_lines.append(f"{file_name}\tpage no {page_info}")
        else:
            source_lines.append(file_name)

    unique_lines = list(dict.fromkeys(source_lines))
    return "\n".join([response + "\n\nSources", *unique_lines])


def _answer_from_index(index_dir, question, show_matches=False):
    """Answer *question* from the FAISS index stored in *index_dir*."""
    import os

    from langchain.callbacks import get_openai_callback
    from langchain.chains.question_answering import load_qa_chain
    from langchain.chat_models import ChatOpenAI
    from langchain.embeddings.openai import OpenAIEmbeddings
    from langchain.vectorstores import FAISS

    if not os.path.isdir(index_dir):
        # Searching before any index was built used to fail while loading.
        return "No processed files found. Please upload and process files first."

    file_db = FAISS.load_local(index_dir, OpenAIEmbeddings())
    docs = file_db.similarity_search(question)
    if show_matches:
        print(docs)

    llm = ChatOpenAI(model_name="gpt-3.5-turbo")
    chain = load_qa_chain(llm, chain_type="stuff")
    with get_openai_callback() as cb:
        response = chain.run(input_documents=docs, question=question)
    # Log what the OpenAI callback recorded for this call.
    print(cb)
    return formatted_response(docs, response)


def search_file(question):
    """Answer *question* from the index built out of the uploaded files.

    Returns:
        The answer with a source listing, or a hint when no index exists.
    """
    return _answer_from_index(_FILE_INDEX_PATH, question)


def search_local(question):
    """Answer *question* from the pre-built local index.

    The matched documents are printed before the answer is generated.

    Returns:
        The answer with a source listing, or a hint when no index exists.
    """
    return _answer_from_index(_LOCAL_INDEX_PATH, question, show_matches=True)


def delete_file():
    """Remove the uploaded files and the index built from them.

    Each directory is removed independently, so a missing upload directory
    no longer leaves a stale index behind.

    Returns:
        "Deleted Successfully" if at least one directory was removed,
        "Already Deleted" if neither existed, or an error description when a
        directory exists but cannot be removed.
    """
    import shutil

    removed_any = False
    for target in (_UPLOADS_PATH, _FILE_INDEX_PATH):
        try:
            shutil.rmtree(target)
        except FileNotFoundError:
            continue
        except OSError as err:
            return f"Could not delete {target}: {err}"
        removed_any = True
    return "Deleted Successfully" if removed_any else "Already Deleted"


import os

import gradio as gr


def _doc_file_names(directory=_DOCS_DIR):
    """Return the files under *directory* as sorted paths relative to it."""
    names = []
    for root, _dirs, files in os.walk(directory):
        for file in files:
            # Relative path (not the bare name) so files in sub-directories
            # can be opened again by joining with the docs directory.
            names.append(os.path.relpath(os.path.join(root, file), directory))
    return sorted(names)


def list_files():
    """Return a Dropdown update listing the files in the docs directory."""
    return gr.Dropdown.update(choices=_doc_file_names())


# Print the actual file names; the earlier code looped over the Dropdown
# update returned by list_files() rather than over the names.
file_list = _doc_file_names()
print("List of file names in the directory:")
for file_name in file_list:
    print(file_name)


def _ask_llm(question):
    """Run *question* through the step-by-step prompt on the OpenAI LLM."""
    from langchain import LLMChain, PromptTemplate
    from langchain.llms import OpenAI

    template = """Question: {question} Answer: Let's think step by step."""
    prompt = PromptTemplate(template=template, input_variables=["question"])
    llm_chain = LLMChain(prompt=prompt, llm=OpenAI())
    return llm_chain.run(question)


def soap_report(doc_name, question):
    """Answer *question* using the text of a .docx file in the docs directory.

    Args:
        doc_name: File chosen in the dropdown, relative to the docs directory.
        question: The user's question.

    Returns:
        The model's answer, or a hint when no file was chosen.
    """
    import docx

    if not doc_name:
        # An empty dropdown used to crash on string concatenation.
        return "Please choose a file first."

    document = docx.Document(os.path.join(_DOCS_DIR, doc_name))
    paragraphs = "".join(
        paragraph.text + "\n" for paragraph in document.paragraphs
    )
    prompt_text = (
        "Extracted text:\n\n\n"
        + paragraphs
        + "\n\nUse the 'Extracted text' to answer the following question:\n"
        + question
    )
    print(prompt_text)
    return _ask_llm(prompt_text)


def search_gpt(question):
    """Ask the LLM directly, without any document context."""
    return _ask_llm(question)


def local_gpt(question):
    """Ask the LLM directly, without any document context."""
    return _ask_llm(question)


# Module-level ``global`` has no effect; kept as in the original script.
global output
# Module-level ``global`` has no effect; kept as in the original script.
global response

# Latest transcript (set by audio_text) and latest SOAP report (set by
# transcript / text_soap). Initialised so handlers can detect "nothing yet"
# instead of raising NameError.
# NOTE(review): these are plain module globals, so they appear to be shared
# by every session of the app - confirm whether per-session state is needed.
output = None
response = None

_SOAP_DOCS_DIR = "/home/user/app/docs"
_SOAP_INSTRUCTION = (
    "Use the following context given below to generate a detailed SOAP Report:\n\n"
)


def audio_text(filepath):
    """Transcribe an audio file with the ``whisper-1`` model.

    The transcript is also stored in the module-level ``output`` so that
    ``text_soap`` can use it.

    Args:
        filepath: Path to the audio file, or ``None`` when nothing was
            recorded or uploaded.

    Returns:
        The transcript text, or a hint when no audio was provided.
    """
    global output
    import openai

    if not filepath:
        return "Please record or upload audio first."

    # Context manager so the file handle is closed after the request.
    with open(filepath, "rb") as audio:
        result = openai.Audio.transcribe("whisper-1", audio)
    output = result["text"]
    return output


def _generate_soap_report(context):
    """Ask the LLM for a SOAP report based on *context*."""
    from langchain import LLMChain, PromptTemplate
    from langchain.llms import OpenAI

    question = _SOAP_INSTRUCTION + context
    print(question)
    template = """Question: {question} Answer: Let's think step by step."""
    prompt = PromptTemplate(template=template, input_variables=["question"])
    llm_chain = LLMChain(prompt=prompt, llm=OpenAI())
    return llm_chain.run(question)


def transcript(text):
    """Generate a SOAP report from a transcript typed by the user.

    The report is also stored in the module-level ``response`` so that
    ``docx`` can save it.
    """
    global response
    response = _generate_soap_report(text)
    return response


def text_soap():
    """Generate a SOAP report from the latest audio transcript.

    Returns:
        The report, or a hint when no transcript has been generated yet.
    """
    global response
    if not output:
        return "Please generate a transcript first."
    response = _generate_soap_report(output)
    return response


global path


def docx(name):
    """Save the latest SOAP report as ``<name>.docx`` in the docs directory.

    Only the final path component of *name* is used, so the file always
    lands inside the docs directory. The chosen path is stored in the
    module-level ``path``.

    Args:
        name: File name (without extension) entered by the user.

    Returns:
        A status message for the UI.
    """
    global path
    import os

    # Local import: inside this function ``docx`` refers to the python-docx
    # module, not to this function.
    import docx

    if not response:
        return "Please generate a SOAP Report first."

    safe_name = os.path.basename((name or "").strip())
    if not safe_name:
        return "Please enter a file name."

    os.makedirs(_SOAP_DOCS_DIR, exist_ok=True)
    path = os.path.join(_SOAP_DOCS_DIR, f"{safe_name}.docx")
    doc = docx.Document()
    doc.add_paragraph(response)
    doc.save(path)
    return "Successfully saved .docx File"


import gradio as gr

css = (
    " .col{ max-width: 50%; margin: 0 auto; display: flex; "
    "flex-direction: column; justify-content: center; align-items: center; } "
)

# NOTE(review): the nesting of tabs/columns below was reconstructed from a
# flattened source - confirm it matches the intended layout.
with gr.Blocks(css=css) as demo:
    gr.Markdown("File Chatting App")

    with gr.Tab("Chat with Files"):
        with gr.Column(elem_classes="col"):
            with gr.Tab("Upload and Process Files"):
                with gr.Column():
                    api_key_input = gr.Textbox(label="Enter API Key here")
                    api_key_button = gr.Button("Submit")
                    api_key_output = gr.Textbox(label="Output")

                    file_input = gr.Files(label="Upload File(s) here")
                    upload_button = gr.Button("Upload")
                    file_output = gr.Textbox(label="Output")

                    process_button = gr.Button("Process")
                    process_output = gr.Textbox(label="Output")

            with gr.Tab("Ask Questions to Files"):
                with gr.Column():
                    search_input = gr.Textbox(label="Enter Question here")
                    search_button = gr.Button("Search")
                    search_output = gr.Textbox(label="Output")

                    search_gpt_button = gr.Button("Ask ChatGPT")
                    search_gpt_output = gr.Textbox(label="Output")

                    delete_button = gr.Button("Delete")
                    delete_output = gr.Textbox(label="Output")

    with gr.Tab("Chat with Local Files"):
        with gr.Column(elem_classes="col"):
            local_search_input = gr.Textbox(label="Enter Question here")
            local_search_button = gr.Button("Search")
            local_search_output = gr.Textbox(label="Output")

            local_gpt_button = gr.Button("Ask ChatGPT")
            local_gpt_output = gr.Textbox(label="Output")

    with gr.Tab("Ask Question to SOAP Report"):
        with gr.Column(elem_classes="col"):
            refresh_button = gr.Button("Refresh")
            soap_input = gr.Dropdown(label="Choose File")
            soap_question = gr.Textbox(label="Enter Question here")
            soap_button = gr.Button("Submit")
            soap_output = gr.Textbox(label="Output")

    with gr.Tab("Convert Audio to SOAP Report"):
        with gr.Column(elem_classes="col"):
            mic_text_input = gr.Audio(
                source="microphone", type="filepath", label="Speak to the Microphone"
            )
            mic_text_button = gr.Button("Generate Transcript")
            mic_text_output = gr.Textbox(label="Output")

            upload_text_input = gr.Audio(
                source="upload", type="filepath", label="Upload Audio File here"
            )
            upload_text_button = gr.Button("Generate Transcript")
            upload_text_output = gr.Textbox(label="Output")

            transcript_input = gr.Textbox(label="Enter Transcript here")
            transcript_button = gr.Button("Generate SOAP Report")
            transcript_output = gr.Textbox(label="Output")

            text_soap_button = gr.Button("Generate SOAP Report")
            text_soap_output = gr.Textbox(label="Output")

            docx_input = gr.Textbox(label="Enter the name of .docx File")
            docx_button = gr.Button("Save .docx File")
            docx_output = gr.Textbox(label="Output")

    # Event wiring: each button calls its handler and shows the returned text.
    api_key_button.click(api_key, inputs=api_key_input, outputs=api_key_output)
    upload_button.click(save_file, inputs=file_input, outputs=file_output)
    process_button.click(process_file, inputs=None, outputs=process_output)
    search_button.click(search_file, inputs=search_input, outputs=search_output)
    search_gpt_button.click(search_gpt, inputs=search_input, outputs=search_gpt_output)
    delete_button.click(delete_file, inputs=None, outputs=delete_output)
    local_search_button.click(
        search_local, inputs=local_search_input, outputs=local_search_output
    )
    local_gpt_button.click(
        local_gpt, inputs=local_search_input, outputs=local_gpt_output
    )
    refresh_button.click(list_files, inputs=None, outputs=soap_input)
    soap_button.click(
        soap_report, inputs=[soap_input, soap_question], outputs=soap_output
    )
    mic_text_button.click(audio_text, inputs=mic_text_input, outputs=mic_text_output)
    upload_text_button.click(
        audio_text, inputs=upload_text_input, outputs=upload_text_output
    )
    transcript_button.click(
        transcript, inputs=transcript_input, outputs=transcript_output
    )
    text_soap_button.click(text_soap, inputs=None, outputs=text_soap_output)
    docx_button.click(docx, inputs=docx_input, outputs=docx_output)

demo.queue()
demo.launch()