scholarly360 committed on
Commit
370ba10
·
verified ·
1 Parent(s): 670a315

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +130 -0
app.py ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import streamlit as st

# set_page_config() is kept directly after the streamlit import, ahead of every
# other import and Streamlit call, exactly as in the original ordering.
st.set_page_config(layout="wide")

import os
import uuid
from pathlib import Path

import chromadb
import fitz
import pandas as pd
import spacy
from annotated_text import annotated_text, annotation
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.retrievers import BM25Retriever, EnsembleRetriever
from langchain.schema import Document
from langchain.vectorstores import Chroma

st.title("Contracts Summary ")

# The key is provided under OPEN_API_KEY and mirrored to OPENAI_API_KEY.
# A missing key used to surface as a bare KeyError traceback; report it in the
# page and halt the script run instead.
_open_api_key = os.environ.get("OPEN_API_KEY")
if not _open_api_key:
    st.error("The OPEN_API_KEY environment variable is not set; summaries cannot be generated.")
    st.stop()
os.environ["OPENAI_API_KEY"] = _open_api_key

# Load the English model from SpaCy.
# NOTE(review): `nlp` is not referenced anywhere else in the visible file --
# confirm whether this (costly) load is still needed.
nlp = spacy.load("en_core_web_md")
21
+
22
def util_upload_file_and_return_list_docs(uploaded_files):
    """Save uploaded files into the working directory and open each with fitz.

    Args:
        uploaded_files: Iterable of uploaded-file objects exposing ``name``
            and ``getvalue()``. ``None`` or an empty iterable yields two
            empty lists.

    Returns:
        A ``(list_docs, list_save_path)`` tuple: the documents returned by
        ``fitz.open`` and the ``Path`` each upload was written to, both in
        upload order.
    """
    list_docs = []
    list_save_path = []
    target_dir = Path(os.getcwd())
    for position, uploaded_file in enumerate(uploaded_files or [], start=1):
        # The file name comes from the client: keep only its final component
        # so an absolute path or "../" segments cannot escape target_dir.
        safe_name = Path(uploaded_file.name).name
        if safe_name in {"", ".", ".."}:
            # Nothing usable is left after sanitising; fall back to a
            # generated name rather than trying to write onto a directory.
            safe_name = f"upload_{position}.pdf"
        save_path = target_dir / safe_name
        save_path.write_bytes(uploaded_file.getvalue())
        # NOTE: the saved copy stays on disk and the document stays open;
        # the caller is responsible for closing / cleaning up.
        list_docs.append(fitz.open(save_path))
        list_save_path.append(save_path)
    return list_docs, list_save_path
35
+
36
+
37
def util_get_list_page_and_passage(list_docs, list_save_path):
    """Return the full text of every document, one string per document.

    Args:
        list_docs: Iterable of documents; iterating a document must yield
            page objects exposing ``get_text()``.
        list_save_path: Not used by this function; kept so existing callers
            that pass it continue to work.

    Returns:
        A list holding, for each document in order, the concatenation of
        ``get_text()`` over all of its pages (``""`` for a document with no
        pages).
    """
    # str.join avoids rebuilding the accumulated string once per page.
    return ["".join(page.get_text() for page in doc) for doc in list_docs]
46
+
47
+
48
+
49
+
50
# Module-level default so the name always exists; it is rebound to the
# extracted texts when the form is submitted.
documents: list = []
51
+
52
+
53
def get_summary_single_doc(text):
    """Summarise one document's text with a LangChain "refine" summarize chain.

    The text is split on newlines into chunks (``chunk_size=3000``,
    ``chunk_overlap=20``) and fed to ``load_summarize_chain`` with
    ``chain_type="refine"``, using ``gpt-3.5-turbo`` at temperature 0.

    Args:
        text: The document text to summarise.

    Returns:
        The chain's ``output_text`` string, or ``""`` when ``text`` is
        empty or contains only whitespace.
    """
    # Blank text (e.g. a scanned PDF with no text layer) produces no chunks,
    # which would leave the refine chain with nothing to start from.
    if not text or not text.strip():
        return ""

    # Imported lazily, as in the original, so the heavy LangChain modules are
    # only loaded when a summary is actually requested.
    from langchain.chains.summarize import load_summarize_chain
    from langchain.chat_models import ChatOpenAI
    from langchain.prompts import PromptTemplate
    from langchain.text_splitter import CharacterTextSplitter

    llm_key = os.environ.get("OPEN_API_KEY")

    # Create the documents from the text.
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=3000,
        chunk_overlap=20,
    )
    texts = text_splitter.create_documents([text])

    # Prompt used for the initial summary.
    prompt = PromptTemplate.from_template(
        "Write a concise summary of the following:\n"
        "{text}\n"
        "CONCISE SUMMARY:"
    )

    # Prompt used to fold further text into the existing summary. The
    # adjacent literals previously ran together ("summary(only if needed)",
    # "summaryIf the context"); separators are now explicit.
    refine_prompt = PromptTemplate.from_template(
        "Your job is to produce a final summary with key learnings\n"
        "We have provided an existing summary up to a certain point: {existing_answer}\n"
        "We have the opportunity to refine the existing summary "
        "(only if needed) with detailed context below.\n"
        "------------\n"
        "{text}\n"
        "------------\n"
        "Given the new context, refine the original summary.\n"
        "If the context isn't useful, return the original summary."
    )

    # Define the LLM: OpenAI's chat model.
    model_name = "gpt-3.5-turbo"
    llm = ChatOpenAI(temperature=0, openai_api_key=llm_key, model_name=model_name)

    refine_chain = load_summarize_chain(
        llm,
        chain_type="refine",
        question_prompt=prompt,
        refine_prompt=refine_prompt,
        return_intermediate_steps=True,
    )
    refine_outputs = refine_chain({'input_documents': texts})
    return refine_outputs['output_text']
102
+
103
+
104
# Upload form: shows the instructions, collects PDF uploads and, on submit,
# writes one summary per uploaded document.
# NOTE(review): the original indentation was lost; the submit handler is kept
# inside the form block, following Streamlit's documented form pattern --
# confirm against the deployed app.
with st.form("my_form"):
    multi = (
        "1. Download and Upload contract (PDF) .\n"
        "\n"
        "e.g. https://www.barc.gov.in/tenders/GCC-LPS.pdf\n"
        "\n"
        "e.g. https://www.montrosecounty.net/DocumentCenter/View/823/Sample-Construction-Contract\n"
    )
    st.markdown(multi)
    multi = "2. Press Summary ."
    st.markdown(multi)
    # Markdown bold markers must touch the text; "** text **" was rendered
    # with literal asterisks.
    multi = "\n**Attempt is made for summary**\n\n"
    st.markdown(multi)

    list_docs = []
    list_save_path = []
    uploaded_files = st.file_uploader("Choose file(s)", accept_multiple_files=True)
    submitted = st.form_submit_button("Summary")

    if submitted:
        # With accept_multiple_files=True the uploader yields a (possibly
        # empty) list, so test for emptiness rather than for None.
        if not uploaded_files:
            st.warning("Please upload at least one PDF before pressing Summary.")
        else:
            list_docs, list_save_path = util_upload_file_and_return_list_docs(uploaded_files)
            try:
                documents = util_get_list_page_and_passage(list_docs, list_save_path)
            finally:
                # The text has been extracted; release the open PDF handles.
                for opened_doc in list_docs:
                    opened_doc.close()
            for index, item in enumerate(documents, start=1):
                st.write(f"Summary{index} :: ")
                st.write(get_summary_single_doc(item))