scholarly360's picture
Create app.py
6c94ef1 verified
import streamlit as st
st.set_page_config(layout="wide")
import uuid
from pathlib import Path
import os
import pandas as pd
import json
import docx
# import openai
# from langchain.chat_models import ChatOpenAI
# from langchain.prompts import PromptTemplate
st.title("Passage Name with Gen-AI")

# The key is expected under OPEN_API_KEY and is copied to OPENAI_API_KEY, the
# variable the OpenAI client picks up when constructed without arguments.
# If only OPENAI_API_KEY is set it is used as-is; if neither is set, show a
# readable error in the page instead of failing with a bare KeyError.
if "OPEN_API_KEY" in os.environ:
    os.environ["OPENAI_API_KEY"] = os.environ["OPEN_API_KEY"]
elif "OPENAI_API_KEY" not in os.environ:
    st.error("No OpenAI API key found. Set the OPEN_API_KEY environment variable.")
    st.stop()

from openai import OpenAI

client = OpenAI()

# Rows collected from the uploaded documents, one dict per kept paragraph
# with the keys "file" and "example".
list_dict = []

# Paragraphs containing this many words or fewer are skipped.
LIMIT_WORDS_PARA = 7
# Few-shot labelling of a single contract paragraph through the chat API.
def guess_topic(clause_text):
    """Ask the chat model for the type/title of one contract paragraph.

    The request is primed with a system instruction and two worked examples
    (a governing-law clause and a confidentiality clause); ``clause_text`` is
    then sent as the final user message.

    Args:
        clause_text: Text of the paragraph to label.

    Returns:
        The object returned by ``client.chat.completions.create``. The label
        text is available at ``.choices[0].message.content``.
    """
    system_prompt = "You are a legal contract assistant. Find type or title of the paragraph, e.g. Governing Law, Confidentiality etc."

    # (example paragraph, expected label) pairs shown to the model as prior turns.
    few_shot_pairs = (
        (
            "THIS AGREEMENT SHALL BE GOVERNED BY AND CONSTRUED IN ACCORDANCE WITH THE LAWS OF THE STATE OF NEW YORK.",
            "Governing Law",
        ),
        (
            "The provisions of this Agreement will be held in strictest confidence by you and the Company and will not be publicized or disclosed in any manner whatsoever; provided, however, that: (a) you may disclose this Agreement to your immediate family; (b) the parties may disclose this Agreement in confidence to their respective attorneys, accountants, auditors, tax preparers, and financial advisors; (c) the Company may disclose this Agreement as necessary to fulfill standard or legally required corporate reporting or disclosure requirements; and (d) the parties may disclose this Agreement insofar as such disclosure may be necessary to enforce its terms or as otherwise required by law.",
            "Confidentiality",
        ),
    )

    conversation = [{"role": "system", "content": system_prompt}]
    for sample_clause, sample_label in few_shot_pairs:
        conversation.append({"role": "user", "content": sample_clause})
        conversation.append({"role": "assistant", "content": sample_label})
    conversation.append({"role": "user", "content": clause_text})

    # Sampling settings: temperature 0 and a 10-token cap on the reply.
    sampling_options = {
        "temperature": 0,
        "max_tokens": 10,
        "top_p": 1,
        "frequency_penalty": 0,
        "presence_penalty": 0,
    }

    completion = client.chat.completions.create(
        model="gpt-4",
        messages=conversation,
        **sampling_options,
    )
    return completion
# Upload form: gather the paragraphs of every uploaded DOCX file, then label
# each kept paragraph with guess_topic() once "Calculate" is pressed.
with st.form("my_form"):
    multi = '''
1. Upload many files (DOCX) Only
2. Press Calculate and Get Name of Clauses
'''
    st.markdown(multi)

    # Restrict the picker to .docx so docx.Document() is never handed a file
    # in another format.
    uploaded_files = st.file_uploader(
        'Upload your files',
        type=['docx'],
        accept_multiple_files=True,
    )

    # `or []` keeps the loop safe if the uploader ever yields None.
    for f in uploaded_files or []:
        # Persist the upload next to the app, then parse it from disk.
        save_path = Path(os.getcwd(), f.name)
        with open(save_path, mode='wb') as w:
            w.write(f.getvalue())
        doc = docx.Document(save_path)
        for para in doc.paragraphs:
            text = para.text
            # Keep only paragraphs longer than LIMIT_WORDS_PARA words.
            if len(text.split()) > LIMIT_WORDS_PARA:
                list_dict.append({"file": f.name, "example": text})

    print('len(list_dict)', len(list_dict))
    # Explicit columns so the frame has "file"/"example" even with no rows.
    df_new_trimmed = pd.DataFrame(list_dict, columns=["file", "example"])

    submitted = st.form_submit_button("Calculate")
    if submitted:
        if df_new_trimmed.empty:
            # Nothing uploaded, or no paragraph was long enough: report it
            # instead of failing on the empty frame / empty result list.
            st.warning("No paragraphs to classify. Upload at least one DOCX file with text.")
        else:
            # One API call per kept paragraph; the reply text is the label.
            topics_list = [
                guess_topic(example).choices[0].message.content
                for example in df_new_trimmed['example']
            ]
            df_new_trimmed['Predicted_Clause'] = topics_list
            print(topics_list[0])
            st.dataframe(df_new_trimmed)
            st.write('-----')