Spaces:
Sleeping
Sleeping
File size: 4,623 Bytes
588b16e 245e161 588b16e 2fbb473 588b16e 059c9ac 588b16e f156b0d 588b16e b895d40 f8d40ff aade48a f8d40ff b895d40 8016946 b895d40 f8d40ff 588b16e 5ac67c9 9dbd6b3 5ac67c9 588b16e 5ac67c9 588b16e 5ac67c9 588b16e 5ac67c9 588b16e 5ac67c9 588b16e 9237f2d d087090 588b16e d087090 9237f2d 588b16e 007ac39 588b16e d087090 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 |
from PyPDF2 import PdfReader
from openai import AzureOpenAI
import gradio as gr
import os
class AggressiveContentFinder:
    """Find aggressive terms, warnings and penalties in a contract PDF.

    Text is extracted page by page with ``PdfReader`` and every non-empty page
    is sent to an Azure OpenAI chat deployment for analysis.  Credentials are
    read from the ``AZURE_OPENAI_KEY`` and ``AZURE_OPENAI_ENDPOINT``
    environment variables when the first request is made.
    """

    # API version passed to the Azure OpenAI client.
    API_VERSION = "2023-07-01-preview"
    # Value sent as ``model``.  NOTE(review): with Azure this is presumably the
    # deployment name rather than a model id -- confirm against the resource.
    DEPLOYMENT_NAME = "GPT-3"
    # Upper bound on the length of each per-page answer.
    MAX_RESPONSE_TOKENS = 1000
    # Text shown in the output box when the button is clicked without a file.
    NO_FILE_MESSAGE = "Please upload a PDF file first."

    def __init__(self):
        """Create a finder; the API client is built lazily on first use."""
        self._client = None

    def _get_client(self):
        """Return the shared ``AzureOpenAI`` client, creating it on first call.

        Building the client once avoids re-creating it for every page.
        """
        if getattr(self, "_client", None) is None:
            self._client = AzureOpenAI(
                api_key=os.getenv("AZURE_OPENAI_KEY"),
                api_version=self.API_VERSION,
                azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
            )
        return self._client

    def _extract_aggressive_content(self, contract_text: str) -> str:
        """Ask the chat deployment for aggressive terms in ``contract_text``.

        Args:
            contract_text: Text extracted from (one page of) the contract.

        Returns:
            The model's answer.  An empty string is returned when the text is
            empty/whitespace, when the request fails, or when the response has
            no content, so callers can always concatenate the result.
        """
        # Nothing to analyse (e.g. an image-only page): skip the API call.
        if not contract_text or not contract_text.strip():
            return ""

        conversation = [
            {"role": "system", "content": "You are a helpful Aggressive Terms Finder in Given Contract."},
            {
                "role": "user",
                "content": (
                    "This is a contract document content. Your task is to find "
                    "aggressive terms, warning terms and penalties in the given contract.\n"
                    f"```contract: {contract_text}```"
                ),
            },
        ]

        try:
            chat_completion = self._get_client().chat.completions.create(
                model=self.DEPLOYMENT_NAME,
                messages=conversation,
                max_tokens=self.MAX_RESPONSE_TOKENS,
                temperature=0,
            )
            response = chat_completion.choices[0].message.content
        except Exception as e:
            # Best-effort: one failing page must not abort the whole document.
            print(f"An error occurred during text analysis: {str(e)}")
            return ""

        # The message content may be missing; normalise it to a string.
        return response or ""

    def get_aggressive_content(self, pdf_file_path) -> str:
        """Extract text from a PDF and collect the aggressive terms per page.

        Args:
            pdf_file_path: Either a filesystem path or an object exposing the
                path as ``.name`` (as uploaded-file wrappers do).  ``None``
                means no file was supplied.

        Returns:
            The per-page answers joined by blank lines, or ``NO_FILE_MESSAGE``
            when ``pdf_file_path`` is ``None``.

        Raises:
            Any exception raised by ``PdfReader`` while opening or reading the
            file propagates to the caller.
        """
        if pdf_file_path is None:
            return self.NO_FILE_MESSAGE

        # Accept both a plain path and a file-like wrapper with ``.name``.
        path = getattr(pdf_file_path, "name", pdf_file_path)
        pdf = PdfReader(path)

        findings = []
        for page in pdf.pages:
            page_findings = self._extract_aggressive_content(page.extract_text())
            if page_findings:
                findings.append(page_findings)
        # Separate pages so consecutive answers do not run together.
        return "\n\n".join(findings)

    def file_output_fnn(self, file_path):
        """Return the path of an uploaded file for display in the file widget.

        Args:
            file_path: A path string or an object exposing the path as ``.name``.

        Returns:
            The path itself.
        """
        return getattr(file_path, "name", file_path)

    def gradio_interface(self):
        """Build the Gradio UI: upload a PDF, click, read the findings.

        Returns:
            The constructed ``gr.Blocks`` app so the caller can launch it.
        """
        with gr.Blocks(css="style.css", theme='xiaobaiyuan/theme_brief') as demo:
            with gr.Row(elem_id="col-container", scale=0.80):
                with gr.Column(scale=4):
                    file1 = gr.File(label="File", elem_classes="filenameshow")
                with gr.Column(scale=1):
                    # Only PDFs can be parsed by get_aggressive_content.
                    upload_button1 = gr.UploadButton(
                        "Browse File",
                        file_types=[".pdf"],
                        elem_classes="uploadbutton",
                    )
                aggressive_content = gr.Button("Get Aggressive Content", elem_classes="uploadbutton")
            with gr.Row(elem_id="col-container", scale=0.60):
                headings = gr.Textbox(label="Aggressive Content")

            # Mirror the file chosen via the button into the file widget.
            upload_button1.upload(self.file_output_fnn, upload_button1, file1)
            # The handler needs the uploaded file; an empty input list would
            # call it with no argument and fail on every click.
            aggressive_content.click(self.get_aggressive_content, [file1], headings)
        return demo
|