Spaces:
Sleeping
Sleeping
File size: 3,628 Bytes
fe55df8 1228921 4ec3e55 56bd766 4ec3e55 a3582f4 4ec3e55 1f25217 4ec3e55 21c5ad6 66f3b9d 4ec3e55 f906a84 cc1b1ef 7f2d761 f906a84 eb6d128 7f2d761 cc1b1ef 7f2d761 4ec3e55 d6069ee 4ec3e55 7d6ad3d 1228921 7771ad4 1228921 7d6ad3d 1228921 4ec3e55 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 |
from openai import AzureOpenAI
from PyPDF2 import PdfReader
import logging
import os
# Logging setup: records are appended to a log file named below (relative path).
LOGGER = logging.getLogger(__name__)
logging.basicConfig(
    filename='contract_missing_clausses.log',  # log file name; adjust here if needed
    filemode='a',
    format='[%(asctime)s] [%(levelname)s] [%(filename)s] [%(lineno)s:%(funcName)s()] %(message)s',
    datefmt='%Y-%b-%d %H:%M:%S',
)

# Name of the verbosity level applied to this module's logger; adjust here if needed.
log_level_env = 'INFO'

# Level names mapped to the numeric constants exposed by the logging module.
log_level_dict = {
    level_name: getattr(logging, level_name)
    for level_name in ('DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL')
}

# An unrecognised level name falls back to INFO.
log_level = log_level_dict.get(log_level_env, log_level_dict['INFO'])
LOGGER.setLevel(log_level)
class ContractMissingClauses:
    """
    Ask an Azure OpenAI chat model to list the incomplete sentences in a contract.

    The contract can be supplied as plain text (``get_missing_clauses``) or as a
    PDF that is processed one page at a time (``iterate_each_page``).
    """

    def __init__(self):
        """
        Create the analyzer without contacting Azure OpenAI.

        The client is built lazily on the first request, so constructing this
        object does not depend on credentials being available.
        """
        self._client = None

    def _get_client(self):
        """
        Return the Azure OpenAI client, creating it on first use.

        The API key and endpoint are read from the ``AZURE_OPENAI_KEY`` and
        ``AZURE_OPENAI_ENDPOINT`` environment variables. The client is reused
        across calls so a multi-page PDF does not build a new one per page.
        """
        # getattr keeps this working for instances whose __init__ was bypassed
        # or overridden without calling super().__init__().
        client = getattr(self, "_client", None)
        if client is None:
            client = AzureOpenAI(
                api_key=os.getenv("AZURE_OPENAI_KEY"),
                api_version="2023-07-01-preview",
                azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
            )
            self._client = client
        return client

    @staticmethod
    def _resolve_pdf_path(pdf_file_path):
        """Return the path for a path-like value or for an object exposing it as ``.name``."""
        # Path-likes are checked first: pathlib.Path also has a ``.name``
        # attribute, but it holds only the final path component.
        if isinstance(pdf_file_path, (str, os.PathLike)):
            return pdf_file_path
        return pdf_file_path.name

    def get_missing_clauses(self, contract: str):
        """
        Return the model's list of incomplete sentences found in ``contract``.

        Args:
            contract (str): The text of the contract (or of one page of it).

        Returns:
            str | None: The content of the first choice in the model's reply,
            or None when the request fails. Failures are logged, not raised.
        """
        try:
            LOGGER.info("Analyzing contract and extracting missing clauses...")
            conversation = [
                {"role": "system", "content": "You are a helpful incomplete sentences finder"},
                {"role": "user", "content": f"""list out the incomplete sentences in the following text: {contract}"""},
            ]
            # NOTE(review): "ChatGPT" is presumably the Azure deployment name - confirm.
            chat_completion = self._get_client().chat.completions.create(
                model="ChatGPT",
                messages=conversation,
                max_tokens=1000,
                temperature=0,
            )
            return chat_completion.choices[0].message.content
        except Exception as e:
            # Best-effort by design: log the failure (with traceback) and let
            # the caller decide what to do with the missing result.
            LOGGER.exception("Error occurred while extracting missing clauses: %s", e)
            return None

    def iterate_each_page(self, pdf_file_path):
        """
        Run ``get_missing_clauses`` on every page of a PDF and concatenate the answers.

        Args:
            pdf_file_path: Either a path (``str`` or ``os.PathLike``) or an
                object whose ``name`` attribute holds the path.

        Returns:
            str | None: The per-page answers joined in page order with no
            separator. Pages without extractable text, and pages whose
            analysis failed, contribute nothing. None is returned when the
            PDF itself cannot be read; that failure is logged, not raised.
        """
        try:
            LOGGER.info("Analyzing contract and extracting pdf page...")
            pdf = PdfReader(self._resolve_pdf_path(pdf_file_path))
            answers = []
            for page_number, page in enumerate(pdf.pages, start=1):
                text = page.extract_text()
                if not text or not text.strip():
                    # Nothing to analyse; avoid a pointless model call.
                    LOGGER.warning("Page %d has no extractable text; skipping.", page_number)
                    continue
                answer = self.get_missing_clauses(text)
                if answer is None:
                    # A failed page must not discard the answers already collected.
                    LOGGER.warning("No result for page %d; skipping.", page_number)
                    continue
                answers.append(answer)
            return "".join(answers)
        except Exception as e:
            LOGGER.exception("Error occurred while extracting pdf page: %s", e)
            return None
|