File size: 3,628 Bytes
fe55df8
1228921
4ec3e55
56bd766
4ec3e55
a3582f4
4ec3e55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1f25217
4ec3e55
 
 
 
 
21c5ad6
 
 
 
 
66f3b9d
4ec3e55
 
 
 
 
 
 
 
 
f906a84
 
cc1b1ef
 
 
 
 
7f2d761
f906a84
eb6d128
7f2d761
 
 
cc1b1ef
 
 
 
 
 
7f2d761
 
4ec3e55
 
 
 
 
 
d6069ee
4ec3e55
 
 
 
 
 
 
 
7d6ad3d
 
1228921
 
 
 
 
 
 
 
 
7771ad4
1228921
7d6ad3d
1228921
4ec3e55
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
from openai import AzureOpenAI
from PyPDF2 import PdfReader
import logging
import os


# Logging setup: records are appended to a log file in the working directory.
logging.basicConfig(
    format='[%(asctime)s] [%(levelname)s] [%(filename)s] [%(lineno)s:%(funcName)s()] %(message)s',
    datefmt='%Y-%b-%d %H:%M:%S',
    filename='contract_missing_clausses.log',  # Change the log file name here
    filemode='a',
)
LOGGER = logging.getLogger(__name__)

# Name of the desired log level; change it here to make logging more or less verbose.
log_level_env = 'INFO'

# Maps each supported level name to the matching constant of the logging module.
log_level_dict = {
    level_name: getattr(logging, level_name)
    for level_name in ('DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL')
}

# An unrecognised level name falls back to INFO.
log_level = log_level_dict.get(log_level_env, log_level_dict['INFO'])
LOGGER.setLevel(log_level)

class ContractMissingClauses:
    """
    Analyze contract text with an Azure OpenAI chat model.

    The prompt sent to the model asks it to list the incomplete sentences in
    the supplied text. A whole PDF can be processed page by page with
    ``iterate_each_page``.
    """

    def __init__(self):
        """
        Initialize the ContractMissingClauses class.

        No state is stored on the instance: the Azure OpenAI client is built
        inside ``get_missing_clauses`` from environment variables on each call.
        """

    def get_missing_clauses(self, contract: str):
        """
        Ask the chat model to list the incomplete sentences in the given text.

        Credentials are read from the ``AZURE_OPENAI_KEY`` and
        ``AZURE_OPENAI_ENDPOINT`` environment variables.

        Args:
            contract (str): The text of the contract (or of one of its pages).

        Returns:
            The content of the model's first reply, or None when the request
            fails. Failures are logged, not raised.
        """
        try:
            LOGGER.info("Analyzing contract and extracting missing clauses...")
            client = AzureOpenAI(
                api_key=os.getenv("AZURE_OPENAI_KEY"),
                api_version="2023-07-01-preview",
                azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
            )

            conversation = [
                {"role": "system", "content": "You are a helpful incomplete sentences finder"},
                {"role": "user", "content": f"""list out the incomplete sentences in the following text: {contract}"""},
            ]

            chat_completion = client.chat.completions.create(
                # NOTE(review): "ChatGPT" is presumably the Azure deployment
                # name -- confirm against the Azure resource configuration.
                model="ChatGPT",
                messages=conversation,
                max_tokens=1000,
                temperature=0,
            )
            return chat_completion.choices[0].message.content

        except Exception as e:
            # Best-effort API: log the failure (with traceback) and signal it
            # to the caller with an explicit None instead of raising.
            LOGGER.exception("Error occurred while extracting missing clauses: %s", e)
            return None

    def iterate_each_page(self, pdf_file_path):
        """
        Run ``get_missing_clauses`` on every page of a PDF and join the replies.

        Args:
            pdf_file_path: Either an object whose ``name`` attribute holds the
                path of the PDF (for example an uploaded temp-file wrapper),
                or the path itself as ``str`` / ``os.PathLike``.

        Returns:
            str | None: The per-page replies concatenated in page order with
            no separator. Pages that have no extractable text, or whose
            analysis failed, contribute nothing. None is returned when the
            PDF itself cannot be opened or read; that error is logged.
        """
        try:
            LOGGER.info("Analyzing contract and extracting pdf page...")

            # A plain path is used as-is. Checked first because pathlib.Path
            # also has a ``.name`` attribute, but it is only the base name.
            if isinstance(pdf_file_path, (str, os.PathLike)):
                pdf_source = os.fspath(pdf_file_path)
            else:
                pdf_source = pdf_file_path.name

            pdf = PdfReader(pdf_source)

            page_results = []
            for page_number, page in enumerate(pdf.pages, start=1):
                text = page.extract_text()

                # Nothing to analyze on this page, so skip the model request.
                if not text or not text.strip():
                    LOGGER.info("Page %d has no extractable text; skipping", page_number)
                    continue

                page_result = self.get_missing_clauses(text)

                # None means the request failed (already logged) or the model
                # returned no content. Skip it so one bad page does not
                # discard the results gathered from all the other pages.
                if page_result is None:
                    LOGGER.warning("No result for page %d; skipping", page_number)
                    continue

                page_results.append(page_result)

            return "".join(page_results)

        except Exception as e:
            # Opening or parsing the PDF failed: log with traceback and return
            # None explicitly, matching the error contract of get_missing_clauses.
            LOGGER.exception("Error occurred while extracting pdf page: %s", e)
            return None