naveenvenkatesh committed on
Commit
a52afff
·
verified ·
1 Parent(s): ff1bb0d

Update incompletesentencefinder.py

Browse files
Files changed (1) hide show
  1. incompletesentencefinder.py +25 -21
incompletesentencefinder.py CHANGED
@@ -1,4 +1,4 @@
1
- import fitz # PyMuPDF
2
  import openai
3
  import gradio as gr
4
 
@@ -17,8 +17,7 @@ class IncompleteSentenceFinder:
17
  Args:
18
  api_key (str): Your OpenAI API key.
19
  """
20
- # openai.api_key = openai_api_key
21
- pass
22
 
23
  def _check_incomplete_sentence(self, text: str) -> str:
24
 
@@ -31,19 +30,20 @@ class IncompleteSentenceFinder:
31
  Returns:
32
  str: Incomplete sentences identified by GPT-3.
33
  """
34
- # Create a request to OpenAI's GPT-3 engine to identify incomplete sentences.
35
- response = openai.Completion.create(
36
- engine="text-davinci-003",
37
- prompt=f"list out the incomplete sentences in the following text:\n{text}",
38
- max_tokens=1000,
 
 
 
 
 
 
39
  )
40
-
41
- # Extract and strip the text of identified incomplete sentences from the GPT-3 response.
42
- incomplete_sentences = response.choices[0].text.strip()
43
-
44
- print("incomplete_sentences Extracted Successfully!")
45
-
46
- return incomplete_sentences
47
 
48
  def get_incomplete_sentence(self,pdf_file) -> str:
49
 
@@ -54,13 +54,17 @@ class IncompleteSentenceFinder:
54
  str: Incomplete sentences identified by GPT-3.
55
  """
56
  try:
57
- # Open the PDF file using PyMuPDF's fitz library
58
- doc = fitz.open(pdf_file.name)
 
59
  incomplete_text = ""
60
-
61
- # Iterate through each page in the PDF document and extract the text
62
- for page in doc:
63
- text = page.get_text()
 
 
 
64
  incomplete_text += self._check_incomplete_sentence(text)
65
 
66
  return incomplete_text
 
1
+ from PyPDF2 import PdfReader
2
  import openai
3
  import gradio as gr
4
 
 
17
  Args:
18
  api_key (str): Your OpenAI API key.
19
  """
20
+ self.client = OpenAI()
 
21
 
22
  def _check_incomplete_sentence(self, text: str) -> str:
23
 
 
30
  Returns:
31
  str: Incomplete sentences identified by GPT-3.
32
  """
33
+ conversation = [
34
+ {"role": "system", "content": "You are a helpful incomplete sentences finder"},
35
+ {"role": "user", "content": f"""list out the incomplete sentences in the following text: {text}"""}
36
+ ]
37
+
38
+ # Call OpenAI GPT-3.5-turbo
39
+ chat_completion = self.client.chat.completions.create(
40
+ model = "gpt-3.5-turbo",
41
+ messages = conversation,
42
+ max_tokens=500,
43
+ temperature=0
44
  )
45
+ response = chat_completion.choices[0].message.content
46
+ return response
 
 
 
 
 
47
 
48
  def get_incomplete_sentence(self,pdf_file) -> str:
49
 
 
54
  str: Incomplete sentences identified by GPT-3.
55
  """
56
  try:
57
+ # Open the multi-page PDF using PdfReader
58
+ pdf = PdfReader(pdf_file_path)
59
+
60
  incomplete_text = ""
61
+
62
+ # Extract text from each page and pass it to _check_incomplete_sentence
63
+ for page_number in range(len(pdf.pages)):
64
+
65
+ # Extract text from the page
66
+ page = pdf.pages[page_number]
67
+ text = page.extract_text()
68
  incomplete_text += self._check_incomplete_sentence(text)
69
 
70
  return incomplete_text