viboognesh committed on
Commit
180a0cc
·
verified ·
1 Parent(s): 83d4ab7

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. app.py +105 -0
  2. prompts.py +63 -0
  3. requirements.txt +2 -0
app.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from PyPDF2 import PdfReader
3
+ from anthropic import Anthropic
4
+ from prompts import DIFFERENTIATE_PROMPT
5
+
6
def extract_differences(input_text):
    """Parse ``<difference>`` blocks out of the model's tagged reply.

    The text is split on ``</difference>``; each chunk must contain a
    well-formed ``<text1_section>``, ``<text2_section>`` and ``<explanation>``
    element (opening tag followed later by its closing tag). Chunks that do
    not (including the trailing remainder after the last block) are skipped.

    Args:
        input_text: The content found between ``<differences>`` and
            ``</differences>``.

    Returns:
        A list of dicts with the keys ``'text1'``, ``'text2'`` and
        ``'explanation'``, each holding the whitespace-stripped element text,
        in the order the blocks appear.
    """
    # (result key, tag name) pairs, in the order the keys appear in each dict.
    fields = (
        ("text1", "text1_section"),
        ("text2", "text2_section"),
        ("explanation", "explanation"),
    )

    parsed_data = []
    for chunk in input_text.strip().split("</difference>"):
        entry = {}
        for key, tag in fields:
            # partition() finds the first opening tag and then the first
            # closing tag *after* it, so a stray closing tag that precedes
            # the opening one no longer yields a bogus section.
            _, opened, rest = chunk.partition(f"<{tag}>")
            body, closed, _ = rest.partition(f"</{tag}>")
            if not (opened and closed):
                break
            entry[key] = body.strip()
        else:
            # Only reached when all three sections were found.
            parsed_data.append(entry)

    return parsed_data
26
+
27
# Applied as a decorator so identical prompts reuse the cached reply across
# Streamlit reruns; a bare `st.cache_data()` statement caches nothing.
# NOTE(review): st.cache_data serializes the return value — confirm the
# Anthropic message object pickles cleanly in the deployed SDK version.
@st.cache_data
def make_llm_api_call(prompt):
    """Send ``prompt`` to Claude as a single user message and return the reply.

    Args:
        prompt: The full prompt text to send.

    Returns:
        The message object returned by ``client.messages.create``.
    """
    # NOTE(review): Anthropic() is built without arguments, so credentials
    # presumably come from the environment — verify for the deployment.
    client = Anthropic()
    message = client.messages.create(
        model="claude-3-haiku-20240307",
        max_tokens=4096,
        temperature=0,
        messages=[{"role": "user", "content": [{"type": "text", "text": prompt}]}],
    )
    return message
37
+
38
def get_llm_response(extractedtext1, extractedtext2):
    """Ask the model to compare two texts and return Markdown for display.

    The prompt is built from ``DIFFERENTIATE_PROMPT``, sent through
    ``make_llm_api_call``, and the ``<differences>`` element of the reply is
    reformatted into Markdown sections. Text before and after that element is
    kept around the formatted differences.

    Args:
        extractedtext1: Text of the first document.
        extractedtext2: Text of the second document.

    Returns:
        The formatted Markdown string, or the model's raw reply text when it
        does not contain a complete ``<differences>...</differences>`` element.
    """
    prompt = DIFFERENTIATE_PROMPT.format(text1=extractedtext1, text2=extractedtext2)
    message = make_llm_api_call(prompt)
    message_text = message.content[0].text

    before_differences, open_tag, remainder = message_text.partition("<differences>")
    differences_block, close_tag, after_differences = remainder.partition("</differences>")
    if not (open_tag and close_tag):
        # A reply without both tags (e.g. one cut off before the closing tag)
        # cannot be parsed; show it unmodified instead of raising IndexError.
        return message_text

    differences_list = extract_differences(differences_block.strip())

    difference_content = "\n\n\n".join(
        f"**Text1:**\n\n{d['text1']}\n\n**Text2:**\n\n{d['text2']}\n\n**Explanation:**\n\n{d['explanation']}\n\n----------------------"
        for d in differences_list
    )
    return f"{before_differences}\n\n{difference_content}\n\n{after_differences}"
54
+
55
+
56
def extract_text_with_pypdf(pdf_path):
    """Return the text of every page of a PDF, one newline after each page.

    Args:
        pdf_path: Whatever ``PdfReader`` accepts; ``main`` passes the
            Streamlit uploaded-file object directly.

    Returns:
        The concatenated page texts, each followed by ``"\\n"``.
    """
    reader = PdfReader(pdf_path)
    # join() avoids repeated string concatenation; `or ""` keeps a page with
    # no extractable text from breaking the concatenation should
    # extract_text() ever yield None.
    return "".join(f"{page.extract_text() or ''}\n" for page in reader.pages)
62
+
63
+
64
def main():
    """Render the app: upload two PDFs, show their text, and compare them.

    Nothing below the uploaders is rendered until both files are present.
    If text extraction fails, an error is shown and the comparison button is
    not offered, so it can never run against undefined extracted text.
    """
    st.set_page_config(layout="wide")  # Enable wide layout
    st.markdown(
        '<div style="text-align: center;">' + '<h1>PDF Upload and Compare App</h1>' + '</div>',
        unsafe_allow_html=True,
    )

    # Two equal-width columns, one uploader in each.
    col1, col2 = st.columns([2, 2])
    uploaded_file1 = col1.file_uploader("**Text 1**", type="pdf")
    uploaded_file2 = col2.file_uploader("**Text 2**", type="pdf")

    if not (uploaded_file1 and uploaded_file2):
        return

    filename1 = uploaded_file1.name
    filename2 = uploaded_file2.name

    # Keep the try body to the calls that can actually fail on a bad PDF.
    try:
        extracted_text1 = extract_text_with_pypdf(uploaded_file1)
        extracted_text2 = extract_text_with_pypdf(uploaded_file2)
    except Exception as e:
        # Nothing is saved here; the failure is in text extraction.
        st.error(f"Error extracting text from files: {str(e)}")
        return

    with col1.expander(filename1):
        st.write(extracted_text1)

    with col2.expander(filename2):
        st.write(extracted_text2)

    st.success(f"Content of files **{filename1}** and **{filename2}** have been extracted successfully.")

    # Button at the bottom to run the comparison.
    if st.button("Find Differences"):
        try:
            display_text = get_llm_response(extracted_text1, extracted_text2)
            st.markdown(display_text)
        except Exception as e:
            st.error(f"Error finding differences: {str(e)}")


if __name__ == "__main__":
    main()
prompts.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Prompt template for the semantic comparison of two texts.
# app.py fills the `{text1}` and `{text2}` placeholders via str.format and
# parses the reply by the <differences>/<difference>/<text1_section>/
# <text2_section>/<explanation> tags requested below, so the tag names here
# must stay in sync with that parser.
DIFFERENTIATE_PROMPT = """You will be given two texts to compare. Your task is to identify and highlight the sections of text that differ in meaning between the two texts. Focus only on differences that change the semantic content, not minor stylistic variations.

Here are the two texts to compare:

<text1>
{text1}
</text1>

<text2>
{text2}
</text2>

Instructions:
1. Carefully read and analyze both texts.
2. Identify sections where the meaning differs between the two texts.
3. Highlight these differences by copying the relevant sections from both texts.
4. Provide a brief explanation of how the meaning differs for each highlighted section.
5. Ignore minor differences in wording that do not change the overall meaning.

Present your analysis in the following format:

<differences>
<difference>
<text1_section>
[Copy the relevant section from Text 1 here]
</text1_section>
<text2_section>
[Copy the relevant section from Text 2 here]
</text2_section>
<explanation>
[Briefly explain how the meaning differs between these sections]
</explanation>
</difference>
<difference>
<text1_section>
[Copy the relevant section from Text 1 here]
</text1_section>
<text2_section>
[Copy the relevant section from Text 2 here]
</text2_section>
<explanation>
[Briefly explain how the meaning differs between these sections]
</explanation>
</difference>
...
<difference>
<text1_section>
[Copy the relevant section from Text 1 here]
</text1_section>
<text2_section>
[Copy the relevant section from Text 2 here]
</text2_section>
<explanation>
[Briefly explain how the meaning differs between these sections]
</explanation>
</difference>
</differences>

If there are no meaningful differences between the texts, return an empty <differences> element like this

<differences></differences>

Remember to focus only on differences that significantly change the meaning or content of the text. Do not highlight minor stylistic variations or differences in phrasing that convey the same information."""
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ streamlit
2
+ PyPDF2