Spaces:
Runtime error
Runtime error
viboognesh
committed on
Upload folder using huggingface_hub
Browse files
- app.py +105 -0
- prompts.py +63 -0
- requirements.txt +2 -0
app.py
ADDED
@@ -0,0 +1,105 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from PyPDF2 import PdfReader
|
3 |
+
from anthropic import Anthropic
|
4 |
+
from prompts import DIFFERENTIATE_PROMPT
|
5 |
+
|
6 |
+
def _extract_tag(chunk, tag):
    """Return the text inside the first ``<tag>...</tag>`` of *chunk*, or None.

    None is returned when the opening tag is absent or when no closing tag
    follows it, so a stray closing tag that appears *before* the opening tag
    does not count as a match.
    """
    _, opener, rest = chunk.partition(f"<{tag}>")
    if not opener:
        return None
    body, closer, _ = rest.partition(f"</{tag}>")
    return body if closer else None


def extract_differences(input_text):
    """Parse the ``<difference>`` entries of a model reply into dictionaries.

    The text is split on ``</difference>``; every chunk that contains a
    complete ``<text1_section>``, ``<text2_section>`` and ``<explanation>``
    element produces one entry. Chunks that lack any of the three (such as
    the trailing remainder after the last ``</difference>``) are skipped.

    Args:
        input_text: The text found between ``<differences>`` and
            ``</differences>`` in the model reply.

    Returns:
        A list of dicts with the keys ``'text1'``, ``'text2'`` and
        ``'explanation'``; each value has surrounding whitespace stripped.
        The list is empty when no complete entry is found.
    """
    # Output key -> tag name that carries its value in the model reply.
    fields = (
        ("text1", "text1_section"),
        ("text2", "text2_section"),
        ("explanation", "explanation"),
    )

    parsed_data = []
    for chunk in input_text.strip().split("</difference>"):
        entry = {key: _extract_tag(chunk, tag) for key, tag in fields}
        if any(value is None for value in entry.values()):
            # Incomplete entry: drop it rather than emit partial data.
            continue
        parsed_data.append({key: value.strip() for key, value in entry.items()})

    return parsed_data
|
26 |
+
|
27 |
+
# Applied as a decorator so identical prompts reuse the earlier reply across
# Streamlit reruns. The original bare ``st.cache_data()`` statement built a
# decorator and discarded it, so nothing was cached.
# NOTE(review): st.cache_data serializes the return value; this assumes the
# SDK's message object is picklable - confirm with the installed versions.
@st.cache_data
def make_llm_api_call(prompt):
    """Send *prompt* to the Anthropic Messages API and return the reply.

    The request is a single user message holding one text block, sent with
    temperature 0 and a 4096-token output limit to the
    ``claude-3-haiku-20240307`` model.

    Args:
        prompt: The full prompt text to send as the user message.

    Returns:
        The message object returned by ``client.messages.create``; the
        caller reads the reply text from ``message.content[0].text``.
    """
    # No credentials are passed here, so the client relies on the SDK's own
    # defaults (presumably the ANTHROPIC_API_KEY environment variable).
    client = Anthropic()
    message = client.messages.create(
        model="claude-3-haiku-20240307",
        max_tokens=4096,
        temperature=0,
        messages=[{"role": "user", "content": [{"type": "text", "text": prompt}]}],
    )
    return message
|
37 |
+
|
38 |
+
def get_llm_response(extractedtext1, extractedtext2):
    """Ask the model to compare two texts and return Markdown for display.

    The two texts are substituted into ``DIFFERENTIATE_PROMPT``, the prompt
    is sent through ``make_llm_api_call``, and the ``<differences>`` element
    of the reply is reformatted into Markdown sections. Any text the model
    wrote before or after that element is kept around the formatted list.

    Args:
        extractedtext1: Text of the first document.
        extractedtext2: Text of the second document.

    Returns:
        The Markdown string to display. If the reply does not contain a
        complete ``<differences>...</differences>`` element, or parsing it
        fails, the raw reply text is returned unchanged.
    """
    prompt = DIFFERENTIATE_PROMPT.format(text1=extractedtext1, text2=extractedtext2)

    message = make_llm_api_call(prompt)
    message_text = message.content[0].text

    # partition() never raises on a missing tag, so a malformed reply takes
    # the raw-text fallback instead of surfacing an IndexError to the caller.
    before_differences, open_tag, remainder = message_text.partition("<differences>")
    differences_block, close_tag, after_differences = remainder.partition("</differences>")
    if not open_tag or not close_tag:
        return message_text

    try:
        differences_list = extract_differences(differences_block.strip())
    except Exception:
        # Deliberate best effort: showing the unformatted reply is more
        # useful to the user than an error.
        return message_text

    difference_content = "\n\n\n".join(
        f"**Text1:**\n\n{d['text1']}\n\n**Text2:**\n\n{d['text2']}\n\n**Explanation:**\n\n{d['explanation']}\n\n----------------------"
        for d in differences_list
    )
    return f"{before_differences}\n\n{difference_content}\n\n{after_differences}"
|
54 |
+
|
55 |
+
|
56 |
+
def extract_text_with_pypdf(pdf_path):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_path: The PDF source handed straight to ``PdfReader``. Despite
            the name, ``main`` passes Streamlit uploaded-file objects rather
            than filesystem paths.

    Returns:
        A string made of each page's extracted text followed by a newline.
        The string is empty when the document has no pages.
    """
    reader = PdfReader(pdf_path)
    # `or ""` keeps a page that yields no text from raising a TypeError.
    # NOTE(review): whether extract_text() can return None depends on the
    # installed PyPDF2 version - confirm.
    return "".join(f"{page.extract_text() or ''}\n" for page in reader.pages)
|
62 |
+
|
63 |
+
|
64 |
+
def main():
    """Render the Streamlit page: upload two PDFs, show their text, compare.

    Two side-by-side uploaders accept one PDF each. Once both are present,
    the text of each file is extracted and shown in an expander under its
    uploader. A "Find Differences" button then sends both texts to
    ``get_llm_response`` and renders the result as Markdown.

    Extraction and comparison failures are reported with ``st.error``
    instead of raising.
    """
    st.set_page_config(layout="wide")  # Enable wide layout
    st.markdown('<div style="text-align: center;">' + '<h1>PDF Upload and Compare App</h1>' + '</div>', unsafe_allow_html=True)

    # Two equal-width columns, one uploader in each.
    col1, col2 = st.columns([2, 2])
    uploaded_file1 = col1.file_uploader("**Text 1**", type="pdf")
    uploaded_file2 = col2.file_uploader("**Text 2**", type="pdf")

    # Nothing to extract or compare until both files are present.
    if not (uploaded_file1 and uploaded_file2):
        return

    filename1 = uploaded_file1.name
    filename2 = uploaded_file2.name

    try:
        extracted_text1 = extract_text_with_pypdf(uploaded_file1)
        extracted_text2 = extract_text_with_pypdf(uploaded_file2)
    except Exception as e:
        st.error(f"Error extracting text: {str(e)}")
        # Stop here: without both texts the comparison button below would
        # only fail on undefined variables.
        return

    with col1.expander(filename1):
        st.write(extracted_text1)

    with col2.expander(filename2):
        st.write(extracted_text2)

    st.success(f"Content of files **{filename1}** and **{filename2}** have been extracted successfully.")

    # Button at the bottom runs the comparison on the extracted texts.
    if st.button("Find Differences"):
        try:
            display_text = get_llm_response(extracted_text1, extracted_text2)
            st.markdown(display_text)
        except Exception as e:
            st.error(f"Error finding differences: {str(e)}")


if __name__ == "__main__":
    main()
|
prompts.py
ADDED
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
DIFFERENTIATE_PROMPT = """You will be given two texts to compare. Your task is to identify and highlight the sections of text that differ in meaning between the two texts. Focus only on differences that change the semantic content, not minor stylistic variations.
|
2 |
+
|
3 |
+
Here are the two texts to compare:
|
4 |
+
|
5 |
+
<text1>
|
6 |
+
{text1}
|
7 |
+
</text1>
|
8 |
+
|
9 |
+
<text2>
|
10 |
+
{text2}
|
11 |
+
</text2>
|
12 |
+
|
13 |
+
Instructions:
|
14 |
+
1. Carefully read and analyze both texts.
|
15 |
+
2. Identify sections where the meaning differs between the two texts.
|
16 |
+
3. Highlight these differences by copying the relevant sections from both texts.
|
17 |
+
4. Provide a brief explanation of how the meaning differs for each highlighted section.
|
18 |
+
5. Ignore minor differences in wording that do not change the overall meaning.
|
19 |
+
|
20 |
+
Present your analysis in the following format:
|
21 |
+
|
22 |
+
<differences>
|
23 |
+
<difference>
|
24 |
+
<text1_section>
|
25 |
+
[Copy the relevant section from Text 1 here]
|
26 |
+
</text1_section>
|
27 |
+
<text2_section>
|
28 |
+
[Copy the relevant section from Text 2 here]
|
29 |
+
</text2_section>
|
30 |
+
<explanation>
|
31 |
+
[Briefly explain how the meaning differs between these sections]
|
32 |
+
</explanation>
|
33 |
+
</difference>
|
34 |
+
<difference>
|
35 |
+
<text1_section>
|
36 |
+
[Copy the relevant section from Text 1 here]
|
37 |
+
</text1_section>
|
38 |
+
<text2_section>
|
39 |
+
[Copy the relevant section from Text 2 here]
|
40 |
+
</text2_section>
|
41 |
+
<explanation>
|
42 |
+
[Briefly explain how the meaning differs between these sections]
|
43 |
+
</explanation>
|
44 |
+
</difference>
|
45 |
+
...
|
46 |
+
<difference>
|
47 |
+
<text1_section>
|
48 |
+
[Copy the relevant section from Text 1 here]
|
49 |
+
</text1_section>
|
50 |
+
<text2_section>
|
51 |
+
[Copy the relevant section from Text 2 here]
|
52 |
+
</text2_section>
|
53 |
+
<explanation>
|
54 |
+
[Briefly explain how the meaning differs between these sections]
|
55 |
+
</explanation>
|
56 |
+
</difference>
|
57 |
+
</differences>
|
58 |
+
|
59 |
+
If there are no meaningful differences between the texts, return an empty <differences> element like this
|
60 |
+
|
61 |
+
<differences></differences>
|
62 |
+
|
63 |
+
Remember to focus only on differences that significantly change the meaning or content of the text. Do not highlight minor stylistic variations or differences in phrasing that convey the same information."""
|
requirements.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
streamlit
|
2 |
+
PyPDF2
|
3 |
+
anthropic
|