viboognesh committed on
Commit
be9c181
·
verified ·
1 Parent(s): f6df22c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -40
app.py CHANGED
@@ -35,7 +35,6 @@ def make_llm_api_call(prompt):
35
  )
36
  return message
37
 
38
-
39
  def get_llm_response(extractedtext1, extractedtext2):
40
  prompt = DIFFERENTIATE_PROMPT.format(text1=extractedtext1, text2=extractedtext2)
41
 
@@ -69,6 +68,8 @@ def main():
69
  st.set_page_config(layout="wide") # Enable wide layout
70
  if "differences_data" not in st.session_state:
71
  st.session_state.differences_data = []
 
 
72
  if "file1" not in st.session_state:
73
  st.session_state.file1 = None
74
  if "file2" not in st.session_state:
@@ -91,6 +92,9 @@ def main():
91
  filename1 = st.session_state.file1.name
92
  filename2 = st.session_state.file2.name
93
 
 
 
 
94
  try:
95
  extracted_text1 = extract_text_with_pypdf(st.session_state.file1)
96
  extracted_text2 = extract_text_with_pypdf(st.session_state.file2)
@@ -103,48 +107,52 @@ def main():
103
  st.success(f"Content of files **{filename1}** and **{filename2}** have been extracted successfully.")
104
  except Exception as e:
105
  st.error(f"Error saving files: {str(e)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
 
107
  # Add button at the bottom to run Find Differences function
108
  if st.button("Find Differences"):
109
- try:
110
- # display_text, parsed_data = process_concurrently(extracted_text1, extracted_text2)
111
- # display_text, parsed_data = get_llm_response(extracted_text1, extracted_text2)
112
-
113
- i = 1
114
- for etext1, etext2 in zip(extracted_text1, extracted_text2):
115
- data = next((d for d in st.session_state.differences_data if d['etext1'] == etext1 and d['etext2'] == etext2), None)
116
- if data:
117
- pdata = data['pdata']
118
- dext1 = data['dext1']
119
- dext2 = data['dext2']
120
- else:
121
- pdata = get_llm_response(etext1, etext2)
122
- dext1 = etext1
123
- dext2 = etext2
124
-
125
- for diff in pdata:
126
- diff_text1 = diff['text1'].strip()
127
- diff_text2 = diff['text2'].strip()
128
- if diff_text1 == "" or diff_text2 == "": continue
129
- diff_text1_phrase = "\n".join([f"<span style='background-color: grey;'>{t}</span>" for t in diff_text1.splitlines()])
130
- diff_text2_phrase = "\n".join([f"<span style='background-color: grey;'>{t}</span>" for t in diff_text2.splitlines()])
131
- dext1 = diff_text1_phrase.join(dext1.split(diff_text1)) if diff_text1 in dext1 else dext1
132
- dext2 = diff_text2_phrase.join(dext2.split(diff_text2)) if diff_text2 in dext2 else dext2
133
-
134
- st.session_state.differences_data.append({"etext1": etext1, "etext2": etext2, "pdata": pdata, "dext1": dext1, "dext2": dext2})
135
- reverse_pdata = [{'text1': d['text2'], 'text2': d['text1'], 'explanation': d['explanation']} for d in pdata]
136
- st.session_state.differences_data.append({"etext1": etext2, "etext2": etext1, "pdata": reverse_pdata, "dext1": dext2, "dext2": dext1})
137
-
138
- display_text = "\n\n\n".join([f"**Text1:**\n\n{d['text1']}\n\n**Text2:**\n\n{d['text2']}\n\n**Explanation:**\n\n{d['explanation']}\n\n----------------------\n" for d in pdata])
139
-
140
- with st.expander(f"**Page {i}** - {filename1}"):
141
- st.markdown("\n\n".join(dext1.splitlines()), unsafe_allow_html=True)
142
- with st.expander(f"**Page {i}** - {filename2}"):
143
- st.markdown("\n\n".join(dext2.splitlines()), unsafe_allow_html=True)
144
- st.markdown(display_text)
145
- i += 1
146
- except Exception as e:
147
- st.error(f"Error finding differences: {str(e)}")
148
 
149
  if __name__ == "__main__":
150
  main()
 
35
  )
36
  return message
37
 
 
38
  def get_llm_response(extractedtext1, extractedtext2):
39
  prompt = DIFFERENTIATE_PROMPT.format(text1=extractedtext1, text2=extractedtext2)
40
 
 
68
  st.set_page_config(layout="wide") # Enable wide layout
69
  if "differences_data" not in st.session_state:
70
  st.session_state.differences_data = []
71
+ if "display_data" not in st.session_state:
72
+ st.session_state.display_data = {"file1": None, "file2": None, "i": 0}
73
  if "file1" not in st.session_state:
74
  st.session_state.file1 = None
75
  if "file2" not in st.session_state:
 
92
  filename1 = st.session_state.file1.name
93
  filename2 = st.session_state.file2.name
94
 
95
+ if st.session_state.display_data["file1"] != st.session_state.file1 or st.session_state.display_data["file2"] != st.session_state.file2:
96
+ st.session_state.display_data = {"file1": st.session_state.file1, "file2": st.session_state.file2, "i": 0}
97
+
98
  try:
99
  extracted_text1 = extract_text_with_pypdf(st.session_state.file1)
100
  extracted_text2 = extract_text_with_pypdf(st.session_state.file2)
 
107
  st.success(f"Content of files **{filename1}** and **{filename2}** have been extracted successfully.")
108
  except Exception as e:
109
  st.error(f"Error saving files: {str(e)}")
110
+
111
+ try:
112
+ # display_text, parsed_data = process_concurrently(extracted_text1, extracted_text2)
113
+ # display_text, parsed_data = get_llm_response(extracted_text1, extracted_text2)
114
+
115
+ for i,(etext1, etext2) in enumerate(zip(extracted_text1, extracted_text2)):
116
+ if i >= st.session_state.display_data["i"]: break
117
+ data = next((d for d in st.session_state.differences_data if d['etext1'] == etext1 and d['etext2'] == etext2), None)
118
+ if data:
119
+ pdata = data['pdata']
120
+ dext1 = data['dext1']
121
+ dext2 = data['dext2']
122
+ else:
123
+ pdata = get_llm_response(etext1, etext2)
124
+ dext1 = etext1
125
+ dext2 = etext2
126
+
127
+ for diff in pdata:
128
+ diff_text1 = diff['text1'].strip()
129
+ diff_text2 = diff['text2'].strip()
130
+ if diff_text1 == "" or diff_text2 == "": continue
131
+ diff_text1_phrase = "\n".join([f"<span style='background-color: grey;'>{t}</span>" for t in diff_text1.splitlines()])
132
+ diff_text2_phrase = "\n".join([f"<span style='background-color: grey;'>{t}</span>" for t in diff_text2.splitlines()])
133
+ dext1 = diff_text1_phrase.join(dext1.split(diff_text1)) if diff_text1 in dext1 else dext1
134
+ dext2 = diff_text2_phrase.join(dext2.split(diff_text2)) if diff_text2 in dext2 else dext2
135
+
136
+ st.session_state.differences_data.append({"etext1": etext1, "etext2": etext2, "pdata": pdata, "dext1": dext1, "dext2": dext2})
137
+ reverse_pdata = [{'text1': d['text2'], 'text2': d['text1'], 'explanation': d['explanation']} for d in pdata]
138
+ st.session_state.differences_data.append({"etext1": etext2, "etext2": etext1, "pdata": reverse_pdata, "dext1": dext2, "dext2": dext1})
139
+
140
+ display_text = "\n\n\n".join([f"**Text1:**\n\n{d['text1']}\n\n**Text2:**\n\n{d['text2']}\n\n**Explanation:**\n\n{d['explanation']}\n\n----------------------\n" for d in pdata])
141
+
142
+ with st.expander(f"**Page {i+1}** - {filename1}"):
143
+ st.markdown("\n\n".join(dext1.splitlines()), unsafe_allow_html=True)
144
+ with st.expander(f"**Page {i+1}** - {filename2}"):
145
+ st.markdown("\n\n".join(dext2.splitlines()), unsafe_allow_html=True)
146
+ st.markdown(display_text)
147
+ except Exception as e:
148
+ st.error(f"Error finding differences: {str(e)}")
149
+
150
 
151
  # Add button at the bottom to run Find Differences function
152
  if st.button("Find Differences"):
153
+ st.session_state.display_data["i"] = st.session_state.display_data["i"] + 5
154
+ st.rerun()
155
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
156
 
157
  if __name__ == "__main__":
158
  main()