Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -15,10 +15,38 @@ openai.api_key = api_key
|
|
15 |
client = OpenAI(api_key=api_key)
|
16 |
pdf_folder = "pdf"
|
17 |
|
18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
|
20 |
#---------------------PDF OVERVIEW----------------------
|
21 |
-
st.subheader("PDF Folder Overview")
|
22 |
def get_pdf_details(folder_path):
|
23 |
pdf_details = []
|
24 |
for filename in os.listdir(folder_path):
|
@@ -28,7 +56,7 @@ def get_pdf_details(folder_path):
|
|
28 |
with open(pdf_path, "rb") as file:
|
29 |
pdf_reader = PdfReader(file)
|
30 |
page_count = len(pdf_reader.pages)
|
31 |
-
pdf_details.append({"
|
32 |
except Exception as e:
|
33 |
st.warning(f"Could not read {filename}: {str(e)}")
|
34 |
return pdf_details
|
@@ -36,21 +64,23 @@ def get_pdf_details(folder_path):
|
|
36 |
pdf_list = get_pdf_details(pdf_folder)
|
37 |
pdf_df = pd.DataFrame(pdf_list)
|
38 |
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
|
|
|
|
44 |
|
45 |
#---------------------MULTISELECT AND TEXT EXTRACTION----------------------
|
46 |
-
st.subheader("Select PDFs for Extraction and Analysis")
|
47 |
|
48 |
pdf_files = [f for f in os.listdir(pdf_folder) if f.lower().endswith('.pdf')]
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
|
|
54 |
|
55 |
pdf_texts = {}
|
56 |
|
@@ -76,9 +106,10 @@ template = """
|
|
76 |
\'Hakim Ketua\', \'Hakim Anggota\', \'Panitera\', \'Putusan\', \'Putusan Lainnya\', \'Catatan Putusan\', \'Tanggal Musyawarah\', \'Tanggal Pembacaan\', \'Jenis Institusi Yudisial\', \'Tanggal Pendaftaran\', \'Institusi Yudisial\', \'Nomor Kasus\', \'Pengadilan\', \'Nama Terdakwa\', \'Tempat Lahir Terdakwa\', \'Tanggal Lahir Terdakwa\', \'Usia Terdakwa\', \'Jenis Kelamin Terdakwa\', \'Kebangsaan Terdakwa\', \'Agama Terdakwa\', \'Pekerjaan Terdakwa\', \'Pasal Dakwaan\',\' Pelanggaran Dakwaan\', \'Vonis Hukuman\', \'Deskripsi Vonis Atribut Disita\', \'Vonis Atribut Disita Berat\', \'Denda\', dan, \'Kesimpulan\'.
|
77 |
# """
|
78 |
|
|
|
|
|
79 |
|
80 |
-
|
81 |
-
if st.button("Analyze Selected PDFs"):
|
82 |
summaries = []
|
83 |
for pdf_name, text in pdf_texts.items():
|
84 |
response = client.chat.completions.create(
|
@@ -95,8 +126,12 @@ if st.button("Analyze Selected PDFs"):
|
|
95 |
df.columns = [f"Kesimpulan Putusan ({pdf_name})"]
|
96 |
|
97 |
summaries.append(df)
|
|
|
98 |
|
99 |
# Display the summaries for each selected PDF
|
100 |
for summary in summaries:
|
101 |
-
|
|
|
102 |
st.dataframe(summary)
|
|
|
|
|
|
15 |
client = OpenAI(api_key=api_key)
|
16 |
pdf_folder = "pdf"
|
17 |
|
18 |
+
# CSS for the centered title and the bold colored line.
# Notes inside the stylesheet use /* ... */ because CSS has no "#" comment
# syntax: a "#" note placed after a declaration is parsed as the start of the
# next declaration and causes the browser to discard the rule that follows it.
bold_line_css = """
<style>
.centered-title {
    text-align: center;
    margin-bottom: 0; /* No gap between the title and the line */
}
.bold-colored-line {
    border: none;
    height: 3px; /* Thickness of the line */
    background-color: #FF6347; /* Tomato color */
    margin-top: 0; /* No gap at the top */
}
</style>
"""
|
33 |
+
|
34 |
+
# Add the custom CSS to the Streamlit app
|
35 |
+
|
36 |
+
|
37 |
+
|
38 |
+
st.markdown(
|
39 |
+
"""
|
40 |
+
<h1 style='text-align: center;'>
|
41 |
+
Mahkamah Agung: NER & Summarization of Legal Documents
|
42 |
+
</h1>
|
43 |
+
<hr>
|
44 |
+
""",
|
45 |
+
unsafe_allow_html=True
|
46 |
+
)
|
47 |
+
|
48 |
|
49 |
#---------------------PDF OVERVIEW----------------------
|
|
|
50 |
def get_pdf_details(folder_path):
|
51 |
pdf_details = []
|
52 |
for filename in os.listdir(folder_path):
|
|
|
56 |
with open(pdf_path, "rb") as file:
|
57 |
pdf_reader = PdfReader(file)
|
58 |
page_count = len(pdf_reader.pages)
|
59 |
+
pdf_details.append({"Berkas Putusan": filename, "Jumlah Halaman": page_count})
|
60 |
except Exception as e:
|
61 |
st.warning(f"Could not read {filename}: {str(e)}")
|
62 |
return pdf_details
|
|
|
64 |
pdf_list = get_pdf_details(pdf_folder)
|
65 |
pdf_df = pd.DataFrame(pdf_list)
|
66 |
|
67 |
+
col1, col2= st.columns(2)
|
68 |
+
with col1:
|
69 |
+
if not pdf_df.empty:
|
70 |
+
with st.expander('PDF Overview'):
|
71 |
+
st.dataframe(pdf_df)
|
72 |
+
else:
|
73 |
+
st.warning("No PDFs found in the specified folder.")
|
74 |
|
75 |
#---------------------MULTISELECT AND TEXT EXTRACTION----------------------
|
|
|
76 |
|
77 |
pdf_files = [f for f in os.listdir(pdf_folder) if f.lower().endswith('.pdf')]
|
78 |
+
with col2:
|
79 |
+
with st.expander('PDF Documents'):
|
80 |
+
selected_pdfs = st.multiselect("Select PDFs", pdf_files)
|
81 |
+
def extract_text_from_pdf(uploaded_file, start_page, end_page):
    """Return the text of a page range of ``uploaded_file``.

    Delegates to ``extract_text`` (not defined in the visible part of this
    file), passing ``range(start_page, end_page + 1)`` as ``page_numbers`` so
    that ``end_page`` itself is part of the requested range.

    NOTE(review): whether page numbers are zero- or one-based is decided by
    ``extract_text`` -- confirm against that function's documentation.
    """
    wanted_pages = range(start_page, end_page + 1)
    return extract_text(uploaded_file, page_numbers=wanted_pages)
|
84 |
|
85 |
pdf_texts = {}
|
86 |
|
|
|
106 |
\'Hakim Ketua\', \'Hakim Anggota\', \'Panitera\', \'Putusan\', \'Putusan Lainnya\', \'Catatan Putusan\', \'Tanggal Musyawarah\', \'Tanggal Pembacaan\', \'Jenis Institusi Yudisial\', \'Tanggal Pendaftaran\', \'Institusi Yudisial\', \'Nomor Kasus\', \'Pengadilan\', \'Nama Terdakwa\', \'Tempat Lahir Terdakwa\', \'Tanggal Lahir Terdakwa\', \'Usia Terdakwa\', \'Jenis Kelamin Terdakwa\', \'Kebangsaan Terdakwa\', \'Agama Terdakwa\', \'Pekerjaan Terdakwa\', \'Pasal Dakwaan\',\' Pelanggaran Dakwaan\', \'Vonis Hukuman\', \'Deskripsi Vonis Atribut Disita\', \'Vonis Atribut Disita Berat\', \'Denda\', dan, \'Kesimpulan\'.
|
107 |
# """
|
108 |
|
109 |
+
# Create the flag before anything reads it: st.session_state.summaries is
# checked right after this block on every script run, and reading a
# session-state attribute that was never set raises an error on first load.
if "summaries" not in st.session_state:
    st.session_state.summaries = False

# Pressing the button switches the flag on for this and later reruns.
if st.button("📝Process"):
    st.session_state.summaries = True
|
111 |
|
112 |
+
if st.session_state.summaries:
|
|
|
113 |
summaries = []
|
114 |
for pdf_name, text in pdf_texts.items():
|
115 |
response = client.chat.completions.create(
|
|
|
126 |
df.columns = [f"Kesimpulan Putusan ({pdf_name})"]
|
127 |
|
128 |
summaries.append(df)
|
129 |
+
st.session_state.summaries=True
|
130 |
|
131 |
# Display the summaries for each selected PDF
|
132 |
for summary in summaries:
    # Re-set the session flag (the same flag the Process button sets).
    st.session_state.summaries = True
    # One collapsible section per result, titled with the frame's first
    # (and, as built above, only) column name.
    with st.expander(f"{summary.columns[0]}"):
        st.dataframe(summary)
|