Spaces:
Runtime error
Runtime error
File size: 4,946 Bytes
33d569a 4b85a7d 33d569a 95d9be0 f2a3f35 33d569a 539d08c ea4c8b4 f2a3f35 539d08c 33d569a 26b3348 f2a3f35 33d569a 44f0c8a 33d569a 4d4f8e6 f2a3f35 89ebe02 f2a3f35 5c41813 9657e54 48e1ac5 44f0c8a f2a3f35 8b1c08d f2a3f35 44f0c8a f2a3f35 44f0c8a f2a3f35 44f0c8a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 |
import streamlit as st
import os
import pandas as pd
from PyPDF2 import PdfReader
from PyPDF2 import PdfFileReader
from PyPDF2.utils import PdfReadError
import openai
from openai import OpenAI
from pdfminer.high_level import extract_text
import json
from dotenv import load_dotenv
# Configure OpenAI access: read .env first, then take the key from the environment.
load_dotenv()
api_key = os.environ.get('OPENAI_API_KEY')
# The same key is set on the openai module attribute and on the explicit client
# object that the rest of the script uses for chat completions.
openai.api_key = api_key
client = OpenAI(api_key=api_key)
# Folder name for PDFs (not referenced by the code visible in this file).
pdf_folder = "pdf"
# CSS for the centered title and the bold colored line under it.
# Comments inside the stylesheet use /* ... */ because "#" is not a CSS
# comment marker and would corrupt the declarations that follow it.
bold_line_css = """
<style>
.centered-title {
  text-align: center;
  margin-bottom: 0; /* No gap between the title and the line */
}
.bold-colored-line {
  border: none;
  height: 3px; /* Thickness of the line */
  background-color: #FF6347; /* Tomato color */
  margin-top: 0; /* No gap at the top */
}
</style>
"""
# Add the custom CSS to the Streamlit app together with the page header.
# The stylesheet has to be part of the rendered markup to take effect, and the
# <h1>/<hr> elements carry the class names the rules above target. The inline
# text-align is kept so the title stays centered even if the class rule is
# overridden by the host page's styles.
st.markdown(
    bold_line_css
    + """
<h1 class='centered-title' style='text-align: center;'>
Mahkamah Agung: NER & Summarization of Legal Documents
</h1>
<hr class='bold-colored-line'>
""",
    unsafe_allow_html=True,
)
#---------------------PDF OVERVIEW----------------------
def read_pdf(file):
    """Return the concatenated text of every page of a PDF.

    Args:
        file: A PDF source accepted by ``PdfReader`` (the script passes the
            objects returned by Streamlit's file uploader).

    Returns:
        The text of all pages joined in page order, or ``None`` when PyPDF2
        raises ``PdfReadError``; in that case the error is shown in the app.
    """
    try:
        # PdfReader / .pages / extract_text() replace the PdfFileReader /
        # numPages / getPage / extractText API, which PyPDF2 3.x removed.
        pdf_reader = PdfReader(file)
        # "or ''" keeps the join safe if a page yields no text at all.
        return "".join(page.extract_text() or "" for page in pdf_reader.pages)
    except PdfReadError as e:
        st.error(f"Error reading PDF: {e}")
        return None
# Sidebar: upload PDFs and preview their extracted text
st.sidebar.title("PDF Viewer")
uploaded_files = st.sidebar.file_uploader(
    "Upload PDF", type=["pdf"], accept_multiple_files=True
)
# For each uploaded PDF, list its name and size in the sidebar, then write
# whatever text read_pdf() extracted to the main page.
for uploaded_file in uploaded_files or []:
    st.sidebar.write(
        {"Filename": uploaded_file.name, "Filesize": uploaded_file.size}
    )
    pdf_text = read_pdf(uploaded_file)
    if not pdf_text:
        # read_pdf returned None (read error) or an empty string: nothing to show.
        continue
    st.write(pdf_text)
# ---------------------UPLOAD PDF AND TEXT EXTRACTION----------------------
# NOTE(review): this section used to be wrapped in ``with col2:``, but no
# ``col2`` container is created anywhere in the visible file, so the script
# stopped here with a NameError. The expander is now rendered directly on the
# page; re-introduce a column layout explicitly if one is wanted.

# Number of pages taken from the start and from the end of each document.
EDGE_PAGES = 3

# Maps each uploaded file name to the text extracted from its edge pages.
pdf_texts = {}
with st.expander('PDF Documents'):
    uploaded_files = st.file_uploader(
        "Upload PDFs", type=["pdf"], accept_multiple_files=True
    )
    for uploaded_file in uploaded_files or []:
        try:
            total_pages = len(PdfReader(uploaded_file).pages)
        except PdfReadError as e:
            st.error(f"Error reading PDF {uploaded_file.name}: {e}")
            continue

        # pdfminer's page_numbers are zero-indexed, so the first pages are
        # 0..EDGE_PAGES-1. The tail starts after the head so that short
        # documents do not have the same page extracted twice.
        head_pages = range(min(EDGE_PAGES, total_pages))
        tail_pages = range(
            max(len(head_pages), total_pages - EDGE_PAGES), total_pages
        )

        parts = []
        for page_numbers in (head_pages, tail_pages):
            # Skip empty selections: pdfminer treats a falsy page_numbers
            # value as "no filter" and would extract the whole document.
            if not page_numbers:
                continue
            # Rewind so each reader starts from the beginning of the upload.
            uploaded_file.seek(0)
            parts.append(
                extract_text(uploaded_file, page_numbers=list(page_numbers))
            )

        pdf_texts[uploaded_file.name] = "\n".join(parts)
#---------------------ANALYZE AND SUMMARIZE----------------------
def get_template():
    """Return the prompt template used to summarize a court decision.

    Renders a "Use Custom Template" checkbox; when it is ticked, a text area
    is rendered as well and its content is returned if it is non-empty.
    In every other case the built-in Indonesian template is returned.

    Returns:
        The template string. The built-in template contains a ``{}``
        placeholder that the caller fills with the document text.
    """
    # Built-in template, used whenever no custom template is supplied.
    default_template = """
# Anda Adalah Seorang Hakim Agung Di Mahkamah Agung Di Indonesia. Berdasarkan Putusan Di Bawah Ini, Berikan Kesimpulannya:
{}
Variabel Yang Harus Ada Adalah Sebagai Berikut:
\'Hakim Ketua\', \'Hakim Anggota\', \'Panitera\', \'Putusan\', \'Putusan Lainnya\', \'Catatan Putusan\', \'Tanggal Musyawarah\', \'Tanggal Pembacaan\', \'Jenis Institusi Yudisial\', \'Tanggal Pendaftaran\', \'Institusi Yudisial\', \'Nomor Kasus\', \'Pengadilan\', \'Nama Terdakwa\', \'Tempat Lahir Terdakwa\', \'Tanggal Lahir Terdakwa\', \'Usia Terdakwa\', \'Jenis Kelamin Terdakwa\', \'Kebangsaan Terdakwa\', \'Agama Terdakwa\', \'Pekerjaan Terdakwa\', \'Pasal Dakwaan\',\' Pelanggaran Dakwaan\', \'Vonis Hukuman\', \'Deskripsi Vonis Atribut Disita\', \'Vonis Atribut Disita Berat\', \'Denda\', dan, \'Kesimpulan\'.
# """
    if not st.checkbox("Use Custom Template"):
        return default_template
    # The text area is only rendered once the checkbox is ticked.
    custom_template = st.text_area("Input Your Template Here:")
    return custom_template if custom_template else default_template
def _build_prompt(prompt_template, document_text):
    """Insert ``document_text`` into ``prompt_template``.

    Every ``{}`` placeholder is replaced by the document text. Plain string
    replacement is used instead of ``str.format`` so that a custom template
    containing other braces (for example a JSON sample) cannot raise. When
    the template has no placeholder, the text is appended on a new line so
    the document is never silently left out of the prompt.
    """
    if "{}" in prompt_template:
        return prompt_template.replace("{}", document_text)
    return f"{prompt_template}\n{document_text}"


# Get the template
template = get_template()

if st.button("📝Process"):
    if not pdf_texts:
        st.warning("Upload at least one PDF before processing.")

    # One single-column DataFrame per successfully summarized PDF.
    summaries = []
    for pdf_name, text in pdf_texts.items():
        try:
            response = client.chat.completions.create(
                model="gpt-3.5-turbo",
                # Ask the API for a JSON object instead of relying on the
                # system prompt alone; the reply is parsed with json.loads.
                response_format={"type": "json_object"},
                messages=[
                    {"role": "system", "content": "You are a helpful assistant designed to output JSON."},
                    {"role": "user", "content": _build_prompt(template, text)},
                ],
            )
        except openai.OpenAIError as e:
            # Report the failure for this file and keep processing the rest.
            st.error(f"OpenAI request failed for {pdf_name}: {e}")
            continue

        try:
            # content may be None; "" then fails as a normal decode error.
            data = json.loads(response.choices[0].message.content or "")
        except json.JSONDecodeError as e:
            st.error(f"Could not parse the model response for {pdf_name} as JSON: {e}")
            continue

        # Flatten the JSON into one row, then transpose so each field is a
        # row under a single column named after the PDF.
        df = pd.json_normalize(data).T
        df.columns = [f"Kesimpulan Putusan ({pdf_name})"]
        summaries.append(df)

    if summaries:
        # Flag that at least one summary was produced during this run.
        st.session_state.summaries = True

    # Display the summaries for each selected PDF
    for summary in summaries:
        with st.expander(f"{summary.columns[0]}"):
            st.dataframe(summary)
|