File size: 4,946 Bytes
33d569a
 
 
 
4b85a7d
 
33d569a
95d9be0
f2a3f35
33d569a
539d08c
ea4c8b4
f2a3f35
539d08c
 
33d569a
26b3348
f2a3f35
33d569a
44f0c8a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33d569a
 
4d4f8e6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f2a3f35
89ebe02
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f2a3f35
5c41813
9657e54
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48e1ac5
44f0c8a
f2a3f35
 
8b1c08d
f2a3f35
 
 
 
 
 
 
 
 
 
 
 
 
44f0c8a
f2a3f35
 
 
44f0c8a
 
f2a3f35
44f0c8a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
import streamlit as st
import os
import pandas as pd
from PyPDF2 import PdfReader
from PyPDF2 import PdfFileReader
from PyPDF2.utils import PdfReadError
import openai
from openai import OpenAI
from pdfminer.high_level import extract_text
import json
from dotenv import load_dotenv

# Initialize OpenAI API
# Load variables from a local .env file (if one exists) so the key stays out of the code.
load_dotenv()
api_key = os.getenv('OPENAI_API_KEY')
if not api_key:
    # Without a key no request can be authenticated; stop the script run with a
    # readable message instead of failing further down with a traceback.
    st.error("OPENAI_API_KEY is not set. Add it to your environment or to a .env file.")
    st.stop()
# Module-level key kept alongside the explicit client, as in the original setup.
openai.api_key = api_key
client = OpenAI(api_key=api_key)
# Folder name for PDFs; not referenced by the rest of the visible script.
pdf_folder = "pdf"

# CSS for the bold colored line.
# NOTE: CSS only supports /* ... */ comments. A `#` note after a declaration is
# parsed as a malformed declaration and swallows everything up to the next `;`
# or `}`, which silently dropped rules such as `background-color`.
bold_line_css = """
<style>
    .centered-title {
        text-align: center;
        margin-bottom: 0;  /* No gap between the title and the line */
    }
    .bold-colored-line {
        border: none;
        height: 3px;  /* Thickness of the line */
        background-color: #FF6347;  /* Tomato color */
        margin-top: 0;  /* No gap at the top */
    }
</style>
"""

# Page header: a centered title followed by a horizontal rule.
# The header is styled inline; bold_line_css is not injected by this call.
_TITLE_HTML = """
    <h1 style='text-align: center;'>
        Mahkamah Agung: NER & Summarization of Legal Documents
    </h1>
    <hr>
    """

# unsafe_allow_html is required so the raw <h1>/<hr> tags are rendered.
st.markdown(_TITLE_HTML, unsafe_allow_html=True)


#---------------------PDF OVERVIEW----------------------
# Function to read PDF file
def read_pdf(file):
    """Extract the text of every page of a PDF.

    Uses the ``PdfReader`` API (the same reader the upload section of this
    script uses) instead of the deprecated ``PdfFileReader`` / ``numPages`` /
    ``getPage`` / ``extractText`` names.

    Args:
        file: A path or binary file-like object accepted by ``PdfReader``,
            e.g. the object returned by ``st.file_uploader``.

    Returns:
        The concatenated text of all pages, or ``None`` when the PDF could not
        be read (the error is reported with ``st.error``).
    """
    try:
        pdf_reader = PdfReader(file)
        # Join once instead of growing a string page by page; a page that
        # yields no text contributes an empty string.
        return "".join(page.extract_text() or "" for page in pdf_reader.pages)
    except PdfReadError as e:
        st.error(f"Error reading PDF: {e}")
        return None

# Sidebar: PDF upload and full-text preview
st.sidebar.title("PDF Viewer")

# Uploader limited to .pdf files; several files may be selected at once
uploaded_files = st.sidebar.file_uploader(
    "Upload PDF",
    type=["pdf"],
    accept_multiple_files=True,
)

# For each upload: show its name/size in the sidebar, then its text in the page body
for uploaded_file in uploaded_files or []:
    st.sidebar.write({"Filename": uploaded_file.name, "Filesize": uploaded_file.size})
    pdf_text = read_pdf(uploaded_file)
    if not pdf_text:
        # Nothing to show: read_pdf returned None or an empty string
        continue
    st.write(pdf_text)




# ---------------------UPLOAD PDF AND TEXT EXTRACTION----------------------

# `col2` was used without ever being created, which raised NameError on every
# run. Build a two-column layout here and keep the uploader in the right column.
_left_col, col2 = st.columns(2)

with col2:
    with st.expander('PDF Documents'):
        # Restrict to PDFs, like the sidebar uploader, so PdfReader is never
        # handed an arbitrary file type.
        uploaded_files = st.file_uploader("Upload PDFs", type=["pdf"], accept_multiple_files=True)

    # Maps each uploaded file name to the text of its first and last pages.
    pdf_texts = {}
    for uploaded_file in uploaded_files or []:
        pdf_reader = PdfReader(uploaded_file)
        total_pages = len(pdf_reader.pages)
        if total_pages == 0:
            st.warning(f"{uploaded_file.name} contains no pages and was skipped.")
            continue

        # pdfminer's page_numbers are ZERO-indexed: the first three pages are
        # 0..2 and the last three are total-3..total-1.
        first_pages = list(range(min(3, total_pages)))
        # Drop pages already covered by first_pages so short PDFs are not
        # extracted twice.
        last_pages = [
            page_index
            for page_index in range(max(0, total_pages - 3), total_pages)
            if page_index not in first_pages
        ]

        # PdfReader has already consumed the stream; rewind before each pdfminer pass.
        uploaded_file.seek(0)
        extracted_text = extract_text(uploaded_file, page_numbers=first_pages)
        # An empty page_numbers sequence makes pdfminer extract EVERY page, so
        # only ask for the tail when there is one.
        if last_pages:
            uploaded_file.seek(0)
            extracted_text += "\n" + extract_text(uploaded_file, page_numbers=last_pages)
        pdf_texts[uploaded_file.name] = extracted_text


#---------------------ANALYZE AND SUMMARIZE----------------------


def get_template():
    """Return the prompt template used to summarize a court decision.

    Renders a "Use Custom Template" checkbox. When it is ticked, a text area is
    shown and its content is returned if it is non-empty. In every other case
    the built-in Indonesian prompt is returned; it contains a single ``{}``
    placeholder.
    """
    # Built-in prompt, used whenever no custom text is supplied
    builtin_prompt = """
    # Anda Adalah Seorang Hakim Agung Di Mahkamah Agung Di Indonesia. Berdasarkan Putusan Di Bawah Ini, Berikan Kesimpulannya:
    {}
    Variabel Yang Harus Ada Adalah Sebagai Berikut: 
    \'Hakim Ketua\', \'Hakim Anggota\', \'Panitera\', \'Putusan\', \'Putusan Lainnya\', \'Catatan Putusan\', \'Tanggal Musyawarah\', \'Tanggal Pembacaan\', \'Jenis Institusi Yudisial\', \'Tanggal Pendaftaran\', \'Institusi Yudisial\', \'Nomor Kasus\', \'Pengadilan\', \'Nama Terdakwa\', \'Tempat Lahir Terdakwa\', \'Tanggal Lahir Terdakwa\', \'Usia Terdakwa\', \'Jenis Kelamin Terdakwa\', \'Kebangsaan Terdakwa\', \'Agama Terdakwa\', \'Pekerjaan Terdakwa\', \'Pasal Dakwaan\',\' Pelanggaran Dakwaan\', \'Vonis Hukuman\', \'Deskripsi Vonis Atribut Disita\', \'Vonis Atribut Disita Berat\', \'Denda\', dan,  \'Kesimpulan\'.
    # """
    if not st.checkbox("Use Custom Template"):
        return builtin_prompt
    # The text area only exists while the checkbox is ticked
    user_prompt = st.text_area("Input Your Template Here:")
    return user_prompt if user_prompt else builtin_prompt

# Get the template
template = get_template()


def _build_prompt(prompt_template, document_text):
    """Insert *document_text* into *prompt_template* and return the prompt.

    The template is first filled with ``str.format``. Custom templates may
    contain literal braces that ``str.format`` rejects; in that case only the
    bare ``{}`` placeholder is substituted. If the template has no placeholder
    at all, the document is appended so it is never silently dropped.
    """
    try:
        prompt = prompt_template.format(document_text)
    except (IndexError, KeyError, ValueError):
        prompt = prompt_template.replace("{}", document_text)
    if document_text not in prompt:
        prompt = f"{prompt}\n{document_text}"
    return prompt


if st.button("📝Process"):
    if not pdf_texts:
        st.warning("Upload at least one PDF before processing.")

    # One single-column DataFrame per successfully summarized PDF.
    summaries = []
    for pdf_name, text in pdf_texts.items():
        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            # JSON mode: ask the API for a syntactically valid JSON object so
            # the reply can be parsed below.
            response_format={"type": "json_object"},
            messages=[
                {"role": "system", "content": "You are a helpful assistant designed to output JSON."},
                {"role": "user", "content": _build_prompt(template, text)},
            ],
        )

        raw_content = response.choices[0].message.content
        try:
            data = json.loads(raw_content)
        except (TypeError, json.JSONDecodeError) as exc:
            # A bad reply for one document should not abort the remaining ones.
            st.error(f"Could not parse the model response for {pdf_name} as JSON: {exc}")
            continue
        if not isinstance(data, dict):
            # Anything but a single object would not fit the one-column table below.
            st.error(f"Unexpected response structure for {pdf_name}; expected a JSON object.")
            continue

        # Flatten nested keys into one row, then transpose so each field is a row.
        df = pd.json_normalize(data).T
        df.columns = [f"Kesimpulan Putusan ({pdf_name})"]
        summaries.append(df)

    # Record once that summaries were produced during this run.
    if summaries:
        st.session_state.summaries = True

    # Display the summaries for each selected PDF
    for summary in summaries:
        with st.expander(f"{summary.columns[0]}"):
            st.dataframe(summary)