"""Streamlit app: view uploaded PDFs and summarize court rulings with OpenAI.

The sidebar shows the raw text of uploaded PDFs. The main area takes a second
set of uploads, extracts the first and last few pages of each document, sends
that text to the OpenAI chat API together with a prompt template, and displays
the JSON answer as one table per document.
"""
import json
import os

import openai
import pandas as pd
import streamlit as st
from dotenv import load_dotenv
from openai import OpenAI
from pdfminer.high_level import extract_text
from PyPDF2 import PdfFileReader  # noqa: F401  legacy name, no longer used here
from PyPDF2 import PdfReader

try:
    # Location of the exception in current PyPDF2 releases.
    from PyPDF2.errors import PdfReadError
except ImportError:
    # Older PyPDF2 releases only expose it from ``PyPDF2.utils``.
    from PyPDF2.utils import PdfReadError

# Initialize OpenAI API
load_dotenv()
api_key = os.getenv('OPENAI_API_KEY')
if not api_key:
    # Without a key the client constructor raises; show a readable message
    # instead of a traceback and end this script run.
    st.error("OPENAI_API_KEY is not set. Add it to the environment or a .env file.")
    st.stop()
openai.api_key = api_key
client = OpenAI(api_key=api_key)

pdf_folder = "pdf"

# Chat model used for the summaries.
MODEL_NAME = "gpt-3.5-turbo"

# Number of pages taken from the start and from the end of each document.
EDGE_PAGE_COUNT = 3

# CSS for the bold colored line
bold_line_css = """ """

# Page title (rendered through st.markdown with HTML allowed).
PAGE_TITLE_MARKDOWN = (
    "\n\nMahkamah Agung: NER & Summarization of Legal Documents\n\n\n"
)

# Default prompt; "{}" marks where the document text is inserted.
DEFAULT_TEMPLATE = (
    " # Anda Adalah Seorang Hakim Agung Di Mahkamah Agung Di Indonesia. \n"
    "Berdasarkan Putusan Di Bawah Ini, Berikan Kesimpulannya: {} "
    "Variabel Yang Harus Ada Adalah Sebagai Berikut: "
    "'Hakim Ketua', 'Hakim Anggota', 'Panitera', 'Putusan', "
    "'Putusan Lainnya', 'Catatan Putusan', 'Tanggal Musyawarah', "
    "'Tanggal Pembacaan', 'Jenis Institusi Yudisial', "
    "'Tanggal Pendaftaran', 'Institusi Yudisial', 'Nomor Kasus', "
    "'Pengadilan', 'Nama Terdakwa', 'Tempat Lahir Terdakwa', "
    "'Tanggal Lahir Terdakwa', 'Usia Terdakwa', 'Jenis Kelamin Terdakwa', "
    "'Kebangsaan Terdakwa', 'Agama Terdakwa', 'Pekerjaan Terdakwa', "
    "'Pasal Dakwaan',' Pelanggaran Dakwaan', 'Vonis Hukuman', "
    "'Deskripsi Vonis Atribut Disita', 'Vonis Atribut Disita Berat', "
    "'Denda', dan, 'Kesimpulan'. # "
)

st.markdown(PAGE_TITLE_MARKDOWN, unsafe_allow_html=True)


#---------------------PDF OVERVIEW----------------------
def read_pdf(file):
    """Return the concatenated text of every page of a PDF.

    Args:
        file: Path or binary file-like object accepted by ``PdfReader``.

    Returns:
        The extracted text as one string, or ``None`` when PyPDF2 raises
        ``PdfReadError`` (the error is shown in the app).
    """
    try:
        reader = PdfReader(file)
        # A page may yield no text; treat that as an empty string.
        return "".join(page.extract_text() or "" for page in reader.pages)
    except PdfReadError as e:
        st.error(f"Error reading PDF: {e}")
        return None


def _edge_page_numbers(total_pages, edge=EDGE_PAGE_COUNT):
    """Return sorted zero-based indices of the first and last ``edge`` pages.

    Indices are de-duplicated, so a document shorter than ``2 * edge`` pages
    does not list any page twice.
    """
    leading = range(min(edge, total_pages))
    trailing = range(max(total_pages - edge, 0), total_pages)
    return sorted(set(leading) | set(trailing))


def _extract_edge_text(pdf_file, edge=EDGE_PAGE_COUNT):
    """Extract the text of the first and last ``edge`` pages of ``pdf_file``.

    Args:
        pdf_file: Seekable binary file-like object containing a PDF.
        edge: Number of pages to take from each end of the document.

    Returns:
        The extracted text, or an empty string for a document with no pages.

    Raises:
        PdfReadError: If PyPDF2 cannot parse the document.
    """
    pdf_file.seek(0)
    total_pages = len(PdfReader(pdf_file).pages)
    page_numbers = _edge_page_numbers(total_pages, edge)
    if not page_numbers:
        return ""
    # PyPDF2 has moved the stream position; rewind before pdfminer reads it.
    pdf_file.seek(0)
    # pdfminer expects zero-based page indices.
    return extract_text(pdf_file, page_numbers=page_numbers)


def _build_prompt(template, document_text):
    """Insert ``document_text`` into ``template``.

    The first "{}" in the template is replaced by the text. Plain string
    replacement is used instead of ``str.format`` so that other braces in a
    user-supplied template cannot raise. A template without "{}" gets the
    text appended so the document is never silently dropped.
    """
    if "{}" in template:
        return template.replace("{}", document_text, 1)
    return f"{template}\n\n{document_text}"


def _summarize(document_text, template):
    """Ask the chat model for a JSON summary and return the decoded value.

    Raises:
        openai.OpenAIError: If the API request fails.
        json.JSONDecodeError: If the reply is not valid JSON.
    """
    response = client.chat.completions.create(
        model=MODEL_NAME,
        # Request JSON mode so the reply can be parsed with json.loads.
        response_format={"type": "json_object"},
        messages=[
            {"role": "system", "content": "You are a helpful assistant designed to output JSON."},
            {"role": "user", "content": _build_prompt(template, document_text)},
        ],
    )
    return json.loads(response.choices[0].message.content or "")


# Sidebar
st.sidebar.title("PDF Viewer")

# File uploader for PDFs
uploaded_files = st.sidebar.file_uploader("Upload PDF", type=["pdf"], accept_multiple_files=True)

# Display uploaded PDFs
if uploaded_files:
    for uploaded_file in uploaded_files:
        file_details = {"Filename": uploaded_file.name, "Filesize": uploaded_file.size}
        st.sidebar.write(file_details)
        pdf_text = read_pdf(uploaded_file)
        if pdf_text:
            st.write(pdf_text)

# ---------------------UPLOAD PDF AND TEXT EXTRACTION----------------------
# NOTE(review): the script used ``col2`` without ever creating it; a
# two-column layout is assumed here. col1 is currently left empty - confirm
# the intended layout.
col1, col2 = st.columns(2)

with col2:
    with st.expander('PDF Documents'):
        uploaded_files = st.file_uploader("Upload PDFs", type=["pdf"], accept_multiple_files=True)

    # Maps each uploaded file name to the text of its first/last pages.
    pdf_texts = {}
    for uploaded_file in uploaded_files or []:
        try:
            pdf_texts[uploaded_file.name] = _extract_edge_text(uploaded_file)
        except PdfReadError as e:
            # Skip the unreadable file but keep processing the others.
            st.error(f"Error reading PDF {uploaded_file.name}: {e}")


#---------------------ANALYZE AND SUMMARIZE----------------------
def get_template():
    """Return the prompt template to send to the model.

    Shows a "Use Custom Template" checkbox; when it is ticked and the text
    area is non-empty, the user's text is returned. Otherwise the default
    template is returned.
    """
    if st.checkbox("Use Custom Template"):
        # Text input for custom template
        custom_template = st.text_area("Input Your Template Here:")
        # Use custom template if provided
        if custom_template:
            return custom_template
    # Default template
    return DEFAULT_TEMPLATE


# Get the template
template = get_template()

if st.button("📝Process"):
    if not pdf_texts:
        st.warning("Upload at least one PDF before processing.")

    summaries = []
    for pdf_name, text in pdf_texts.items():
        try:
            data = _summarize(text, template)
        except openai.OpenAIError as e:
            st.error(f"OpenAI request failed for {pdf_name}: {e}")
            continue
        except json.JSONDecodeError as e:
            st.error(f"Model reply for {pdf_name} is not valid JSON: {e}")
            continue
        if not isinstance(data, dict):
            # Anything but a single object would not fit the one-column table.
            st.error(f"Model reply for {pdf_name} is not a JSON object.")
            continue

        # One row of flattened keys, transposed into a single named column.
        df = pd.json_normalize(data).T
        df.columns = [f"Kesimpulan Putusan ({pdf_name})"]
        summaries.append(df)

    if summaries:
        st.session_state.summaries = True

    # Display the summaries for each selected PDF
    for summary in summaries:
        with st.expander(f"{summary.columns[0]}"):
            st.dataframe(summary)