import streamlit as st
from annotated_text import annotated_text, annotation
import fitz
import os
import chromadb
import uuid
from pathlib import Path
import os

# The title is rendered before the remaining imports and the model load,
# exactly as in the original statement order.
st.title("Contracts Classification ")

import pandas as pd
from langchain.retrievers import BM25Retriever, EnsembleRetriever
from langchain.schema import Document
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from setfit import SetFitModel

# Download from the 🤗 Hub
clause_model = SetFitModel.from_pretrained("scholarly360/setfit-contracts-clauses")


def util_upload_file_and_return_list_docs(uploaded_files):
    """Save each uploaded file into the working directory and open it with fitz.

    Args:
        uploaded_files: Iterable of objects exposing ``.name`` and
            ``.getvalue()`` (presumably Streamlit ``UploadedFile`` objects;
            verify against the caller).

    Returns:
        A tuple ``(list_docs, list_save_path)``: the documents returned by
        ``fitz.open`` and the ``Path`` each upload was written to, in input
        order. The documents are returned open; closing them is left to the
        caller.
    """
    list_docs = []
    list_save_path = []
    for uploaded_file in uploaded_files:
        # The file name comes from the client. Keep only its final component
        # so an absolute path or "../" segments cannot redirect the write
        # outside the current working directory.
        safe_name = Path(uploaded_file.name).name
        save_path = Path(os.getcwd(), safe_name)
        with open(save_path, mode='wb') as w:
            w.write(uploaded_file.getvalue())
        docs = fitz.open(save_path)
        list_docs.append(docs)
        list_save_path.append(save_path)
    return list_docs, list_save_path


# NOTE(review): the text below is truncated and appears garbled in the reviewed
# source (comparison operators seem to be missing near the end). It is kept
# verbatim on one line rather than reconstructed; restore the original
# definition of split_txt_file_synthetic_sentence_rolling before relying on it.
#### Helper Functions to Split using Rolling Window (recomm : use smaller rolling window ) def split_txt_file_synthetic_sentence_rolling(ctxt, sentence_size_in_chars, sliding_size_in_chars,debug=False): sliding_size_in_chars = sentence_size_in_chars - sliding_size_in_chars pos_start = 0 pos_end = len(ctxt) final_return = [] if(debug): print('pos_start : ',pos_start) print('pos_end : ',pos_end) if(pos_endpos_end): if(start