Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
import glob | |
import os | |
from langchain.text_splitter import RecursiveCharacterTextSplitter, SentenceTransformersTokenTextSplitter | |
from transformers import AutoTokenizer | |
from langchain_community.document_loaders import PyMuPDFLoader | |
# Relative path (with trailing slash) of the directory holding the input data.
path_to_data = "./data/"
def process_pdf(): | |
files = {'ABC':'./data/MWTS2021.pdf', | |
'XYZ':'./data/Consolidated2021.pdf'} | |
docs = {} | |
for file,value in files.items(): | |
try: | |
docs[file] = PyMuPDFLoader(value).load() | |
except Exception as e: | |
print("Exception: ", e) | |