Update utils.py
Browse files
utils.py
CHANGED
@@ -282,7 +282,7 @@ def access_pdf(self, filename):
|
|
282 |
return temp_path
|
283 |
|
284 |
# Define a better directory loader as CustomLoader that extracts the document content, the page numbers, the headings, and the paths to the documents
|
285 |
-
def create_directory_loader(file_type, directory_path):
|
286 |
loaders = {
|
287 |
'.pdf': load_pdf_with_metadata,
|
288 |
'.word': load_word_with_metadata,
|
@@ -307,9 +307,10 @@ def create_directory_loader(file_type, directory_path):
|
|
307 |
|
308 |
return CustomLoader(directory_path, file_type, loaders[file_type])
|
309 |
"""
|
310 |
-
|
311 |
-
|
312 |
self.file_type = file_type
|
|
|
313 |
self.loader_func = loader_func
|
314 |
|
315 |
def load(self):
|
@@ -437,8 +438,11 @@ def document_loading_splitting():
|
|
437 |
|
438 |
|
439 |
# Erstellen von DirectoryLoader für jeden Dateityp
|
440 |
-
pdf_loader = create_directory_loader('.pdf', CHROMA_PDF)
|
441 |
-
word_loader = create_directory_loader('.word', CHROMA_WORD)
|
|
|
|
|
|
|
442 |
|
443 |
|
444 |
|
|
|
282 |
return temp_path
|
283 |
|
284 |
# Define a better directory loader as CustomLoader that extracts the document content, the page numbers, the headings, and the paths to the documents
|
285 |
+
def create_custom_loader(file_type, file_list): #create_directory_loader(file_type, directory_path):
|
286 |
loaders = {
|
287 |
'.pdf': load_pdf_with_metadata,
|
288 |
'.word': load_word_with_metadata,
|
|
|
307 |
|
308 |
return CustomLoader(directory_path, file_type, loaders[file_type])
|
309 |
"""
|
310 |
+
|
311 |
+
def __init__(self, file_type, file_list, loader_func):
|
312 |
self.file_type = file_type
|
313 |
+
self.file_list = file_list
|
314 |
self.loader_func = loader_func
|
315 |
|
316 |
def load(self):
|
|
|
438 |
|
439 |
|
440 |
# Erstellen von DirectoryLoader für jeden Dateityp
|
441 |
+
# pdf_loader = create_directory_loader('.pdf', CHROMA_PDF)
|
442 |
+
#word_loader = create_directory_loader('.word', CHROMA_WORD)
|
443 |
+
|
444 |
+
pdf_loader = create_custom_loader('.pdf', pdf_files)
|
445 |
+
word_loader = create_custom_loader('.docx', word_files)
|
446 |
|
447 |
|
448 |
|