Update utils.py
Browse files
utils.py
CHANGED
@@ -282,7 +282,7 @@ def load_pdf_with_metadata(file_path):
|
|
282 |
content = page.get_text("text")
|
283 |
title = document.metadata.get("title", "Unbekannt")
|
284 |
page_number = page_num + 1
|
285 |
-
documents.append(Document(content=content, title=title, page=page_number, path=file_path))
|
286 |
return documents
|
287 |
|
288 |
# für Word-Dokumente
|
@@ -294,7 +294,7 @@ def load_word_with_metadata(file_path):
|
|
294 |
for para in document.paragraphs:
|
295 |
content = para.text
|
296 |
page_number = 1 # Word-Dokumente haben keine Seitenzahlen in diesem Kontext
|
297 |
-
documents.append(Document(content=content, title=title, page=page_number, path=path))
|
298 |
return documents
|
299 |
|
300 |
|
|
|
282 |
content = page.get_text("text")
|
283 |
title = document.metadata.get("title", "Unbekannt")
|
284 |
page_number = page_num + 1
|
285 |
+
documents.append(Document(content=content, title=title, page=page_number, path=file_path, split_id=None))
|
286 |
return documents
|
287 |
|
288 |
# für Word-Dokumente
|
|
|
294 |
for para in document.paragraphs:
|
295 |
content = para.text
|
296 |
page_number = 1 # Word-Dokumente haben keine Seitenzahlen in diesem Kontext
|
297 |
+
documents.append(Document(content=content, title=title, page=page_number, path=path, split_id= None))
|
298 |
return documents
|
299 |
|
300 |
|