alexkueck committed on
Commit
0b1c040
·
verified ·
1 Parent(s): 4d8f692

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +5 -7
utils.py CHANGED
@@ -336,6 +336,7 @@ def document_loading_splitting():
336
  preprocessed_docs = []
337
  original_docs = []
338
  for doc in docs:
 
339
  preprocessed_content = preprocess_text(doc.page_content)
340
  preprocessed_title = preprocess_text(doc.metadata["title"])
341
  preprocessed_metadata = {
@@ -343,11 +344,8 @@ def document_loading_splitting():
343
  "page": doc.metadata["page"],
344
  "path": doc.metadata["path"]
345
  }
346
- preprocessed_doc = Document(content=preprocessed_content, title=preprocessed_metadata["title"], page=preprocessed_metadata["page"], path=preprocessed_metadata["path"])
347
- original_doc = Document(content=doc.page_content, title=doc.metadata["title"], page=doc.metadata["page"], path=doc.metadata["path"])
348
- # Weise IDs zu
349
- preprocessed_doc.id = str(uuid.uuid4())
350
- original_doc.id = preprocessed_doc.id
351
  preprocessed_docs.append(preprocessed_doc)
352
  original_docs.append(original_doc)
353
 
@@ -710,14 +708,14 @@ shared_state = State()
710
 
711
  #Für die relevanten Dokumente - damit sie passend zum Dictionary die Attribute haben
712
  class Document:
713
- def __init__(self, content, title, page, path):
714
  self.page_content = content
715
  self.metadata = {
716
  "title": title,
717
  "page": page,
718
  "path": path
719
  }
720
- self.id = None
721
 
722
 
723
 
 
336
  preprocessed_docs = []
337
  original_docs = []
338
  for doc in docs:
339
+ doc_id = str(uuid.uuid4()) # Erzeuge eine eindeutige ID
340
  preprocessed_content = preprocess_text(doc.page_content)
341
  preprocessed_title = preprocess_text(doc.metadata["title"])
342
  preprocessed_metadata = {
 
344
  "page": doc.metadata["page"],
345
  "path": doc.metadata["path"]
346
  }
347
+ preprocessed_doc = Document(content=preprocessed_content, title=preprocessed_metadata["title"], page=preprocessed_metadata["page"], path=preprocessed_metadata["path"], doc_id=doc_id)
348
+ original_doc = Document(content=doc.page_content, title=doc.metadata["title"], page=doc.metadata["page"], path=doc.metadata["path"], doc_id=doc_id)
 
 
 
349
  preprocessed_docs.append(preprocessed_doc)
350
  original_docs.append(original_doc)
351
 
 
708
 
709
  #Für die relevanten Dokumente - damit sie passend zum Dictionary die Attribute haben
710
  class Document:
711
+ def __init__(self, content, title, page, path, doc_id=None):
712
  self.page_content = content
713
  self.metadata = {
714
  "title": title,
715
  "page": page,
716
  "path": path
717
  }
718
+ self.id = doc_id
719
 
720
 
721