SucheRAG

Sleeping

alexkueck committed on Jul 4, 2024

Commit

cddacd5

verified ·

1 Parent(s): 48fe730

Update utils.py

Files changed (1) hide show

utils.py CHANGED Viewed

@@ -443,6 +443,35 @@ def extract_document_info(documents):
         }
         extracted_info.append(info)
     return extracted_info

         }
         extracted_info.append(info)
     return extracted_info
def extract_document_info(documents):
    """Build one display/download record per retrieved document.

    Args:
        documents: Iterable of objects exposing ``page_content`` and a
            ``metadata`` mapping. The optional ``"path"`` and ``"page"``
            metadata keys are read.

    Returns:
        A list of dicts with the keys ``content``, ``metadata``, ``titel``
        (file name taken from the path, or a placeholder when there is
        none), ``seite`` (page, or a placeholder), ``pfad`` (the path) and
        ``download_link``.
    """
    # Function-scope import so the block does not depend on a file-level
    # import that may not exist.
    from urllib.parse import quote

    base_url = (
        "https://huggingface.co/spaces/alexkueck/SucheRAG/resolve/main/chroma/kkg"
    )
    # Lower-cased file extension -> sub-folder of the Space that holds it.
    folder_by_extension = (('.pdf', 'pdf'), ('.docx', 'word'))

    extracted_info = []
    for doc in documents:
        # A missing key and an explicit None are both treated as "no path".
        doc_path = doc.metadata.get("path") or ""
        # The file name doubles as the title shown to the user.
        filename = os.path.basename(doc_path)
        title = filename if filename else "Keine Überschrift"

        # Compare case-insensitively so "REPORT.PDF" is handled like
        # "report.pdf".
        lowered_path = doc_path.lower()
        folder = next(
            (name for ext, name in folder_by_extension if lowered_path.endswith(ext)),
            None,
        )
        if folder is None:
            # Unknown document type: fall back to the raw path.
            download_link = doc_path
        else:
            # Percent-encode the file name; spaces, '#', '?' or umlauts would
            # otherwise yield a broken or misrouted URL.
            # NOTE(review): the query string carries the literal text
            # "hf_token", not an interpolated value - confirm whether a real
            # token was meant to be inserted here.
            download_link = f"{base_url}/{folder}/{quote(title, safe='')}?token=hf_token"

        extracted_info.append({
            'content': doc.page_content,
            'metadata': doc.metadata,
            'titel': title,
            'seite': doc.metadata.get("page", "Unbekannte Seite"),
            'pfad': doc_path,
            'download_link': download_link,
        })
    return extracted_info