SucheRAG

Sleeping

App Files Files Community

alexkueck committed on Jul 9, 2024

Commit

51f975f

verified ·

1 Parent(s): d7e55c8

Update utils.py

Browse files

Files changed (1) hide show

utils.py +27 -13

utils.py CHANGED Viewed

@@ -793,14 +793,16 @@ def extract_document_info(documents):
         title = filename if filename else "Keine Überschrift"
         # Determine the document type and adjust the path accordingly
         doc_path = doc.metadata.get("path", "")
         if doc_path.endswith('.pdf'):
-            download_link = f"https://huggingface.co/spaces/alexkueck/{STORAGE_REPO_ID}/resolve/main/chroma/kkg/pdf/{title}"
         elif doc_path.endswith('.docx'):
             download_link = f"https://huggingface.co/spaces/alexkueck/{STORAGE_REPO_ID}/resolve/main/chroma/kkg/word/{title}"
         else:
             download_link = doc_path
         info = {
             'content': doc.page_content,
@@ -879,23 +881,35 @@ def download_link(doc):
         file_url = f"https://huggingface.co/spaces/alexkueck/{STORAGE_REPO_ID}/resolve/main/{doc}?token=hf_token"
         return f'<b><a href="{file_url}" target="_blank" style="color: #BB70FC; font-weight: bold;">{doc}</a></b>'
 def download_link(doc):
     # Basis-URL für das Hugging Face Repository
     base_url = f"https://huggingface.co/spaces/{STORAGE_REPO_ID}/resolve/main"
-    # Check if doc is a dictionary and contains the key 'pfad'
-    if isinstance(doc, dict) and 'pfad' in doc:
-        # URL-encode the path to handle special characters
-        encoded_path = quote(doc['pfad'])
-        file_url = f"{base_url}/{encoded_path}?token={hf_token}"
-        return f'<b><a href="{file_url}" target="_blank" style="color: #BB70FC; font-weight: bold;">{doc["titel"]}</a></b>'
     else:
-        # URL-encode the document name to handle special characters
-        encoded_doc = quote(doc)
-        file_url = f"{base_url}/{encoded_doc}?token={hf_token}"
-        return f'<b><a href="{file_url}" target="_blank" style="color: #BB70FC; font-weight: bold;">{doc}</a></b>'
-"""

         title = filename if filename else "Keine Überschrift"
         # Determine the document type and adjust the path accordingly
+        download_link = download_link(doc)
+        """
         doc_path = doc.metadata.get("path", "")
         if doc_path.endswith('.pdf'):
+            download_link = download_link(doc) #f"https://huggingface.co/spaces/alexkueck/{STORAGE_REPO_ID}/resolve/main/chroma/kkg/pdf/{title}"
         elif doc_path.endswith('.docx'):
             download_link = f"https://huggingface.co/spaces/alexkueck/{STORAGE_REPO_ID}/resolve/main/chroma/kkg/word/{title}"
         else:
             download_link = doc_path
+        """
         info = {
             'content': doc.page_content,
         file_url = f"https://huggingface.co/spaces/alexkueck/{STORAGE_REPO_ID}/resolve/main/{doc}?token=hf_token"
         return f'<b><a href="{file_url}" target="_blank" style="color: #BB70FC; font-weight: bold;">{doc}</a></b>'
+"""
 def download_link(doc):
     # Basis-URL für das Hugging Face Repository
     base_url = f"https://huggingface.co/spaces/{STORAGE_REPO_ID}/resolve/main"
+    if isinstance(doc, dict):
+        # Wenn doc ein Dictionary ist (wie in Ihrem ursprünglichen Beispiel)
+        if 'pfad' in doc:
+            doc_path = doc['pfad']
+            title = doc.get('titel', doc_path)
+        else:
+            return f'<b>{doc.get("titel", "Unbekannter Titel")}</b>'
     else:
+        # Wenn doc ein String ist oder ein anderes Objekt mit einem 'metadata' Attribut
+        doc_path = getattr(doc, 'metadata', {}).get('path', doc if isinstance(doc, str) else '')
+        title = os.path.basename(doc_path)
+    # Bestimmen des Dokumenttyps und Anpassen des Pfads
+    if doc_path.lower().endswith('.pdf'):
+        file_url = f"{base_url}/chroma/kkg/pdf/{quote(title)}?token={hf_token}"
+    elif doc_path.lower().endswith('.docx'):
+        file_url = f"{base_url}/chroma/kkg/word/{quote(title)}?token={hf_token}"
+    else:
+        # Fallback für andere Dateitypen
+        file_url = f"{base_url}/{quote(doc_path)}?token={hf_token}"
+    return file_url #f'<b><a href="{file_url}" target="_blank" style="color: #BB70FC; font-weight: bold;">{title}</a></b>'