Update utils.py
Browse files
utils.py
CHANGED
@@ -793,14 +793,16 @@ def extract_document_info(documents):
|
|
793 |
title = filename if filename else "Keine Überschrift"
|
794 |
|
795 |
# Determine the document type and adjust the path accordingly
|
|
|
|
|
796 |
doc_path = doc.metadata.get("path", "")
|
797 |
if doc_path.endswith('.pdf'):
|
798 |
-
download_link = f"https://huggingface.co/spaces/alexkueck/{STORAGE_REPO_ID}/resolve/main/chroma/kkg/pdf/{title}"
|
799 |
elif doc_path.endswith('.docx'):
|
800 |
download_link = f"https://huggingface.co/spaces/alexkueck/{STORAGE_REPO_ID}/resolve/main/chroma/kkg/word/{title}"
|
801 |
else:
|
802 |
download_link = doc_path
|
803 |
-
|
804 |
|
805 |
info = {
|
806 |
'content': doc.page_content,
|
@@ -879,23 +881,35 @@ def download_link(doc):
|
|
879 |
file_url = f"https://huggingface.co/spaces/alexkueck/{STORAGE_REPO_ID}/resolve/main/{doc}?token=hf_token"
|
880 |
return f'<b><a href="{file_url}" target="_blank" style="color: #BB70FC; font-weight: bold;">{doc}</a></b>'
|
881 |
|
|
|
882 |
|
883 |
def download_link(doc):
|
884 |
# Basis-URL für das Hugging Face Repository
|
885 |
base_url = f"https://huggingface.co/spaces/{STORAGE_REPO_ID}/resolve/main"
|
886 |
|
887 |
-
|
888 |
-
|
889 |
-
|
890 |
-
|
891 |
-
|
892 |
-
|
|
|
893 |
else:
|
894 |
-
#
|
895 |
-
|
896 |
-
|
897 |
-
|
898 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
899 |
|
900 |
|
901 |
|
|
|
793 |
title = filename if filename else "Keine Überschrift"
|
794 |
|
795 |
# Determine the document type and adjust the path accordingly
|
796 |
+
download_link = download_link(doc)
|
797 |
+
"""
|
798 |
doc_path = doc.metadata.get("path", "")
|
799 |
if doc_path.endswith('.pdf'):
|
800 |
+
download_link = download_link(doc) #f"https://huggingface.co/spaces/alexkueck/{STORAGE_REPO_ID}/resolve/main/chroma/kkg/pdf/{title}"
|
801 |
elif doc_path.endswith('.docx'):
|
802 |
download_link = f"https://huggingface.co/spaces/alexkueck/{STORAGE_REPO_ID}/resolve/main/chroma/kkg/word/{title}"
|
803 |
else:
|
804 |
download_link = doc_path
|
805 |
+
"""
|
806 |
|
807 |
info = {
|
808 |
'content': doc.page_content,
|
|
|
881 |
file_url = f"https://huggingface.co/spaces/alexkueck/{STORAGE_REPO_ID}/resolve/main/{doc}?token=hf_token"
|
882 |
return f'<b><a href="{file_url}" target="_blank" style="color: #BB70FC; font-weight: bold;">{doc}</a></b>'
|
883 |
|
884 |
+
"""
|
885 |
|
886 |
def download_link(doc):
    """Build the Hugging Face download URL for a document.

    Args:
        doc: One of
            * a dict with a ``'pfad'`` (path) entry and an optional
              ``'titel'`` (title) entry,
            * a plain string holding the document path, or
            * any object with a ``metadata`` mapping that has a ``'path'``
              entry.

    Returns:
        The download URL as a string. PDF and DOCX paths are mapped to the
        ``chroma/kkg/pdf`` and ``chroma/kkg/word`` folders; any other path
        is appended to the repository base URL unchanged.

        A dict without a ``'pfad'`` entry cannot be linked; in that case the
        title wrapped in ``<b>`` tags is returned instead of a URL.

    Note:
        ``STORAGE_REPO_ID``, ``hf_token``, ``quote`` and ``os`` are read from
        the enclosing module.
    """
    if isinstance(doc, dict):
        if 'pfad' not in doc:
            # No path available: nothing to link to, emit the bare title.
            return f'<b>{doc.get("titel", "Unbekannter Titel")}</b>'
        raw_path = doc['pfad']
        # Normalise None / non-string paths so the suffix checks below
        # cannot raise.
        doc_path = str(raw_path) if raw_path else ''
        # Fall back to the file name (not the whole path) when no usable
        # title is given, mirroring the non-dict branch.
        title = doc.get('titel') or os.path.basename(doc_path)
    else:
        # `metadata` may be missing *or* explicitly None; treat both as empty.
        metadata = getattr(doc, 'metadata', None) or {}
        raw_path = metadata.get('path', doc if isinstance(doc, str) else '')
        doc_path = str(raw_path) if raw_path else ''
        title = os.path.basename(doc_path)

    # Base URL of the Hugging Face repository.
    base_url = f"https://huggingface.co/spaces/{STORAGE_REPO_ID}/resolve/main"

    # NOTE(review): the access token is embedded in a URL that is handed to
    # the caller (and presumably rendered for end users) -- confirm that
    # exposing it this way is intended.
    file_name = quote(str(title))
    suffix_probe = doc_path.lower()
    if suffix_probe.endswith('.pdf'):
        file_url = f"{base_url}/chroma/kkg/pdf/{file_name}?token={hf_token}"
    elif suffix_probe.endswith('.docx'):
        file_url = f"{base_url}/chroma/kkg/word/{file_name}?token={hf_token}"
    else:
        # Fallback for other file types: use the path as given.
        file_url = f"{base_url}/{quote(doc_path)}?token={hf_token}"

    # The raw URL is returned; building the HTML anchor is left to the caller.
    return file_url
|
912 |
+
|
913 |
|
914 |
|
915 |
|