Update utils.py
Browse files
utils.py
CHANGED
@@ -424,26 +424,8 @@ def rag_chain(llm, prompt, retriever):
|
|
424 |
def query(api_llm, payload):
|
425 |
response = requests.post(api_llm, headers=HEADERS, json=payload)
|
426 |
return response.json()
|
427 |
-
|
428 |
|
429 |
|
430 |
-
def extract_document_info(documents):
|
431 |
-
extracted_info = []
|
432 |
-
for doc in documents:
|
433 |
-
# Extract the filename from the path to use as the title
|
434 |
-
filename = os.path.basename(doc.metadata.get("path", ""))
|
435 |
-
title = filename if filename else "Keine Überschrift"
|
436 |
-
|
437 |
-
info = {
|
438 |
-
'content': doc.page_content,
|
439 |
-
'metadata': doc.metadata,
|
440 |
-
'titel': title,
|
441 |
-
'seite': doc.metadata.get("page", "Unbekannte Seite"),
|
442 |
-
'pfad': doc.metadata.get("path", "Kein Pfad verfügbar")
|
443 |
-
}
|
444 |
-
extracted_info.append(info)
|
445 |
-
return extracted_info
|
446 |
-
|
447 |
|
448 |
|
449 |
def extract_document_info(documents):
|
@@ -456,9 +438,9 @@ def extract_document_info(documents):
|
|
456 |
# Determine the document type and adjust the path accordingly
|
457 |
doc_path = doc.metadata.get("path", "")
|
458 |
if doc_path.endswith('.pdf'):
|
459 |
-
download_link = f"https://huggingface.co/spaces/alexkueck/SucheRAG/resolve/main/chroma/kkg/pdf/{title}
|
460 |
elif doc_path.endswith('.docx'):
|
461 |
-
download_link = f"https://huggingface.co/spaces/alexkueck/SucheRAG/resolve/main/chroma/kkg/word/{title}
|
462 |
else:
|
463 |
download_link = doc_path
|
464 |
|
|
|
424 |
def query(api_llm, payload):
|
425 |
response = requests.post(api_llm, headers=HEADERS, json=payload)
|
426 |
return response.json()
|
|
|
427 |
|
428 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
429 |
|
430 |
|
431 |
def extract_document_info(documents):
|
|
|
438 |
# Determine the document type and adjust the path accordingly
|
439 |
doc_path = doc.metadata.get("path", "")
|
440 |
if doc_path.endswith('.pdf'):
|
441 |
+
download_link = f"https://huggingface.co/spaces/alexkueck/SucheRAG/resolve/main/chroma/kkg/pdf/{title}"
|
442 |
elif doc_path.endswith('.docx'):
|
443 |
+
download_link = f"https://huggingface.co/spaces/alexkueck/SucheRAG/resolve/main/chroma/kkg/word/{title}"
|
444 |
else:
|
445 |
download_link = doc_path
|
446 |
|