Update utils.py
Browse files
utils.py
CHANGED
@@ -324,7 +324,8 @@ def create_directory_loader(file_type, directory_path):
|
|
324 |
filename=file_path,
|
325 |
repo_type="space",
|
326 |
local_dir=os.path.dirname(temp_path),
|
327 |
-
local_dir_use_symlinks=False
|
|
|
328 |
)
|
329 |
|
330 |
documents.extend(self.loader_func(temp_path))
|
@@ -367,6 +368,7 @@ def load_word_with_metadata(file_path):
|
|
367 |
#Vektorstore
|
368 |
################################################
|
369 |
#liste aller files in einem dir...
|
|
|
370 |
def list_files_in_hf_repo(repo_id, directory=""):
|
371 |
try:
|
372 |
repo_info = api.list_repo_files(repo_id=repo_id, repo_type=REPO_TYPE)
|
@@ -376,7 +378,7 @@ def list_files_in_hf_repo(repo_id, directory=""):
|
|
376 |
except Exception as e:
|
377 |
print(f"Fehler beim Zugriff auf das Repository.........................:{repo_id} {e}")
|
378 |
return []
|
379 |
-
|
380 |
################################################
|
381 |
# Document Splitting
|
382 |
################################################
|
@@ -429,7 +431,7 @@ def document_loading_splitting():
|
|
429 |
|
430 |
|
431 |
# Dateien im Hugging Face Space auflisten
|
432 |
-
files_in_repo = list_repo_files(repo_id=STORAGE_REPO_ID, repo_type="space")
|
433 |
pdf_files = [f for f in files_in_repo if f.endswith('.pdf') and f.startswith("chroma/kkg/pdf/")]
|
434 |
word_files = [f for f in files_in_repo if f.endswith('.docx') and f.startswith("chroma/kkg/word/")]
|
435 |
|
|
|
324 |
filename=file_path,
|
325 |
repo_type="space",
|
326 |
local_dir=os.path.dirname(temp_path),
|
327 |
+
local_dir_use_symlinks=False,
|
328 |
+
token=HF_TOKEN
|
329 |
)
|
330 |
|
331 |
documents.extend(self.loader_func(temp_path))
|
|
|
368 |
#Vektorstore
|
369 |
################################################
|
370 |
#liste aller files in einem dir...
|
371 |
+
"""
|
372 |
def list_files_in_hf_repo(repo_id, directory=""):
|
373 |
try:
|
374 |
repo_info = api.list_repo_files(repo_id=repo_id, repo_type=REPO_TYPE)
|
|
|
378 |
except Exception as e:
|
379 |
print(f"Fehler beim Zugriff auf das Repository.........................:{repo_id} {e}")
|
380 |
return []
|
381 |
+
"""
|
382 |
################################################
|
383 |
# Document Splitting
|
384 |
################################################
|
|
|
431 |
|
432 |
|
433 |
# Dateien im Hugging Face Space auflisten
|
434 |
+
files_in_repo = list_repo_files(repo_id=STORAGE_REPO_ID, repo_type="space", token=HF_TOKEN)
|
435 |
pdf_files = [f for f in files_in_repo if f.endswith('.pdf') and f.startswith("chroma/kkg/pdf/")]
|
436 |
word_files = [f for f in files_in_repo if f.endswith('.docx') and f.startswith("chroma/kkg/word/")]
|
437 |
|