Update utils.py
Browse files
utils.py
CHANGED
@@ -270,11 +270,11 @@ def access_pdf(self, filename):
|
|
270 |
|
271 |
# Datei aus dem Hugging Face Space herunterladen
|
272 |
hf_hub_download(
|
273 |
-
repo_id=
|
274 |
-
filename=
|
275 |
repo_type=DATA_REPO_TYPE,
|
276 |
local_dir=os.path.dirname(temp_path),
|
277 |
-
|
278 |
)
|
279 |
|
280 |
return temp_path
|
@@ -929,23 +929,35 @@ class CustomLoader:
|
|
929 |
self.loader_func = loader_func
|
930 |
|
931 |
def load(self):
|
932 |
-
|
933 |
-
|
934 |
-
|
935 |
-
|
936 |
-
|
|
|
937 |
# Datei aus dem Hugging Face Space herunterladen
|
938 |
-
hf_hub_download(
|
939 |
repo_id=STORAGE_REPO_ID,
|
940 |
filename=file_path,
|
941 |
repo_type="space",
|
942 |
local_dir=os.path.dirname(temp_path),
|
943 |
-
local_dir_use_symlinks=False,
|
944 |
token=hf_token
|
945 |
)
|
946 |
-
|
947 |
-
|
948 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
949 |
|
950 |
|
951 |
|
|
|
270 |
|
271 |
# Datei aus dem Hugging Face Space herunterladen
|
272 |
hf_hub_download(
|
273 |
+
repo_id=STORAGE_REPO_ID,
|
274 |
+
filename=file_path,
|
275 |
repo_type=DATA_REPO_TYPE,
|
276 |
local_dir=os.path.dirname(temp_path),
|
277 |
+
token=hf_token
|
278 |
)
|
279 |
|
280 |
return temp_path
|
|
|
929 |
self.loader_func = loader_func
|
930 |
|
931 |
def load(self):
    """Download each file in ``self.file_list`` and parse it with ``self.loader_func``.

    Every entry of ``self.file_list`` is fetched from the Hugging Face
    Space ``STORAGE_REPO_ID`` into its own temporary directory, handed to
    ``self.loader_func`` as a local path, and the resulting items are
    collected into a single list.

    Empty downloads are skipped with a warning. Any exception raised while
    downloading or parsing a file is printed and that file is skipped, so
    one bad file does not abort the whole run (best-effort by design).

    Returns:
        list: All items produced by ``self.loader_func`` for the files
        that could be downloaded and parsed, in ``self.file_list`` order.
    """
    documents = []
    for file_path in self.file_list:
        # A dedicated temporary directory per file: the download (and any
        # bookkeeping files the hub client writes next to it) is removed as
        # soon as the file has been parsed. The previous version created an
        # empty placeholder file just to learn the temp directory, then
        # deleted only that placeholder and left the real download behind.
        with tempfile.TemporaryDirectory() as download_dir:
            try:
                # Download the file from the Hugging Face Space
                downloaded_path = hf_hub_download(
                    repo_id=STORAGE_REPO_ID,
                    filename=file_path,
                    repo_type="space",
                    local_dir=download_dir,
                    token=hf_token,
                )

                # Skip empty files; there is nothing to parse
                if os.path.getsize(downloaded_path) == 0:
                    print(f"Warnung: Die Datei {file_path} ist leer und wird übersprungen.")
                    continue

                # extend() consumes the loader's result right here, while the
                # file still exists.
                # NOTE(review): assumes loader_func output does not need the
                # file on disk after this call returns - confirm for the
                # loaders in use.
                documents.extend(self.loader_func(downloaded_path))
            except Exception as e:
                # Deliberate best-effort: report the failure and move on
                print(f"Fehler beim Verarbeiten der Datei {file_path}: {str(e)}")

    return documents
|
961 |
|
962 |
|
963 |
|