alexkueck committed on
Commit
056b7dc
·
verified ·
1 Parent(s): 4c3c894

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +25 -13
utils.py CHANGED
@@ -270,11 +270,11 @@ def access_pdf(self, filename):
270
 
271
  # Datei aus dem Hugging Face Space herunterladen
272
  hf_hub_download(
273
- repo_id=DATA_REPO_ID,
274
- filename=os.path.join(self.directory_path, filename),
275
  repo_type=DATA_REPO_TYPE,
276
  local_dir=os.path.dirname(temp_path),
277
- local_dir_use_symlinks=False
278
  )
279
 
280
  return temp_path
@@ -929,23 +929,35 @@ class CustomLoader:
929
  self.loader_func = loader_func
930
 
931
  def load(self):
932
- documents = []
933
- for file_path in self.file_list:
934
- with tempfile.NamedTemporaryFile(delete=False, suffix=self.file_type) as temp_file:
935
- temp_path = temp_file.name
936
-
 
937
  # Datei aus dem Hugging Face Space herunterladen
938
- hf_hub_download(
939
  repo_id=STORAGE_REPO_ID,
940
  filename=file_path,
941
  repo_type="space",
942
  local_dir=os.path.dirname(temp_path),
943
- local_dir_use_symlinks=False,
944
  token=hf_token
945
  )
946
- documents.extend(self.loader_func(temp_path))
947
- os.unlink(temp_path)
948
- return documents
 
 
 
 
 
 
 
 
 
 
 
 
949
 
950
 
951
 
 
270
 
271
  # Datei aus dem Hugging Face Space herunterladen
272
  hf_hub_download(
273
+ repo_id=STORAGE_REPO_ID,
274
+ filename=file_path,
275
  repo_type=DATA_REPO_TYPE,
276
  local_dir=os.path.dirname(temp_path),
277
+ token=hf_token
278
  )
279
 
280
  return temp_path
 
929
  self.loader_func = loader_func
930
 
931
  def load(self):
932
+ documents = []
933
+ for file_path in self.file_list:
934
+ with tempfile.NamedTemporaryFile(delete=False, suffix=self.file_type) as temp_file:
935
+ temp_path = temp_file.name
936
+
937
+ try:
938
  # Datei aus dem Hugging Face Space herunterladen
939
+ downloaded_path = hf_hub_download(
940
  repo_id=STORAGE_REPO_ID,
941
  filename=file_path,
942
  repo_type="space",
943
  local_dir=os.path.dirname(temp_path),
 
944
  token=hf_token
945
  )
946
+
947
+ # Überprüfen, ob die Datei leer ist
948
+ if os.path.getsize(downloaded_path) == 0:
949
+ print(f"Warnung: Die Datei {file_path} ist leer und wird übersprungen.")
950
+ continue
951
+
952
+ documents.extend(self.loader_func(downloaded_path))
953
+ except Exception as e:
954
+ print(f"Fehler beim Verarbeiten der Datei {file_path}: {str(e)}")
955
+ finally:
956
+ # Sicherstellen, dass die temporäre Datei gelöscht wird
957
+ if os.path.exists(temp_path):
958
+ os.unlink(temp_path)
959
+
960
+ return documents
961
 
962
 
963