alexkueck committed on
Commit
97a9761
·
verified ·
1 Parent(s): f86fc4a

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +61 -0
utils.py CHANGED
@@ -397,6 +397,7 @@ def document_storage_chroma(splits):
397
 
398
  ########################################################
399
  #Vektorstore speichern - bzw. laden
 
400
  def save_splits_and_metadata(splits, directory="chroma/kkg", filename="splits_and_metadata.pkl"):
401
  # Erstellen des Verzeichnisses, falls es nicht existiert
402
  if not os.path.exists(directory):
@@ -427,6 +428,66 @@ def load_vectorstore():
427
  PREPROCESSED_SPLITS, SPLIT_TO_ORIGINAL_MAPPING = splits_and_metadata
428
  return document_storage_chroma(PREPROCESSED_SPLITS)
429
  return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
430
 
431
  # Beispiel-Upload-Funktion
432
  def upload_file_to_huggingface(file_path, upload_path):
 
397
 
398
  ########################################################
399
  #Vektorstore speichern - bzw. laden
400
+ """
401
  def save_splits_and_metadata(splits, directory="chroma/kkg", filename="splits_and_metadata.pkl"):
402
  # Erstellen des Verzeichnisses, falls es nicht existiert
403
  if not os.path.exists(directory):
 
428
  PREPROCESSED_SPLITS, SPLIT_TO_ORIGINAL_MAPPING = splits_and_metadata
429
  return document_storage_chroma(PREPROCESSED_SPLITS)
430
  return None
431
+ """
432
+
433
def save_split_to_original_mapping(mapping, directory="chroma/kkg", mapping_filename="mapping.pkl"):
    """Pickle the split-to-original mapping to disk and upload the file.

    Args:
        mapping: Object to serialize with ``pickle.dump``.
        directory: Local target directory; created if it does not exist.
        mapping_filename: Name of the pickle file inside ``directory``.
    """
    # exist_ok=True creates the directory only when needed and avoids the
    # check-then-create race of a separate os.path.exists() test.
    os.makedirs(directory, exist_ok=True)

    # Store the mapping.
    mapping_filepath = os.path.join(directory, mapping_filename)
    with open(mapping_filepath, "wb") as f:
        pickle.dump(mapping, f)

    # Upload the mapping file via upload_file_to_huggingface; the upload path
    # mirrors the local "<directory>/<mapping_filename>" layout.
    upload_file_to_huggingface(mapping_filepath, f"{directory}/{mapping_filename}")
445
+
446
def load_split_to_original_mapping(directory="chroma/kkg", mapping_filename="mapping.pkl"):
    """Load the pickled split-to-original mapping from disk.

    Args:
        directory: Directory that contains the mapping file.
        mapping_filename: Name of the pickle file inside ``directory``.

    Returns:
        The unpickled mapping object, or ``None`` when the file does not exist.

    Raises:
        OSError: For failures other than a missing file (e.g. permissions).
        pickle.UnpicklingError: If the file content is not a valid pickle.
    """
    mapping_filepath = os.path.join(directory, mapping_filename)

    # Open first and treat "missing" as the None case, instead of testing
    # os.path.exists() beforehand: the file could vanish between check and open.
    try:
        f = open(mapping_filepath, "rb")
    except (FileNotFoundError, NotADirectoryError):
        return None

    # SECURITY: pickle.load executes arbitrary code embedded in the file.
    # Only load mapping files that this application wrote itself.
    with f:
        return pickle.load(f)
454
+
455
def save_vectorstore(vectorstore, directory="chroma/kkg", splits_filename="splits_and_metadata.pkl", vectorstore_filename="vectorstore.pkl"):
    """Pickle a vector store plus its documents/mapping and upload both files.

    Two files are written into ``directory``:

    * ``splits_filename`` holds the tuple
      ``(vectorstore.documents, SPLIT_TO_ORIGINAL_MAPPING)``.
    * ``vectorstore_filename`` holds the pickled ``vectorstore`` object.

    Note that ``SPLIT_TO_ORIGINAL_MAPPING`` is not a parameter; it is read
    from module scope at call time.

    Args:
        vectorstore: Object to persist; must expose a ``documents`` attribute
            and be picklable.
        directory: Local target directory; created if it does not exist.
        splits_filename: File name for the documents/mapping pickle.
        vectorstore_filename: File name for the vector store pickle.
    """
    # exist_ok=True creates the directory only when needed and avoids the
    # check-then-create race of a separate os.path.exists() test.
    os.makedirs(directory, exist_ok=True)

    # Store the splits and metadata.
    splits_filepath = os.path.join(directory, splits_filename)
    with open(splits_filepath, "wb") as f:
        pickle.dump((vectorstore.documents, SPLIT_TO_ORIGINAL_MAPPING), f)

    # Store the vector store itself.
    # NOTE(review): the original comment said "without the SQLite connection",
    # but the object is pickled as-is here; nothing strips a connection.
    # Confirm the vector store type is actually picklable.
    vectorstore_filepath = os.path.join(directory, vectorstore_filename)
    with open(vectorstore_filepath, "wb") as f:
        pickle.dump(vectorstore, f)

    # Upload both files via upload_file_to_huggingface; the upload paths
    # mirror the local "<directory>/<filename>" layout.
    upload_file_to_huggingface(splits_filepath, f"{directory}/{splits_filename}")
    upload_file_to_huggingface(vectorstore_filepath, f"{directory}/{vectorstore_filename}")
473
+
474
def load_vectorstore(directory="chroma/kkg", splits_filename="splits_and_metadata.pkl", vectorstore_filename="vectorstore.pkl"):
    """Load a pickled vector store together with its documents and mapping.

    Args:
        directory: Directory that contains both pickle files.
        splits_filename: File holding the ``(documents, mapping)`` tuple.
        vectorstore_filename: File holding the pickled vector store.

    Returns:
        A tuple ``(vectorstore, mapping)`` with ``vectorstore.documents`` set
        to the loaded documents, or ``(None, None)`` when either file is
        missing. The mapping is only returned; no module-level variable is
        modified by this function.

    Raises:
        OSError: For failures other than a missing file (e.g. permissions).
        pickle.UnpicklingError: If a file does not contain a valid pickle.
    """
    from contextlib import ExitStack

    splits_filepath = os.path.join(directory, splits_filename)
    vectorstore_filepath = os.path.join(directory, vectorstore_filename)

    with ExitStack() as stack:
        # Open both files before reading anything, so a missing file yields
        # (None, None) without a partial load and without the race of a
        # separate os.path.exists() check.
        try:
            splits_file = stack.enter_context(open(splits_filepath, "rb"))
            vectorstore_file = stack.enter_context(open(vectorstore_filepath, "rb"))
        except (FileNotFoundError, NotADirectoryError):
            return None, None

        # SECURITY: pickle.load executes arbitrary code embedded in the file.
        # Only load files that this application wrote itself.
        documents, split_to_original_mapping = pickle.load(splits_file)
        vectorstore = pickle.load(vectorstore_file)

    # Re-attach the documents to the vector store.
    vectorstore.documents = documents
    return vectorstore, split_to_original_mapping
488
+
489
+
490
+
491
 
492
  # Beispiel-Upload-Funktion
493
  def upload_file_to_huggingface(file_path, upload_path):