Update utils.py
Browse files
utils.py
CHANGED
@@ -485,7 +485,7 @@ def document_storage_chroma(splits):
|
|
485 |
vectorstore = Chroma.from_documents(documents=splits, embedding=embedding_fn)
|
486 |
|
487 |
return vectorstore
|
488 |
-
|
489 |
########################################################
|
490 |
#Splits für den Vektorstore speichern - bzw. laden
|
491 |
def save_splits(preprocessed_splits, original_splits, directory="chroma/kkg", preprocessed_filename="preprocessed_splits.pkl", original_filename="original_splits.pkl"):
|
@@ -506,7 +506,7 @@ def save_splits(preprocessed_splits, original_splits, directory="chroma/kkg", pr
|
|
506 |
# Hochladen der Splits-Dateien zum Hugging Face Space
|
507 |
upload_file_to_huggingface(preprocessed_filepath, f"{directory}/{preprocessed_filename}")
|
508 |
upload_file_to_huggingface(original_filepath, f"{directory}/{original_filename}")
|
509 |
-
|
510 |
def load_splits(directory="chroma/kkg", preprocessed_filename="preprocessed_splits.pkl", original_filename="original_splits.pkl"):
|
511 |
# Vollständigen Pfad zur Datei erstellen
|
512 |
preprocessed_filepath = os.path.join(directory, preprocessed_filename)
|
@@ -522,11 +522,41 @@ def load_splits(directory="chroma/kkg", preprocessed_filename="preprocessed_spli
|
|
522 |
|
523 |
return preprocessed_splits, original_splits
|
524 |
return None, None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
525 |
|
526 |
########################################################
|
527 |
#Vektorstore speichern - bzw. laden
|
528 |
#Laden des Vektorstores - aus den gespeicherten splits
|
529 |
-
|
530 |
def load_vectorstore():
|
531 |
splits_and_metadata = load_splits_and_metadata()
|
532 |
if splits_and_metadata is not None:
|
@@ -549,7 +579,7 @@ def save_split_to_original_mapping(mapping, directory="chroma/kkg", filename="ma
|
|
549 |
|
550 |
# Hochladen der Mapping-Datei zum Hugging Face Space
|
551 |
upload_file_to_huggingface(filepath, f"{directory}/{filename}")
|
552 |
-
|
553 |
def load_split_to_original_mapping(directory="chroma/kkg", filename="mapping.pkl"):
|
554 |
# Vollständigen Pfad zur Datei erstellen
|
555 |
filepath = os.path.join(directory, filename)
|
@@ -559,7 +589,24 @@ def load_split_to_original_mapping(directory="chroma/kkg", filename="mapping.pkl
|
|
559 |
with open(filepath, "rb") as f:
|
560 |
return pickle.load(f)
|
561 |
return None
|
|
|
562 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
563 |
|
564 |
|
565 |
#######################################
|
|
|
485 |
vectorstore = Chroma.from_documents(documents=splits, embedding=embedding_fn)
|
486 |
|
487 |
return vectorstore
|
488 |
+
|
489 |
########################################################
|
490 |
#Splits für den Vektorstore speichern - bzw. laden
|
491 |
def save_splits(preprocessed_splits, original_splits, directory="chroma/kkg", preprocessed_filename="preprocessed_splits.pkl", original_filename="original_splits.pkl"):
|
|
|
506 |
# Hochladen der Splits-Dateien zum Hugging Face Space
|
507 |
upload_file_to_huggingface(preprocessed_filepath, f"{directory}/{preprocessed_filename}")
|
508 |
upload_file_to_huggingface(original_filepath, f"{directory}/{original_filename}")
|
509 |
+
"""
|
510 |
def load_splits(directory="chroma/kkg", preprocessed_filename="preprocessed_splits.pkl", original_filename="original_splits.pkl"):
|
511 |
# Vollständigen Pfad zur Datei erstellen
|
512 |
preprocessed_filepath = os.path.join(directory, preprocessed_filename)
|
|
|
522 |
|
523 |
return preprocessed_splits, original_splits
|
524 |
return None, None
|
525 |
+
"""
|
526 |
+
def load_splits(directory="chroma/kkg", preprocessed_filename="preprocessed_splits.pkl", original_filename="original_splits.pkl"):
    """Load the preprocessed and the original splits from the Hugging Face Space.

    Both pickle files are fetched with ``hf_hub_download`` from the repository
    ``STORAGE_REPO_ID`` (``repo_type="space"``) under the repository path
    ``<directory>/<filename>`` and then unpickled.

    Args:
        directory: Folder inside the repository that holds the split files.
        preprocessed_filename: File name of the pickled preprocessed splits.
        original_filename: File name of the pickled original splits.

    Returns:
        A tuple ``(preprocessed_splits, original_splits)``. If downloading or
        unpickling either file fails, the error is printed and
        ``(None, None)`` is returned, so callers never receive a half-filled
        pair (one element loaded, the other ``None``).
    """
    try:
        preprocessed_splits = _load_pickle_from_hub(f"{directory}/{preprocessed_filename}")
        original_splits = _load_pickle_from_hub(f"{directory}/{original_filename}")
    except Exception as e:
        # Best-effort loader: report the problem and signal "nothing loaded"
        # instead of propagating the error to the caller.
        print(f"Fehler beim Laden der Splits: {e}")
        return None, None

    return preprocessed_splits, original_splits


def _load_pickle_from_hub(repo_path):
    """Download one file from the storage Space and return its unpickled content."""
    local_path = hf_hub_download(
        repo_id=STORAGE_REPO_ID,
        filename=repo_path,
        repo_type="space",
        token=hf_token,
    )
    # NOTE(security): pickle.load can execute arbitrary code contained in the
    # file. Only use this with a storage repository whose contents are trusted.
    with open(local_path, "rb") as f:
        return pickle.load(f)
|
555 |
|
556 |
########################################################
|
557 |
#Vektorstore speichern - bzw. laden
|
558 |
#Laden des Vektorstores - aus den gespeicherten splits
|
559 |
+
"""
|
560 |
def load_vectorstore():
|
561 |
splits_and_metadata = load_splits_and_metadata()
|
562 |
if splits_and_metadata is not None:
|
|
|
579 |
|
580 |
# Hochladen der Mapping-Datei zum Hugging Face Space
|
581 |
upload_file_to_huggingface(filepath, f"{directory}/{filename}")
|
582 |
+
"""
|
583 |
def load_split_to_original_mapping(directory="chroma/kkg", filename="mapping.pkl"):
|
584 |
# Vollständigen Pfad zur Datei erstellen
|
585 |
filepath = os.path.join(directory, filename)
|
|
|
589 |
with open(filepath, "rb") as f:
|
590 |
return pickle.load(f)
|
591 |
return None
|
592 |
+
"""
|
593 |
|
594 |
+
def load_split_to_original_mapping(directory="chroma/kkg", filename="mapping.pkl"):
    """Load the split-to-original mapping from the Hugging Face Space.

    The file ``<directory>/<filename>`` is fetched with ``hf_hub_download``
    from the repository ``STORAGE_REPO_ID`` (``repo_type="space"``) and
    unpickled.

    Args:
        directory: Folder inside the repository that holds the mapping file.
        filename: File name of the pickled mapping.

    Returns:
        The unpickled object, or ``None`` if downloading or unpickling raised
        an exception (the error is printed in that case).
    """
    try:
        # Fetch the mapping file from the Hugging Face repository.
        local_copy = hf_hub_download(
            repo_id=STORAGE_REPO_ID,
            filename=f"{directory}/{filename}",
            repo_type="space",
            token=hf_token,
        )
        with open(local_copy, "rb") as handle:
            mapping = pickle.load(handle)
    except Exception as e:
        # Any failure is reported and turned into a None result.
        print(f"Fehler beim Laden des Mappings: {str(e)}")
        return None

    return mapping
|
610 |
|
611 |
|
612 |
#######################################
|