import json | |
from sbert import SBERT as sentence_bert | |
import faiss | |
def init_cache(embedding_model: str = "dangvantuan/vietnamese-embedding"):
    """Build the sentence encoder and an empty Faiss index sized to match it.

    Args:
        embedding_model (str): Model name handed to the SBERT wrapper.

    Returns:
        tuple: ``(index, encoder)`` where ``index`` is a ``faiss.IndexFlatL2``
        created with the encoder's ``dimension`` attribute, and ``encoder``
        is the SBERT wrapper instance.
    """
    model = sentence_bert(embedding_model)

    # The index dimensionality is taken from the encoder's `dimension`.
    embedding_dim = model.dimension
    print(embedding_dim)

    flat_index = faiss.IndexFlatL2(embedding_dim)
    if flat_index.is_trained:
        print('Index initialized and ready for use')

    return flat_index, model
def retrieve_cache(json_file):
    """Load the question/answer cache from a JSON file.

    Args:
        json_file: Path of the JSON cache file to read.

    Returns:
        The object decoded from ``json_file``. When the file does not exist,
        a fresh empty cache ``{'questions': [], 'answers': []}`` is returned.

    Raises:
        json.JSONDecodeError: If the file exists but is not valid JSON.
    """
    try:
        # Read explicitly as UTF-8 (the encoding store_cache writes with)
        # instead of the platform default, so non-ASCII text in the cache
        # decodes the same way on every OS/locale.
        with open(json_file, 'r', encoding='utf-8') as file:
            cache = json.load(file)
    except FileNotFoundError:
        # First run / no cache yet: start from an empty cache.
        cache = {'questions': [], 'answers': []}
    return cache
def store_cache(json_file, cache):
    """Persist the cache to disk as JSON.

    Args:
        json_file: Path of the file to write; it is opened in ``'w'`` mode,
            so any existing content is replaced.
        cache: JSON-serializable cache object to store.
    """
    with open(json_file, 'w', encoding='utf-8') as out_file:
        json.dump(cache, out_file)