# NOTE: scraped-page residue ("Spaces: Sleeping Sleeping") — not part of the script.
import argparse
import sys
from datetime import datetime

import nltk

# Add the current working directory to the import path so the project
# packages imported below can be resolved. Must run before those imports.
sys.path.append(".")

from server.knowledge_base.migrate import (create_tables, reset_tables, import_from_db,
                                           folder2db, prune_db_docs, prune_folder_files)
from configs.model_config import NLTK_DATA_PATH, EMBEDDING_MODEL

# Put the project-configured NLTK data directory first in NLTK's search path.
nltk.data.path = [NLTK_DATA_PATH] + nltk.data.path
def build_parser(default_embed_model):
    """Build the command-line parser for the knowledge-base maintenance script.

    Args:
        default_embed_model: Value used for ``--embed-model`` when the flag is
            not given on the command line.

    Returns:
        A configured ``argparse.ArgumentParser``. Flag names, short options and
        defaults are the script's public CLI and must stay stable.
    """
    parser = argparse.ArgumentParser(
        description="please specify only one operation at a time."
    )

    # --- table management (handled independently of the operations below) ---
    parser.add_argument(
        "--create-tables",
        action="store_true",
        help="create empty tables if not existed",
    )
    parser.add_argument(
        "--clear-tables",
        action="store_true",
        help="create empty tables, or drop the database tables before recreate vector stores",
    )

    # --- operations: main() runs only the first one that is set ---
    parser.add_argument(
        "-r",
        "--recreate-vs",
        action="store_true",
        help=(
            "recreate vector store. "
            "use this option if you have copied document files to the content folder, "
            "but vector store has not been populated or "
            "DEFAULT_VS_TYPE/EMBEDDING_MODEL changed."
        ),
    )
    parser.add_argument(
        "--import-db",
        help="import tables from specified sqlite database",
    )
    parser.add_argument(
        "-u",
        "--update-in-db",
        action="store_true",
        help=(
            "update vector store for files exist in database. "
            "use this option if you want to recreate vectors for files exist in db "
            "and skip files exist in local folder only."
        ),
    )
    parser.add_argument(
        "-i",
        "--increment",
        action="store_true",
        help=(
            "update vector store for files exist in local folder and not exist in database. "
            "use this option if you want to create vectors incrementally."
        ),
    )
    parser.add_argument(
        "--prune-db",
        action="store_true",
        help=(
            "delete docs in database that not existed in local folder. "
            "it is used to delete database docs after user deleted some doc files in file browser"
        ),
    )
    parser.add_argument(
        "--prune-folder",
        action="store_true",
        help=(
            "delete doc files in local folder that not existed in database. "
            "it is used to free local disk space by delete unused doc files."
        ),
    )

    # --- modifiers shared by the operations ---
    parser.add_argument(
        "-n",
        "--kb-name",
        type=str,
        nargs="+",
        default=[],
        help="specify knowledge base names to operate on. default is all folders exist in KB_ROOT_PATH.",
    )
    parser.add_argument(
        "-e",
        "--embed-model",
        type=str,
        default=default_embed_model,
        help="specify embeddings model.",
    )
    return parser


def main(argv=None):
    """Parse the command line and run the requested maintenance operation.

    ``--create-tables`` and ``--clear-tables`` are each honoured on their own.
    After that, at most one operation runs, chosen in this fixed priority
    order: recreate-vs, import-db, update-in-db, increment, prune-db,
    prune-folder. The elapsed wall-clock time is printed at the end, even
    when no operation was selected.

    Args:
        argv: Argument list to parse; ``None`` (the default) makes argparse
            read ``sys.argv[1:]``.
    """
    args = build_parser(EMBEDDING_MODEL).parse_args(argv)
    start_time = datetime.now()

    if args.create_tables:
        create_tables()  # confirm tables exist

    if args.clear_tables:
        reset_tables()
        print("database tables reset")

    if args.recreate_vs:
        # Tables are (re)created first so the rebuild has somewhere to write.
        create_tables()
        print("recreating all vector stores")
        folder2db(kb_names=args.kb_name, mode="recreate_vs", embed_model=args.embed_model)
    elif args.import_db:
        import_from_db(args.import_db)
    elif args.update_in_db:
        folder2db(kb_names=args.kb_name, mode="update_in_db", embed_model=args.embed_model)
    elif args.increment:
        folder2db(kb_names=args.kb_name, mode="increment", embed_model=args.embed_model)
    elif args.prune_db:
        prune_db_docs(args.kb_name)
    elif args.prune_folder:
        prune_folder_files(args.kb_name)

    end_time = datetime.now()
    # The message text means "total elapsed time"; kept verbatim as runtime output.
    print(f"总计用时: {end_time-start_time}")


if __name__ == "__main__":
    main()