# SkazuHD's picture
# init space
# d660b02
from loguru import logger
from pydantic_settings import BaseSettings, SettingsConfigDict
class Settings(BaseSettings):
    """Application configuration.

    Values are resolved by pydantic-settings from (in priority order)
    environment variables, the `.env` file, and the class-level defaults
    below. Fields typed `str | None = None` are secrets that must be
    provided externally when the corresponding integration is used.
    """

    model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8")

    # --- Required settings even when working locally. ---

    # OpenAI API
    OPENAI_MODEL_ID: str = "gpt-4o-mini"
    OPENAI_API_KEY: str | None = None
    LLAMA_MODEL_ID: str = "llama3.1"

    # Huggingface API
    HUGGINGFACE_ACCESS_TOKEN: str | None = None

    # Comet ML (during training)
    COMET_API_KEY: str | None = None
    COMET_PROJECT: str = "cs370"

    # --- Required settings when deploying the code. ---
    # --- Otherwise, default values work fine. ---

    # MongoDB database (default points at the docker-compose service host)
    DATABASE_HOST: str = "mongodb://llm_engineering:llm_engineering@llm_engineering_mongo:27017"
    DATABASE_NAME: str = "cs370"

    # Qdrant vector database
    USE_QDRANT_CLOUD: bool = False
    QDRANT_DATABASE_HOST: str = "llm_engineering_qdrant"
    QDRANT_DATABASE_PORT: int = 6333
    # NOTE(review): "str" is a placeholder default, not a real URL — must be
    # overridden via the environment when USE_QDRANT_CLOUD is True.
    QDRANT_CLOUD_URL: str = "str"
    QDRANT_APIKEY: str | None = None

    # AWS Authentication
    AWS_REGION: str = "eu-central-1"
    AWS_ACCESS_KEY: str | None = None
    AWS_SECRET_KEY: str | None = None
    AWS_ARN_ROLE: str | None = None

    # --- Optional settings used to tweak the code. ---

    # AWS SageMaker
    HF_MODEL_ID: str = "mlabonne/TwinLlama-3.1-8B-DPO"
    GPU_INSTANCE_TYPE: str = "ml.g5.2xlarge"
    SM_NUM_GPUS: int = 1
    MAX_INPUT_LENGTH: int = 2048
    MAX_TOTAL_TOKENS: int = 4096
    MAX_BATCH_TOTAL_TOKENS: int = 4096
    COPIES: int = 1  # Number of replicas
    GPUS: int = 1  # Number of GPUs
    CPUS: int = 2  # Number of CPU cores
    SAGEMAKER_ENDPOINT_CONFIG_INFERENCE: str = "twin"
    SAGEMAKER_ENDPOINT_INFERENCE: str = "twin"
    TEMPERATURE_INFERENCE: float = 0.01
    TOP_P_INFERENCE: float = 0.9
    MAX_NEW_TOKENS_INFERENCE: int = 150

    # RAG
    TEXT_EMBEDDING_MODEL_ID: str = "sentence-transformers/all-MiniLM-L6-v2"
    RERANKING_CROSS_ENCODER_MODEL_ID: str = "cross-encoder/ms-marco-MiniLM-L-4-v2"
    RAG_MODEL_DEVICE: str = "cpu"

    # LinkedIn Credentials
    LINKEDIN_USERNAME: str | None = None
    LINKEDIN_PASSWORD: str | None = None

    @property
    def OPENAI_MAX_TOKEN_WINDOW(self) -> int:
        """Return 90% of the official context window for OPENAI_MODEL_ID.

        The 10% margin leaves headroom so prompts built against this limit
        do not hit the hard API cap. Unknown model ids fall back to 128000.
        """
        official_max_token_window = {
            "gpt-3.5-turbo": 16385,
            "gpt-4-turbo": 128000,
            "gpt-4o": 128000,
            "gpt-4o-mini": 128000,
        }.get(self.OPENAI_MODEL_ID, 128000)
        return int(official_max_token_window * 0.90)

    @classmethod
    def load_settings(cls) -> "Settings":
        """
        Initializes the settings from the '.env' file and the class defaults.

        NOTE(review): loading from the ZenML secret store is currently
        disabled; re-enable it here (Client().get_secret("settings")) when
        the ZenML integration is restored.

        Returns:
            Settings: The initialized settings object.
        """
        # Both the previous try/except branches built a plain Settings();
        # the (RuntimeError, KeyError) handler was unreachable dead code.
        logger.info("Loading settings from the '.env' file and defaults.")
        return cls()

    def export(self) -> None:
        """
        Export the settings to the ZenML secret store.

        NOTE(review): currently a no-op — the ZenML export
        (Client().create_secret(name="settings", ...)) is disabled.
        """
        pass
settings = Settings.load_settings()