import os

from dotenv import load_dotenv

# Populate the process environment from a .env file (when one is found) so
# that the os.getenv() lookups performed further down in this module can pick
# up API keys defined there. This must run before those lookups.
load_dotenv()

# Default "provider/model" identifier. The value matches the
# "openai/gpt-4o-mini" key of PROVIDER_MODELS.
DEFAULT_PROVIDER = "openai/gpt-4o-mini"

# Name of a branch in the model repository.
# NOTE(review): presumably the branch that model assets are fetched from;
# confirm against the code that reads this constant.
MODEL_REPO_BRANCH = "new-release-0.0.2"

# Maps each supported "provider/model" identifier to the API token used for
# that provider. Tokens are read from the environment once, when this module
# is imported; os.getenv() returns None for a variable that is not set, so a
# value may be None. The ollama entry carries a fixed placeholder string
# instead of an environment lookup.
PROVIDER_MODELS = {
    "ollama/llama3": "no-token-needed",
    "groq/llama3-70b-8192": os.getenv("GROQ_API_KEY"),
    "groq/llama3-8b-8192": os.getenv("GROQ_API_KEY"),
    "openai/gpt-4o-mini": os.getenv("OPENAI_API_KEY"),
    "openai/gpt-4o": os.getenv("OPENAI_API_KEY"),
    "openai/o1-mini": os.getenv("OPENAI_API_KEY"),
    "openai/o1-preview": os.getenv("OPENAI_API_KEY"),
    "anthropic/claude-3-haiku-20240307": os.getenv("ANTHROPIC_API_KEY"),
    "anthropic/claude-3-opus-20240229": os.getenv("ANTHROPIC_API_KEY"),
    "anthropic/claude-3-sonnet-20240229": os.getenv("ANTHROPIC_API_KEY"),
    "anthropic/claude-3-5-sonnet-20240620": os.getenv("ANTHROPIC_API_KEY"),
}

# Chunking parameters.
# NOTE(review): the names suggest a per-chunk token limit, the fraction of
# overlap between neighbouring chunks, and an estimated tokens-per-word
# ratio; confirm against the chunking code that consumes them.
CHUNK_TOKEN_THRESHOLD = 2 ** 11  # evaluates to 2048
OVERLAP_RATE = 0.1
WORD_TOKEN_RATE = 1.3

# Minimum word counts.
# NOTE(review): presumably the fewest words a piece of text / an image
# description must contain to be kept; confirm against the callers.
MIN_WORD_THRESHOLD = 1
IMAGE_DESCRIPTION_MIN_WORD_THRESHOLD = 1

# HTML attribute names singled out as important.
# NOTE(review): presumably the attributes preserved when markup is cleaned;
# confirm against the HTML-processing code.
IMPORTANT_ATTRS = ['src', 'href', 'alt', 'title', 'width', 'height']

# Inline HTML tag names.
# NOTE(review): presumably tags whose content is treated as plain text;
# confirm against the HTML-processing code.
ONLY_TEXT_ELIGIBLE_TAGS = [
    'b', 'i', 'u', 'span', 'del', 'ins', 'sub', 'sup', 'strong', 'em',
    'code', 'kbd', 'var', 's', 'q', 'abbr', 'cite', 'dfn', 'time', 'small',
    'mark',
]

# Domain names of social-media sites.
# NOTE(review): presumably used to recognise or filter social-media links;
# confirm against the code that reads this list.
SOCIAL_MEDIA_DOMAINS = [
    'facebook.com',
    'twitter.com',
    'x.com',
    'linkedin.com',
    'instagram.com',
    'pinterest.com',
    'tiktok.com',
    'snapchat.com',
    'reddit.com',
]

# NOTE(review): presumably the minimum score an image needs to be kept;
# confirm against the image-scoring code.
IMAGE_SCORE_THRESHOLD = 2

# NOTE(review): presumably the maximum number of metric entries retained;
# confirm against the code that records metrics.
MAX_METRICS_HISTORY = 1000

# Miscellaneous switches and limits.
NEED_MIGRATION = True
URL_LOG_SHORTEN_LENGTH = 30
SHOW_DEPRECATION_WARNINGS = True

# The historical name is misspelled ("TRESHOLD"). It is kept unchanged so
# existing imports keep working; the correctly spelled alias below refers to
# the same value.
SCREENSHOT_HEIGHT_TRESHOLD = 10000
SCREENSHOT_HEIGHT_THRESHOLD = SCREENSHOT_HEIGHT_TRESHOLD

# Timeouts.
# NOTE(review): presumably expressed in milliseconds (60000 ms = 60 s);
# confirm against the browser/download code that consumes them.
PAGE_TIMEOUT = 60000
DOWNLOAD_PAGE_TIMEOUT = 60000