"""Module-level configuration constants.

Defines Hugging Face dataset identifiers, filesystem locations resolved
relative to this file, parquet path strings, a one-minute ``timedelta``
and a compiled regular expression.  Nothing here performs I/O; importing
the module only builds the values below.
"""

from datetime import timedelta
from pathlib import Path
import re

# --- Hugging Face identifiers -------------------------------------------
# NOTE(review): presumably selects a Hub-hosted database over the local
# DUCKDB_FILE when True -- confirm against the code that reads this flag.
HF_HOST = False
HF_REPO_TYPE = "dataset"
HF_REPO_ID = "terapyon/terapyon-podcast"
HF_FILENAME = "terapyon-podcast-20250104.duckdb"

# --- Filesystem layout --------------------------------------------------
# Absolute, symlink-resolved directory that contains this file.
HERE = Path(__file__).resolve().parent

# Sibling directories of HERE (i.e. children of HERE's parent directory).
DUCKDB_FILE = HERE.parent / "db" / "terapyon-podcast.duckdb"
STORE_DIR = HERE.parent / "store"
DATA_DIR = HERE.parent / "data"

# These two are plain ``str`` values rather than ``Path`` objects.
PODCAST_TITLE_LIST = str(STORE_DIR / "title-list-202301-202501.parquet")
# Contains a ``*`` wildcard, so it is a glob pattern and not a single file.
EPISODES_PARQUET = str(STORE_DIR / "podcast-*.parquet")

# --- Parsing / segmentation parameters ----------------------------------
# Fixed one-minute duration.
# NOTE(review): the name suggests a segment/chunk length -- confirm usage.
divider_time = timedelta(minutes=1)

# Matches a run of digits that has ``_`` or ``-`` immediately on both
# sides; group 1 captures the digits.
# NOTE(review): presumably extracts an episode number from SRT file
# names -- verify against the caller.
RE_PODCAST_SRT_FILE = re.compile(r"[_-](\d+)[_-]")