podcast-search / src /config.py
terapyon's picture
for HF host
12dac72
raw
history blame contribute delete
599 Bytes
from datetime import timedelta
import re
from pathlib import Path
# import logging
# --- Hugging Face Hub settings -------------------------------------------
# NOTE(review): presumably switches the app to a Hub-hosted database file;
# confirm against the code that reads this flag.
HF_HOST = True
HF_REPO_ID = "terapyon/terapyon-podcast"
HF_REPO_TYPE = "dataset"
HF_FILENAME = "terapyon-podcast-20250104.duckdb"

# --- Local filesystem layout ---------------------------------------------
# Directory that contains this module; every path below hangs off its parent.
HERE = Path(__file__).resolve().parent
DATA_DIR = HERE.parent.joinpath("data")
STORE_DIR = HERE.parent.joinpath("store")
DUCKDB_FILE = HERE.parent.joinpath("db", "terapyon-podcast.duckdb")

# Parquet locations are exposed as plain strings rather than Path objects.
# EPISODES_PARQUET contains a "*" wildcard, so it is a pattern, not one file.
EPISODES_PARQUET = str(STORE_DIR.joinpath("podcast-*.parquet"))
PODCAST_TITLE_LIST = str(STORE_DIR.joinpath("title-list-202301-202501.parquet"))

# --- Processing parameters -----------------------------------------------
# One-minute interval.
# NOTE(review): presumably the window used to split transcripts; verify
# against callers.
divider_time = timedelta(seconds=60)

# Matches a run of digits with "_" or "-" on both sides; group 1 is the digits.
# NOTE(review): presumably extracts the episode number from an SRT file name.
RE_PODCAST_SRT_FILE = re.compile(r"[_-](\d+)[_-]")