File size: 599 Bytes
d788666
 
ef781c2
 
 
 
12dac72
89e36c5
 
 
 
ef781c2
 
89e36c5
d788666
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
from datetime import timedelta
import re
from pathlib import Path
# import logging


# --- Hugging Face Hub settings ---------------------------------------------
# NOTE(review): presumably toggles fetching the database from the Hub
# instead of using the local copy; confirm against the code that reads it.
HF_HOST: bool = True
# Repository identifier and kind on the Hub.
HF_REPO_ID: str = "terapyon/terapyon-podcast"
HF_REPO_TYPE: str = "dataset"
# File name inside the repository; the name carries a date stamp (20250104).
HF_FILENAME: str = "terapyon-podcast-20250104.duckdb"

# --- Filesystem layout -----------------------------------------------------
# Absolute directory containing this module; every path below is resolved
# relative to its parent directory.
HERE = Path(__file__).resolve().parent

# DuckDB database file under the sibling "db" directory.
DUCKDB_FILE = HERE.parent.joinpath("db", "terapyon-podcast.duckdb")

# Sibling "store" and "data" directories.
STORE_DIR = HERE.parent.joinpath("store")
DATA_DIR = HERE.parent.joinpath("data")

# Parquet locations are exposed as plain strings rather than Path objects.
# EPISODES_PARQUET contains a "*" wildcard, so it is a glob pattern and not
# a single file.
PODCAST_TITLE_LIST = str(STORE_DIR.joinpath("title-list-202301-202501.parquet"))
EPISODES_PARQUET = str(STORE_DIR.joinpath("podcast-*.parquet"))

# One-minute interval.
# NOTE(review): presumably the window used to split transcripts; confirm
# against the code that consumes it.
divider_time = timedelta(seconds=60)

# Captures a run of digits enclosed on both sides by "_" or "-".
# NOTE(review): going by the name, this pulls a number out of an SRT file
# name; confirm against callers.
RE_PODCAST_SRT_FILE = re.compile(r"[_-](\d+)[_-]")