Spaces:
Sleeping
Sleeping
insert podcast table
Browse files- src/store.py +22 -4
src/store.py
CHANGED
@@ -1,21 +1,26 @@
|
|
|
|
1 |
import duckdb
|
2 |
from config import DUCKDB_FILE
|
3 |
|
4 |
|
|
|
|
|
|
|
|
|
5 |
def create_table():
|
6 |
conn = duckdb.connect(DUCKDB_FILE)
|
7 |
podcasts_create = """CREATE TABLE podcasts (
|
8 |
-
id
|
9 |
-
title TEXT, date DATE, guests TEXT[], length
|
10 |
);
|
11 |
"""
|
12 |
episodes_create = """CREATE TABLE episodes (
|
13 |
-
id
|
14 |
PRIMARY KEY (id, part)
|
15 |
);
|
16 |
"""
|
17 |
embeddings_create = """CREATE TABLE embeddings (
|
18 |
-
id
|
19 |
PRIMARY KEY (id, part)
|
20 |
);
|
21 |
"""
|
@@ -27,12 +32,25 @@ def create_table():
|
|
27 |
print("Tables created.")
|
28 |
|
29 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
if __name__ == "__main__":
|
31 |
import sys
|
32 |
args = sys.argv
|
33 |
if len(args) == 2:
|
34 |
if args[1] == "create":
|
35 |
create_table()
|
|
|
|
|
36 |
else:
|
37 |
print("Usage: python store.py create")
|
38 |
sys.exit(1)
|
|
|
1 |
+
from pathlib import Path
|
2 |
import duckdb
|
3 |
from config import DUCKDB_FILE
|
4 |
|
5 |
|
6 |
+
HERE = Path(__file__).parent
|
7 |
+
STORE_DIR = HERE.parent / "store"
|
8 |
+
|
9 |
+
|
10 |
def create_table():
|
11 |
conn = duckdb.connect(DUCKDB_FILE)
|
12 |
podcasts_create = """CREATE TABLE podcasts (
|
13 |
+
id BIGINT PRIMARY KEY,
|
14 |
+
title TEXT, date DATE, guests TEXT[], length BIGINT, audio TEXT
|
15 |
);
|
16 |
"""
|
17 |
episodes_create = """CREATE TABLE episodes (
|
18 |
+
id BIGINT, part BIGINT, start INTERVAL, end_ INTERVAL, text TEXT,
|
19 |
PRIMARY KEY (id, part)
|
20 |
);
|
21 |
"""
|
22 |
embeddings_create = """CREATE TABLE embeddings (
|
23 |
+
id BIGINT, part BIGINT, embedding FLOAT[1024],
|
24 |
PRIMARY KEY (id, part)
|
25 |
);
|
26 |
"""
|
|
|
32 |
print("Tables created.")
|
33 |
|
34 |
|
35 |
+
def insert_podcast():
    """Load podcast metadata from the Parquet file into the ``podcasts`` table.

    Connects to the DuckDB database at ``DUCKDB_FILE`` and inserts one row
    per record of ``podcast-title-list-202301-202501.parquet`` (looked up
    under ``STORE_DIR``). The ``id``, ``title``, ``date``, ``length`` and
    ``audio`` values come from the file; the fourth inserted value is an
    empty list literal.

    Any error raised by DuckDB (for example a missing Parquet file or a
    primary-key conflict) propagates to the caller after the connection
    has been closed.
    """
    parquet_path = STORE_DIR / "podcast-title-list-202301-202501.parquet"
    sql = """INSERT INTO podcasts
        SELECT id, title, date, [], length, audio
        FROM read_parquet(?);
    """
    conn = duckdb.connect(DUCKDB_FILE)
    try:
        # The file path is bound as a parameter rather than formatted into
        # the SQL text.
        conn.execute(sql, [str(parquet_path)])
        conn.commit()
    finally:
        # Always release the database file, even when the insert fails.
        conn.close()
|
44 |
+
|
45 |
+
|
46 |
if __name__ == "__main__":
    import sys

    # Map each supported sub-command to the function that implements it.
    commands = {
        "create": create_table,
        "podcastinsert": insert_podcast,
    }

    args = sys.argv
    if len(args) == 2 and args[1] in commands:
        commands[args[1]]()
    else:
        # A wrong argument count and an unknown command both end up here,
        # so the script never exits silently without doing anything.
        print("Usage: python store.py create|podcastinsert")
        sys.exit(1)
|