terapyon committed on
Commit
9d69587
·
1 Parent(s): ef781c2

insert podcast table

Browse files
Files changed (1) hide show
  1. src/store.py +22 -4
src/store.py CHANGED
@@ -1,21 +1,26 @@
 
1
  import duckdb
2
  from config import DUCKDB_FILE
3
 
4
 
 
 
 
 
5
  def create_table():
6
  conn = duckdb.connect(DUCKDB_FILE)
7
  podcasts_create = """CREATE TABLE podcasts (
8
- id INTEGER PRIMARY KEY,
9
- title TEXT, date DATE, guests TEXT[], length INTEGER, audio TEXT
10
  );
11
  """
12
  episodes_create = """CREATE TABLE episodes (
13
- id INTEGER, part INTEGER, start INTERVAL, end_ INTERVAL, text TEXT,
14
  PRIMARY KEY (id, part)
15
  );
16
  """
17
  embeddings_create = """CREATE TABLE embeddings (
18
- id INTEGER, part INTEGER, embedding FLOAT[1024],
19
  PRIMARY KEY (id, part)
20
  );
21
  """
@@ -27,12 +32,25 @@ def create_table():
27
  print("Tables created.")
28
 
29
 
 
 
 
 
 
 
 
 
 
 
 
30
  if __name__ == "__main__":
31
  import sys
32
  args = sys.argv
33
  if len(args) == 2:
34
  if args[1] == "create":
35
  create_table()
 
 
36
  else:
37
  print("Usage: python store.py create")
38
  sys.exit(1)
 
1
+ from pathlib import Path
2
  import duckdb
3
  from config import DUCKDB_FILE
4
 
5
 
6
+ HERE = Path(__file__).parent
7
+ STORE_DIR = HERE.parent / "store"
8
+
9
+
10
  def create_table():
11
  conn = duckdb.connect(DUCKDB_FILE)
12
  podcasts_create = """CREATE TABLE podcasts (
13
+ id BIGINT PRIMARY KEY,
14
+ title TEXT, date DATE, guests TEXT[], length BIGINT, audio TEXT
15
  );
16
  """
17
  episodes_create = """CREATE TABLE episodes (
18
+ id BIGINT, part BIGINT, start INTERVAL, end_ INTERVAL, text TEXT,
19
  PRIMARY KEY (id, part)
20
  );
21
  """
22
  embeddings_create = """CREATE TABLE embeddings (
23
+ id BIGINT, part BIGINT, embedding FLOAT[1024],
24
  PRIMARY KEY (id, part)
25
  );
26
  """
 
32
  print("Tables created.")
33
 
34
 
35
def insert_podcast():
    """Bulk-load the podcast title list from Parquet into the ``podcasts`` table.

    Opens the DuckDB database at ``DUCKDB_FILE`` and runs a single
    ``INSERT INTO podcasts SELECT ... FROM read_parquet(?)`` against
    ``podcast-title-list-202301-202501.parquet`` inside ``STORE_DIR``.

    The Parquet file is expected to provide the columns ``id``, ``title``,
    ``date``, ``length`` and ``audio``. The fourth selected value is an empty
    list literal, so no guest data is taken from the file.

    Raises:
        duckdb.Error: Whatever DuckDB raises when the statement fails
            (presumably for a missing file or table, or a primary-key
            conflict when rows are loaded twice -- not verified here). The
            connection is closed before the exception propagates.
    """
    parquet_path = STORE_DIR / "podcast-title-list-202301-202501.parquet"
    sql = """INSERT INTO podcasts
        SELECT id, title, date, [], length, audio
        FROM read_parquet(?);
    """
    conn = duckdb.connect(DUCKDB_FILE)
    try:
        # The path is bound as a parameter instead of being formatted into
        # the SQL text.
        conn.execute(sql, [str(parquet_path)])
        conn.commit()
    finally:
        # Always release the handle on the database file, including when the
        # insert fails; otherwise the connection stays open until process exit.
        conn.close()
44
+
45
+
46
  if __name__ == "__main__":
47
  import sys
48
  args = sys.argv
49
  if len(args) == 2:
50
  if args[1] == "create":
51
  create_table()
52
+ elif args[1] == "podcastinsert":
53
+ insert_podcast()
54
  else:
55
  print("Usage: python store.py create")
56
  sys.exit(1)