Francesco committed on
Commit
db2d292
·
1 Parent(s): 9b3dd41

app with emotions

Browse files
app.py CHANGED
@@ -13,36 +13,33 @@ from langchain.embeddings.openai import OpenAIEmbeddings
13
 
14
  from data import load_db
15
  from names import DATASET_ID, MODEL_ID
16
- import json
17
 
18
  @st.cache_resource
19
  def init():
20
- # embeddings = OpenAIEmbeddings(model=MODEL_ID)
21
- # dataset_path = f"hub://{os.environ['ACTIVELOOP_ORG_ID']}/{DATASET_ID}"
22
-
23
- # db = load_db(
24
- # dataset_path,
25
- # embedding_function=embeddings,
26
- # token=os.environ["ACTIVELOOP_TOKEN"],
27
- # org_id=os.environ["ACTIVELOOP_ORG_ID"],
28
- # read_only=True,
29
- # )
30
-
31
- with open("/home/zuppif/Documents/Work/ActiveLoop/ai-shazam/data/lyrics_with_spotify_url_and_summary.json", "r") as f:
32
- songs = json.load(f)
33
 
34
  prompt = PromptTemplate(
35
- input_variables=["songs", "user_input"],
36
- template=Path("prompts/bot_with_summary.prompt").read_text(),
37
  )
38
 
39
  llm = ChatOpenAI(temperature=0.7)
40
 
41
  chain = LLMChain(llm=llm, prompt=prompt)
42
 
43
- return chain, songs, json.dumps(songs)
44
 
45
- chain, songs, songs_json = init()
46
 
47
  st.title("Disney song for you")
48
 
@@ -56,18 +53,16 @@ placeholder_emotions = st.empty()
56
  placeholder = st.empty()
57
 
58
  def get_emotions(user_input):
59
- song_name = chain.run(songs=songs_json, user_input=user_input)
60
- song_name.replace('\'', '')
61
- print(f"Song: {song_name}")
62
- song = songs[song_name.lower()]
63
- # matches = db.similarity_search_with_score(emotions, distance_metric="cos")
64
- # print(matches)
65
- # doc, score = matches[0]
66
  iframes_html = ""
67
- # with placeholder_emotions:
68
- # st.write(emotions)
69
  with placeholder:
70
- embed_url = song["embed_url"]
71
  iframe_html = f'<iframe src="{embed_url}" style="border:0"> </iframe>'
72
  st.components.v1.html(f"<div style='display:flex;flex-direction:column'>{iframe_html}</div>")
73
 
 
13
 
14
  from data import load_db
15
  from names import DATASET_ID, MODEL_ID
16
+
17
 
18
  @st.cache_resource
19
  def init():
20
+ embeddings = OpenAIEmbeddings(model=MODEL_ID)
21
+ dataset_path = f"hub://{os.environ['ACTIVELOOP_ORG_ID']}/{DATASET_ID}"
22
+
23
+ db = load_db(
24
+ dataset_path,
25
+ embedding_function=embeddings,
26
+ token=os.environ["ACTIVELOOP_TOKEN"],
27
+ org_id=os.environ["ACTIVELOOP_ORG_ID"],
28
+ read_only=True,
29
+ )
 
 
 
30
 
31
  prompt = PromptTemplate(
32
+ input_variables=["content"],
33
+ template=Path("prompts/bot.prompt").read_text(),
34
  )
35
 
36
  llm = ChatOpenAI(temperature=0.7)
37
 
38
  chain = LLMChain(llm=llm, prompt=prompt)
39
 
40
+ return db, chain
41
 
42
+ db, chain = init()
43
 
44
  st.title("Disney song for you")
45
 
 
53
  placeholder = st.empty()
54
 
55
  def get_emotions(user_input):
56
+ emotions = chain.run(content=user_input)
57
+ print(f"Emotions: {emotions}")
58
+ matches = db.similarity_search_with_score(emotions, distance_metric="cos")
59
+ print(matches)
60
+ doc, score = matches[0]
 
 
61
  iframes_html = ""
62
+ with placeholder_emotions:
63
+ st.write(emotions)
64
  with placeholder:
65
+ embed_url = doc.metadata["embed_url"]
66
  iframe_html = f'<iframe src="{embed_url}" style="border:0"> </iframe>'
67
  st.components.v1.html(f"<div style='display:flex;flex-direction:column'>{iframe_html}</div>")
68
 
data.py CHANGED
@@ -45,4 +45,4 @@ def load_db(dataset_path: str, *args, **kwargs) -> DeepLake:
45
 
46
  if __name__ == "__main__":
47
  dataset_path = f"hub://{os.environ['ACTIVELOOP_ORG_ID']}/{DATASET_ID}"
48
- create_db(dataset_path, "data/lyrics_with_spotify_url.json")
 
45
 
46
  if __name__ == "__main__":
47
  dataset_path = f"hub://{os.environ['ACTIVELOOP_ORG_ID']}/{DATASET_ID}"
48
+ create_db(dataset_path, "data/emotions_with_spotify_url.json")
data/lyrics_with_spotify_url.json CHANGED
@@ -492,4 +492,4 @@
492
  "embed_url": "https://open.spotify.com/embed/track/0d83zVs3OmdjrhfoAxg1dE?utm_source=generator"
493
  }
494
  ]
495
- }
 
492
  "embed_url": "https://open.spotify.com/embed/track/0d83zVs3OmdjrhfoAxg1dE?utm_source=generator"
493
  }
494
  ]
495
+ }
names.py CHANGED
@@ -1,2 +1,4 @@
1
  MODEL_ID = "text-embedding-ada-002"
2
- DATASET_ID = "disney-lyrics"
 
 
 
1
  MODEL_ID = "text-embedding-ada-002"
2
+ # DATASET_ID = "disney-lyrics"
3
+ DATASET_ID = "disney-lyrics-emotions"
4
+
prompts/bot.prompt CHANGED
@@ -6,4 +6,4 @@ Input: "I am very tired today and I am not feeling weel"
6
  Output: "Exhaustion, Discomfort, and Fatigue"
7
 
8
  If the sentence is too short, you can also suggest just one or two emotions.
9
- Please, suggest emotions for input = "{content}", reply ONLY with a max of FOUR emotions.
 
6
  Output: "Exhaustion, Discomfort, and Fatigue"
7
 
8
  If the sentence is too short, you can also suggest just one or two emotions.
9
+ Please, suggest emotions for input = "{content}", reply ONLY with a max of 4 emotions.
prompts/bot_with_summary.prompt CHANGED
@@ -4,7 +4,7 @@ You the following list of songs:
4
 
5
  {songs}
6
 
7
- Given a input, output the song name that matches its content. Just the song name
8
 
9
  For example:
10
  Input: "Today I am not feeling great"
 
4
 
5
  {songs}
6
 
7
+ Given an input, output the song name that matches its content vibe, emotions, theme. Just the song name. If the content contains negative sentiment, please provide a song name to help the user instead.
8
 
9
  For example:
10
  Input: "Today I am not feeling great"
scripts/create_one_sentence_summary.py CHANGED
@@ -1,14 +1,16 @@
1
  from dotenv import load_dotenv
2
- load_dotenv()
3
 
4
- from langchain.chains import LLMChain
5
- from langchain.prompts import PromptTemplate
6
- from pathlib import Path
7
- from langchain.chat_models import ChatOpenAI
8
  import json
9
  from collections import defaultdict
 
10
  from pprint import pprint
11
 
 
 
 
 
12
  prompt = PromptTemplate(
13
  input_variables=["song"],
14
  template=Path("prompts/summary.prompt").read_text(),
@@ -18,7 +20,10 @@ llm = ChatOpenAI(temperature=0)
18
 
19
  chain = LLMChain(llm=llm, prompt=prompt)
20
 
21
- with open("/home/zuppif/Documents/Work/ActiveLoop/ai-shazam/data/lyrics_with_spotify_url.json", "r") as f:
 
 
 
22
  data = json.load(f)
23
 
24
  lyrics_summaries = {}
@@ -26,10 +31,16 @@ lyrics_summaries = {}
26
  for movie, lyrics in data.items():
27
  for lyric in lyrics:
28
  print(f"Creating summary for {lyric['name']}")
29
- summary = chain.run(song=lyric['text'])
30
- lyrics_summaries[lyric['name'].lower()] = {"summary": summary, "embed_url": lyric["embed_url"] }
31
-
32
- with open("/home/zuppif/Documents/Work/ActiveLoop/ai-shazam/data/lyrics_with_spotify_url_and_summary.json", "w") as f:
 
 
 
 
 
 
33
  json.dump(lyrics_summaries, f)
34
 
35
- pprint(lyrics_summaries)
 
1
  from dotenv import load_dotenv
 
2
 
3
+ load_dotenv()
4
+
 
 
5
  import json
6
  from collections import defaultdict
7
+ from pathlib import Path
8
  from pprint import pprint
9
 
10
+ from langchain.chains import LLMChain
11
+ from langchain.chat_models import ChatOpenAI
12
+ from langchain.prompts import PromptTemplate
13
+
14
  prompt = PromptTemplate(
15
  input_variables=["song"],
16
  template=Path("prompts/summary.prompt").read_text(),
 
20
 
21
  chain = LLMChain(llm=llm, prompt=prompt)
22
 
23
+ with open(
24
+ "/home/zuppif/Documents/Work/ActiveLoop/ai-shazam/data/lyrics_with_spotify_url.json",
25
+ "r",
26
+ ) as f:
27
  data = json.load(f)
28
 
29
  lyrics_summaries = {}
 
31
  for movie, lyrics in data.items():
32
  for lyric in lyrics:
33
  print(f"Creating summary for {lyric['name']}")
34
+ summary = chain.run(song=lyric["text"])
35
+ lyrics_summaries[lyric["name"].lower()] = {
36
+ "summary": summary,
37
+ "embed_url": lyric["embed_url"],
38
+ }
39
+
40
+ with open(
41
+ "/home/zuppif/Documents/Work/ActiveLoop/ai-shazam/data/lyrics_with_spotify_url_and_summary.json",
42
+ "w",
43
+ ) as f:
44
  json.dump(lyrics_summaries, f)
45
 
46
+ pprint(lyrics_summaries)
scripts/keep_only_lyrics_on_spotify.py CHANGED
@@ -44,7 +44,9 @@ for movie, lyrics in data.items():
44
  for lyric in lyrics:
45
  name = lyric["name"].lower()
46
  if name in spotify_tracks:
47
- data_filtered[movie].append({**lyric, **{ 'embed_url' : spotify_tracks[name]['embed_url']}})
 
 
48
  tot += 1
49
  print(tot)
50
 
 
44
  for lyric in lyrics:
45
  name = lyric["name"].lower()
46
  if name in spotify_tracks:
47
+ data_filtered[movie].append(
48
+ {**lyric, **{"embed_url": spotify_tracks[name]["embed_url"]}}
49
+ )
50
  tot += 1
51
  print(tot)
52
 
temp.ipynb CHANGED
@@ -13,7 +13,7 @@
13
  },
14
  {
15
  "cell_type": "code",
16
- "execution_count": 2,
17
  "id": "b1a6a020",
18
  "metadata": {
19
  "scrolled": true
@@ -23,16 +23,14 @@
23
  "name": "stderr",
24
  "output_type": "stream",
25
  "text": [
26
- "/home/zuppif/miniconda3/envs/activeloop/lib/python3.9/site-packages/deeplake/util/check_latest_version.py:32: UserWarning: A newer version of deeplake (3.4.3) is available. It's recommended that you update to the latest version using `pip install -U deeplake`.\n",
27
- " warnings.warn(\n",
28
- "-"
29
  ]
30
  },
31
  {
32
  "name": "stdout",
33
  "output_type": "stream",
34
  "text": [
35
- "This dataset can be visualized in Jupyter Notebook by ds.visualize() or at https://app.activeloop.ai/zuppif/disney-lyrics\n",
36
  "\n"
37
  ]
38
  },
@@ -40,17 +38,17 @@
40
  "name": "stderr",
41
  "output_type": "stream",
42
  "text": [
43
- "|"
44
  ]
45
  },
46
  {
47
  "name": "stdout",
48
  "output_type": "stream",
49
  "text": [
50
- "hub://zuppif/disney-lyrics loaded successfully.\n",
51
  "\n",
52
- "Deep Lake Dataset in hub://zuppif/disney-lyrics already exists, loading from the storage\n",
53
- "Dataset(path='hub://zuppif/disney-lyrics', read_only=True, tensors=['embedding', 'ids', 'metadata', 'text'])\n",
54
  "\n",
55
  " tensor htype shape dtype compression\n",
56
  " ------- ------- ------- ------- ------- \n",
@@ -91,7 +89,7 @@
91
  },
92
  {
93
  "cell_type": "code",
94
- "execution_count": 80,
95
  "id": "07d8a381",
96
  "metadata": {},
97
  "outputs": [],
@@ -112,56 +110,43 @@
112
  },
113
  {
114
  "cell_type": "code",
115
- "execution_count": 81,
116
  "id": "ebca722d",
117
  "metadata": {},
118
  "outputs": [
119
  {
120
  "data": {
121
  "text/plain": [
122
- "'Melancholy, Coziness, Nostalgia, Calmness.'"
123
  ]
124
  },
125
- "execution_count": 81,
126
  "metadata": {},
127
  "output_type": "execute_result"
128
  }
129
  ],
130
  "source": [
131
- "emotions = chain.run(content=\"It's rainy\")\n",
132
  "emotions"
133
  ]
134
  },
135
  {
136
  "cell_type": "code",
137
- "execution_count": 84,
138
  "id": "9598a36c",
139
  "metadata": {
140
  "scrolled": false
141
  },
142
  "outputs": [
143
  {
144
- "data": {
145
- "text/plain": [
146
- "'https://open.spotify.com/embed/track/5EeQQ8BVJTRkp1AIKJILGY?utm_source=generator'"
147
- ]
148
- },
149
- "execution_count": 84,
150
- "metadata": {},
151
- "output_type": "execute_result"
152
- }
153
- ],
154
- "source": [
155
- "doc, score = db.similarity_search_with_score(emotions, distance_metric=\"cos\")[0]\n",
156
- "doc.metadata[\"embed_url\"]"
157
- ]
158
- },
159
- {
160
- "cell_type": "code",
161
- "execution_count": 83,
162
- "id": "d6214e40",
163
- "metadata": {},
164
- "outputs": [
165
  {
166
  "data": {
167
  "text/html": [
@@ -169,7 +154,7 @@
169
  " <iframe\n",
170
  " width=\"700\"\n",
171
  " height=\"350\"\n",
172
- " src=\"https://open.spotify.com/embed/track/5EeQQ8BVJTRkp1AIKJILGY?utm_source=generator\"\n",
173
  " frameborder=\"0\"\n",
174
  " allowfullscreen\n",
175
  " \n",
@@ -177,48 +162,112 @@
177
  " "
178
  ],
179
  "text/plain": [
180
- "<IPython.lib.display.IFrame at 0x7fb0be920a00>"
181
  ]
182
  },
183
- "execution_count": 83,
184
  "metadata": {},
185
  "output_type": "execute_result"
186
  }
187
  ],
188
  "source": [
189
- "doc.metadata[\"embed_url\"]\n",
 
 
 
 
190
  "\n",
191
  "from IPython.display import IFrame\n",
192
  "IFrame(doc.metadata[\"embed_url\"], width=700, height=350)"
193
  ]
194
  },
 
 
 
 
 
 
 
 
195
  {
196
  "cell_type": "code",
197
- "execution_count": 4,
198
- "id": "28ae2c63",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
199
  "metadata": {
200
  "scrolled": true
201
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
202
  "outputs": [
203
  {
204
  "data": {
205
  "text/plain": [
206
- "Dataset(path='hub://zuppif/disney-lyrics', read_only=True, index=Index([()]), tensors=['embedding', 'ids', 'metadata', 'text'])"
207
  ]
208
  },
209
- "execution_count": 4,
210
  "metadata": {},
211
  "output_type": "execute_result"
212
  }
213
  ],
214
  "source": [
215
- "db.ds.query(\"select * where contains(\\\"text\\\", 'Did they') limit 2\")"
216
  ]
217
  },
218
  {
219
  "cell_type": "code",
220
  "execution_count": null,
221
- "id": "1780552c",
222
  "metadata": {},
223
  "outputs": [],
224
  "source": []
 
13
  },
14
  {
15
  "cell_type": "code",
16
+ "execution_count": 74,
17
  "id": "b1a6a020",
18
  "metadata": {
19
  "scrolled": true
 
23
  "name": "stderr",
24
  "output_type": "stream",
25
  "text": [
26
+ "/"
 
 
27
  ]
28
  },
29
  {
30
  "name": "stdout",
31
  "output_type": "stream",
32
  "text": [
33
+ "This dataset can be visualized in Jupyter Notebook by ds.visualize() or at https://app.activeloop.ai/zuppif/disney-lyrics-emotions\n",
34
  "\n"
35
  ]
36
  },
 
38
  "name": "stderr",
39
  "output_type": "stream",
40
  "text": [
41
+ "-"
42
  ]
43
  },
44
  {
45
  "name": "stdout",
46
  "output_type": "stream",
47
  "text": [
48
+ "hub://zuppif/disney-lyrics-emotions loaded successfully.\n",
49
  "\n",
50
+ "Deep Lake Dataset in hub://zuppif/disney-lyrics-emotions already exists, loading from the storage\n",
51
+ "Dataset(path='hub://zuppif/disney-lyrics-emotions', read_only=True, tensors=['embedding', 'ids', 'metadata', 'text'])\n",
52
  "\n",
53
  " tensor htype shape dtype compression\n",
54
  " ------- ------- ------- ------- ------- \n",
 
89
  },
90
  {
91
  "cell_type": "code",
92
+ "execution_count": 75,
93
  "id": "07d8a381",
94
  "metadata": {},
95
  "outputs": [],
 
110
  },
111
  {
112
  "cell_type": "code",
113
+ "execution_count": 76,
114
  "id": "ebca722d",
115
  "metadata": {},
116
  "outputs": [
117
  {
118
  "data": {
119
  "text/plain": [
120
+ "'Exhaustion, Fatigue, Sleepiness, Drained.'"
121
  ]
122
  },
123
+ "execution_count": 76,
124
  "metadata": {},
125
  "output_type": "execute_result"
126
  }
127
  ],
128
  "source": [
129
+ "emotions = chain.run(content=\"Damn I am feeling so tired\")\n",
130
  "emotions"
131
  ]
132
  },
133
  {
134
  "cell_type": "code",
135
+ "execution_count": 77,
136
  "id": "9598a36c",
137
  "metadata": {
138
  "scrolled": false
139
  },
140
  "outputs": [
141
  {
142
+ "name": "stdout",
143
+ "output_type": "stream",
144
+ "text": [
145
+ "[(Document(page_content='Hopeful, determined, inspired, optimistic, longing, driven, passionate, adventurous.', metadata={'movie': 'Hercules', 'name': 'Go the Distance', 'embed_url': 'https://open.spotify.com/embed/track/0D1OY0M5A0qD5HGBvFmFid?utm_source=generator'}), 0.8135085701942444), (Document(page_content='upset, mad, regret, sad, fine, longing, hopeful, impatient', metadata={'movie': 'Encanto', 'name': 'Waiting on a Miracle', 'embed_url': 'https://open.spotify.com/embed/track/3oRW9ZGPRbLRMneQ5lwflt?utm_source=generator'}), 0.8108540177345276), (Document(page_content='nasty, repentant, magic, sad, lonely, bored, withdrawn, busy', metadata={'movie': 'The Little Mermaid', 'name': 'Poor Unfortunate Souls', 'embed_url': 'https://open.spotify.com/embed/track/7zsw78LtXUD7JfEwH64HK2?utm_source=generator'}), 0.8080281615257263), (Document(page_content='hopeful, optimistic, dreamy, inspired, happy, content, fulfilled, grateful', metadata={'movie': 'Pinocchio', 'name': 'When You Wish Upon a Star', 'embed_url': 'https://open.spotify.com/embed/track/1WrPa4lrIddctGWAIYYfP9?utm_source=generator'}), 0.8055723309516907)]\n",
146
+ "https://open.spotify.com/embed/track/0D1OY0M5A0qD5HGBvFmFid?utm_source=generator\n",
147
+ "page_content='Hopeful, determined, inspired, optimistic, longing, driven, passionate, adventurous.' metadata={'movie': 'Hercules', 'name': 'Go the Distance', 'embed_url': 'https://open.spotify.com/embed/track/0D1OY0M5A0qD5HGBvFmFid?utm_source=generator'}\n"
148
+ ]
149
+ },
 
 
 
 
 
 
 
 
 
 
 
 
 
150
  {
151
  "data": {
152
  "text/html": [
 
154
  " <iframe\n",
155
  " width=\"700\"\n",
156
  " height=\"350\"\n",
157
+ " src=\"https://open.spotify.com/embed/track/0D1OY0M5A0qD5HGBvFmFid?utm_source=generator\"\n",
158
  " frameborder=\"0\"\n",
159
  " allowfullscreen\n",
160
  " \n",
 
162
  " "
163
  ],
164
  "text/plain": [
165
+ "<IPython.lib.display.IFrame at 0x7f1890ed7430>"
166
  ]
167
  },
168
+ "execution_count": 77,
169
  "metadata": {},
170
  "output_type": "execute_result"
171
  }
172
  ],
173
  "source": [
174
+ "matches = db.similarity_search_with_score(emotions, distance_metric=\"cos\")\n",
175
+ "print(matches)\n",
176
+ "doc, score = matches[0]\n",
177
+ "print(doc.metadata[\"embed_url\"])\n",
178
+ "print(doc)\n",
179
  "\n",
180
  "from IPython.display import IFrame\n",
181
  "IFrame(doc.metadata[\"embed_url\"], width=700, height=350)"
182
  ]
183
  },
184
+ {
185
+ "cell_type": "markdown",
186
+ "id": "e59a45b7",
187
+ "metadata": {},
188
+ "source": [
189
+ "## Using all the songs emotions in the prompt"
190
+ ]
191
+ },
192
  {
193
  "cell_type": "code",
194
+ "execution_count": 66,
195
+ "id": "7b10a5f2",
196
+ "metadata": {},
197
+ "outputs": [],
198
+ "source": [
199
+ "import json\n",
200
+ "\n",
201
+ "prompt = PromptTemplate(\n",
202
+ " input_variables=[\"songs\", \"user_input\"],\n",
203
+ " template=Path(\"prompts/bot_with_summary.prompt\").read_text(),\n",
204
+ ")\n",
205
+ "\n",
206
+ "llm = ChatOpenAI(temperature=0.7)\n",
207
+ "\n",
208
+ "chain = LLMChain(llm=llm, prompt=prompt)"
209
+ ]
210
+ },
211
+ {
212
+ "cell_type": "markdown",
213
+ "id": "70b8c445",
214
+ "metadata": {},
215
+ "source": [
216
+ "Let's create the songs string"
217
+ ]
218
+ },
219
+ {
220
+ "cell_type": "code",
221
+ "execution_count": 67,
222
+ "id": "36df1c27",
223
+ "metadata": {},
224
+ "outputs": [],
225
+ "source": [
226
+ "with open(\"data/emotions_with_spotify_url.json\", \"r\") as f:\n",
227
+ " data = json.load(f)"
228
+ ]
229
+ },
230
+ {
231
+ "cell_type": "code",
232
+ "execution_count": 68,
233
+ "id": "e3e496da",
234
  "metadata": {
235
  "scrolled": true
236
  },
237
+ "outputs": [],
238
+ "source": [
239
+ "songs_str = \"\"\n",
240
+ "\n",
241
+ "for movie, songs in data.items():\n",
242
+ " for song in songs:\n",
243
+ " songs_str += f\"{song['name']}: {song['text']}\\n\""
244
+ ]
245
+ },
246
+ {
247
+ "cell_type": "code",
248
+ "execution_count": 71,
249
+ "id": "070b09d8",
250
+ "metadata": {},
251
  "outputs": [
252
  {
253
  "data": {
254
  "text/plain": [
255
+ "'Zero to Hero'"
256
  ]
257
  },
258
+ "execution_count": 71,
259
  "metadata": {},
260
  "output_type": "execute_result"
261
  }
262
  ],
263
  "source": [
264
+ "chain.run(songs=songs_str, user_input=\"Damn I am feeling so tired\")"
265
  ]
266
  },
267
  {
268
  "cell_type": "code",
269
  "execution_count": null,
270
+ "id": "b6c6e90c",
271
  "metadata": {},
272
  "outputs": [],
273
  "source": []