Spaces:

andaqu
/

ask-reddit-gpt

Runtime error

App Files Files Community

wendru18 committed on May 10, 2023

Commit

7976fe2

1 Parent(s): 15a19f2

refactored gradio, to publish

Browse files

Files changed (8) hide show

README.md +26 -3
app.py +71 -21
imgs/e1.png +0 -0
imgs/e2.png +0 -0
imgs/e3.png +0 -0
imgs/e4.png +0 -0
imgs/overview.png +0 -0
main.ipynb +276 -26

README.md CHANGED Viewed

@@ -6,6 +6,29 @@ AskRedditGPT is a tool that takes in a query, sends it over to Reddit, and retur
 1. Take in query $q$ from user.
 2. Get $N$ topics from $q$ using GPT.
-3. Determine $C$, which is a set of comments best-suited to answer $N$ topics.
-4. Search $q \in C$.
-5. Use GPT to return an answer to user.

 1. Take in query $q$ from user.
 2. Get $N$ topics from $q$ using GPT.
+3. Determine $C$, a set of comments that concern the $N$ topics and are, hopefully, best suited to answer $q$.
+4. Search $C$ for the comments most relevant to $q$ and use GPT to return an all-encompassing answer.
+## Overview
+The image below is a high-level overview of the project.
+![Overview](imgs/overview.png)
+## Examples
+Example 1:
+![Example 1](imgs/e1.png)
+Example 2:
+![Example 2](imgs/e2.png)
+Example 3:
+![Example 3](imgs/e3.png)
+Example 4:
+![Example 4](imgs/e4.png)

app.py CHANGED Viewed

@@ -5,6 +5,7 @@ from langchain.llms import OpenAI
 from tqdm import tqdm
 import pandas as pd
 import gradio as gr
 import openai
 import praw
 import os
@@ -13,6 +14,9 @@ import re
 reddit = None
 bot = None
 chat_history = []
 def set_openai_key(key):
@@ -35,8 +39,10 @@ def set_reddit_keys(client_id, client_secret, user_agent):
 def generate_topics(query, model="gpt-3.5-turbo"):
     messages = [
-        {"role": "user", "content": f"Take this query '{query}' and return a list of 10 simple to understand topics (4 words or less) to input in Search so it returns good results."},
     ]
     response = openai.ChatCompletion.create(
@@ -67,9 +73,9 @@ def get_relevant_comments(topics):
     for topic in tqdm(topics):
         for post in reddit.subreddit("all").search(
-        topic, limit=10):
-            post.comment_limit = 20
             post.comment_sort = "top"
             # Top level comments only
@@ -77,9 +83,9 @@ def get_relevant_comments(topics):
             for comment in post.comments:
                 author = comment.author.name if comment.author else '[deleted]'
-                comments.append([post.id, comment.id, post.subreddit.display_name, post.title, author, comment.body])
-    comments = pd.DataFrame(comments,columns=['source', 'comment_id', 'subreddit', 'title', 'author', 'text'])
     # Drop empty texts or ["deleted"] texts
     comments = comments[comments['text'].str.len() > 0]
@@ -93,22 +99,22 @@ def get_relevant_comments(topics):
     return comments
-def construct_retriever(comments, k=20):
     # Convert comments dataframe to a dictionary
     comments = comments.to_dict('records')
     # Convert comments["text"] to a list of strings
-    texts = [comment["title"] + " " + comment["text"] + " " + comment["subreddit"] for comment in comments]
-    db = Chroma.from_texts(texts, TensorflowHubEmbeddings(model_url="https://tfhub.dev/google/universal-sentence-encoder/4"), metadatas=[{"source": comment["source"], "comment_id": comment["comment_id"], "author": comment["author"]} for comment in comments])
     retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": k})
     return retriever
 def construct_bot(retriever):
-    bot = ConversationalRetrievalChain.from_llm(OpenAI(temperature=0), retriever, return_source_documents=True)
     return bot
 def get_response(query, chat_history):
@@ -125,16 +131,22 @@ def restart():
     print("Chat history and bot knowledge has been cleared!")
-    return None
-def main(query):
     global chat_history
     global bot
     if chat_history == []:
         print("Bot knowledge has not been initialised yet! Generating topics...")
         topics = generate_topics(query)
         print("Fetching relevant comments...")
         comments = get_relevant_comments(topics)
@@ -151,22 +163,60 @@ def main(query):
     answer, source_documents = response["answer"], response["source_documents"]
-    print(source_documents)
     chat_history.append((query, answer))
-    return "", chat_history
 # Testing only!
-set_openai_key("")
-set_reddit_keys("", "", "")
-with gr.Blocks() as demo:
-    chat_bot = gr.Chatbot()
-    query = gr.Textbox()
-    clear = gr.Button("Clear")
-    query.submit(main, [query], [query, chat_bot])
-    clear.click(restart, None, chat_bot, queue=False)
 demo.launch()

 from tqdm import tqdm
 import pandas as pd
 import gradio as gr
+import datetime
 import openai
 import praw
 import os
 reddit = None
 bot = None
 chat_history = []
+kb = "Bot has no knowledge yet! Please enter an initial query to educate the bot."
+embs = TensorflowHubEmbeddings(model_url="https://tfhub.dev/google/universal-sentence-encoder/4")
 def set_openai_key(key):
 def generate_topics(query, model="gpt-3.5-turbo"):
+    current_date = datetime.datetime.now().strftime("%Y-%m-%d")
     messages = [
+        {"role": "user", "content": f"The current date is {current_date}. Take this query '{query}' and return a list of 10 simple to understand topics (4 words or less) to input in Search so it returns good results."}
     ]
     response = openai.ChatCompletion.create(
     for topic in tqdm(topics):
         for post in reddit.subreddit("all").search(
+        topic, limit=5):
+            post.comment_limit = 10
             post.comment_sort = "top"
             # Top level comments only
             for comment in post.comments:
                 author = comment.author.name if comment.author else '[deleted]'
+                comments.append([post.id, comment.id, post.subreddit.display_name, post.title, author, comment.body, datetime.datetime.fromtimestamp(comment.created).strftime('%Y-%m')])
+    comments = pd.DataFrame(comments,columns=['source', 'comment_id', 'subreddit', 'title', 'author', 'text', 'date'])
     # Drop empty texts or ["deleted"] texts
     comments = comments[comments['text'].str.len() > 0]
     return comments
+def construct_retriever(comments, k=5):
     # Convert comments dataframe to a dictionary
     comments = comments.to_dict('records')
     # Convert comments["text"] to a list of strings
+    texts = [comment["title"] + " " + comment["date"] + ": " + comment["text"] + " " + comment["subreddit"] for comment in comments]
+    db = Chroma.from_texts(texts, embs, metadatas=[{"source": comment["source"], "comment_id": comment["comment_id"], "author": comment["author"], "subreddit": comment["subreddit"], "title": comment["title"]} for comment in comments])
     retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": k})
     return retriever
 def construct_bot(retriever):
+    bot = ConversationalRetrievalChain.from_llm(OpenAI(temperature=0), retriever, return_source_documents=True, max_tokens_limit=2000)
     return bot
 def get_response(query, chat_history):
     print("Chat history and bot knowledge has been cleared!")
+    return None, "", "Bot has no knowledge yet! Please enter an initial query to educate the bot."
+def main(query, openAI_key, reddit_client_id, reddit_client_secret, reddit_user_agent):
     global chat_history
     global bot
+    global kb
+    set_openai_key(openAI_key)
+    set_reddit_keys(reddit_client_id, reddit_client_secret, reddit_user_agent)
     if chat_history == []:
         print("Bot knowledge has not been initialised yet! Generating topics...")
         topics = generate_topics(query)
+        kb = "Bot now has knowledge of the following topics: [" + "".join([f"{i+1}. {topic} " for i, topic in enumerate(topics)]) + "]"
         print("Fetching relevant comments...")
         comments = get_relevant_comments(topics)
     answer, source_documents = response["answer"], response["source_documents"]
+    source_urls = "### Sources\n\nThe following contain sources the bot might have used to answer your last query:\n\n" + "\n\n".join([f'[{x.metadata["title"]} (r/{x.metadata["subreddit"]})](https://www.reddit.com/r/{x.metadata["subreddit"]}/comments/{x.metadata["source"]}/comment/{x.metadata["comment_id"]})' for x in source_documents])
     chat_history.append((query, answer))
+    return "", kb, chat_history, source_urls
 # Testing only!
+title = "Ask Reddit GPT 📜"
+with gr.Blocks() as demo:
+        with gr.Group():
+            gr.Markdown(f'<center><h1>{title}</h1></center>')
+            gr.Markdown(f"Ask Reddit GPT allow you to ask about and chat with information found on Reddit. The tool uses the Reddit API to build a database of knowledge (stored in a Chroma database) and LangChain to query it. For each response, a list of potential sources are sent back. The first query you sent will take a while as it will need to build a knowledge base based on the topics concerning such query. Subsequent queries on the same topic will be much faster. If however, you would like to ask a question concerning other topics, you will need to clear out the knowledge base. To do this, click the 'Restart knowledge base' button below.")
+            with gr.Accordion("Instructions", open=False):
+                gr.Markdown('''1. You will need an **Open AI** API key! Get one [here](https://platform.openai.com/account/api-keys).
+                2. You will also need **Reddit** credentials! Steps to obtain them:
+                * Log in to Reddit.
+                * Go [here](https://www.reddit.com/prefs/apps).
+                * Scroll to the bottom.
+                * Click "create another app...".
+                * Fill in the details as you wish, but make sure you select "script" as the type.
+                * Click "create app".
+                * Copy the client ID, client secret, and user agent name and paste them in the boxes below.
+                * All done!
+                ''')
+        with gr.Group():
+            with gr.Accordion("Credentials", open=True):
+                openAI_key=gr.Textbox(label='Enter your OpenAI API key here:')
+                reddit_client_id=gr.Textbox(label='Enter your Reddit client ID here:')
+                reddit_client_secret=gr.Textbox(label='Enter your Reddit client secret here:')
+                reddit_user_agent=gr.Textbox(label='Enter your Reddit user agent here:')
+        with gr.Group():
+            kb = gr.Markdown(kb)
+            chat_bot = gr.Chatbot()
+            query = gr.Textbox()
+            submit = gr.Button("Submit")
+            submit.style(full_width=True)
+            clear = gr.Button("Restart knowledge base")
+            clear.style(full_width=True)
+            sources = gr.Markdown()
+            submit.click(main, [query, openAI_key, reddit_client_id, reddit_client_secret, reddit_user_agent], [query, kb, chat_bot, sources])
+            clear.click(restart, None, [chat_bot, sources, kb], queue=False)
 demo.launch()

imgs/e1.png ADDED Viewed

imgs/e2.png ADDED Viewed

imgs/e3.png ADDED Viewed

imgs/e4.png ADDED Viewed

imgs/overview.png ADDED Viewed

main.ipynb CHANGED Viewed

@@ -2,12 +2,13 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 83,
    "metadata": {},
    "outputs": [],
    "source": [
     "from tqdm import tqdm\n",
     "import pandas as pd\n",
     "import openai\n",
     "import praw\n",
     "import os\n",
@@ -19,7 +20,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 84,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -31,7 +32,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 85,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -52,7 +53,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 86,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -71,7 +72,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 87,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -104,7 +105,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 88,
    "metadata": {},
    "outputs": [
     {
@@ -122,7 +123,7 @@
        " 'Industry specific job listings']"
       ]
      },
-     "execution_count": 88,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -143,7 +144,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 89,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -162,9 +163,9 @@
     "\n",
     "            for comment in post.comments:\n",
     "                author = comment.author.name if comment.author else '[deleted]'\n",
-    "                comments.append([post.id, comment.id, post.subreddit.display_name, post.title, author, comment.body])\n",
     "\n",
-    "    comments = pd.DataFrame(comments,columns=['source', 'comment_id', 'subreddit', 'title', 'author', 'text'])\n",
     "\n",
     "    # Drop empty texts or [\"deleted\"] texts\n",
     "    comments = comments[comments['text'].str.len() > 0]\n",
@@ -181,14 +182,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 90,
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 10/10 [00:41<00:00,  4.13s/it]\n"
      ]
     }
    ],
@@ -196,6 +197,235 @@
     "comments = get_relevant_subreddits(topics)"
    ]
   },
   {
    "attachments": {},
    "cell_type": "markdown",
@@ -206,7 +436,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 91,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -227,7 +457,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 92,
    "metadata": {},
    "outputs": [
     {
@@ -244,7 +474,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 93,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -255,7 +485,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 95,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -265,7 +495,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 96,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -274,16 +504,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 97,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "\" Start by updating your resume and LinkedIn profile to reflect your current skills and experience. Then, look for job postings on job boards like Indeed and LinkedIn, as well as industry-specific job listings. You can also reach out to people you know professionally, such as former colleagues or mentors, to see if they know of any job opportunities. Finally, consider using a staffing agency to help you find a job that's a great fit. Good luck!\""
       ]
      },
-     "execution_count": 97,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -294,25 +524,25 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 98,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "[Document(page_content='Request: job search tips WAITING FOR THE RIGHT JOB WITH DECENT PAY > QUICK JOB', metadata={'source': '122mdcc', 'comment_id': 'jdv57nv', 'author': 'notenoughbeds'}),\n",
        " Document(page_content='Where to look for jobs? Online job boards? LinkedIn\\n\\nIndeed\\n\\nIf you need a... beginners job. Craigslist has a work section', metadata={'source': 'uj35l8', 'comment_id': 'i7gbuat', 'author': 'No-Statement-3019'}),\n",
        " Document(page_content='Job search tips in Canada You’re selling yourself way too cheap. Look for a senior position and then people will want to hire you more too', metadata={'source': '1156zsa', 'comment_id': 'j91mo9r', 'author': 'pxpxy'}),\n",
        " Document(page_content='Job search tips Just apply, covid has made super easy for RTs to get jobs', metadata={'source': '11uekx5', 'comment_id': 'jcnvfnx', 'author': 'Crass_Cameron'}),\n",
        " Document(page_content='Looking for freelance opportunities >\\tWhere would be the best place to look?\\n\\nPeople you’ve worked with professionally before. Either for jobs for them or for them to refer you to people they know.', metadata={'source': '11q0h1u', 'comment_id': 'jc0w8rp', 'author': 'dataguy24'}),\n",
        " Document(page_content='Did Career Counseling services help you land a job after you graduated? I found it to be helpful in polishing my resume', metadata={'source': 'xbql4p', 'comment_id': 'io1141n', 'author': 'avo_cado'}),\n",
        " Document(page_content=\"Does anyone have any good job search tips? I recommend keeping an up to date LinkedIn profile that indicates you're actively searching for roles. I've also had luck with Indeed.\\n\\nDepending on your field, I recommend a staffing agency.  They can vouch for you and place you with a company that's a great fit.\", metadata={'source': '134wnu5', 'comment_id': 'jih9zzp', 'author': 'Carolinablue87'}),\n",
-       " Document(page_content='How did you find your job? Did someone you know tell you about it? Job listing aggregator (ie indeed, simplyhired)? Industry specific job listings? Somehow else? strong institutional connection between the employer and my grad school', metadata={'source': '88vquy', 'comment_id': 'dwnknrk', 'author': 'lollersauce914'}),\n",
-       " Document(page_content='I (17 F) am really confused about my career. Is there any way I can get career counseling services for free? Any resources or tips would be appreciated too! What are you confused about? I have no qualifications, just a working professional of 8 years. \\nDoes your work have a subreddit or do you have a mentor at work that you could reach out to?', metadata={'source': '10jv7c4', 'comment_id': 'j5mtxb7', 'author': 'bhop02'}),\n",
-       " Document(page_content=\"Debating whether or not to pay for access to sites that have specific job listings - any successes or tales of caution? I never had to look for my jobs through those sites. I don't think they can let the companies post job positions exclusively on their sites only. You can always find the same job post somewhere else if you keep trying. I hope this is helpful, good luck!\", metadata={'source': '11kppd0', 'comment_id': 'jb8qvjo', 'author': 'Whitney-Sweet'})]"
       ]
      },
-     "execution_count": 98,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -320,6 +550,26 @@
    "source": [
     "result[\"source_documents\"]"
    ]
   }
  ],
  "metadata": {

  "cells": [
   {
    "cell_type": "code",
+   "execution_count": 41,
    "metadata": {},
    "outputs": [],
    "source": [
     "from tqdm import tqdm\n",
     "import pandas as pd\n",
+    "import datetime\n",
     "import openai\n",
     "import praw\n",
     "import os\n",
   },
   {
    "cell_type": "code",
+   "execution_count": 30,
    "metadata": {},
    "outputs": [],
    "source": [
   },
   {
    "cell_type": "code",
+   "execution_count": 31,
    "metadata": {},
    "outputs": [],
    "source": [
   },
   {
    "cell_type": "code",
+   "execution_count": 32,
    "metadata": {},
    "outputs": [],
    "source": [
   },
   {
    "cell_type": "code",
+   "execution_count": 33,
    "metadata": {},
    "outputs": [],
    "source": [
   },
   {
    "cell_type": "code",
+   "execution_count": 36,
    "metadata": {},
    "outputs": [
     {
        " 'Industry specific job listings']"
       ]
      },
+     "execution_count": 36,
      "metadata": {},
      "output_type": "execute_result"
     }
   },
   {
    "cell_type": "code",
+   "execution_count": 42,
    "metadata": {},
    "outputs": [],
    "source": [
     "\n",
     "            for comment in post.comments:\n",
     "                author = comment.author.name if comment.author else '[deleted]'\n",
+    "                comments.append([post.id, comment.id, post.subreddit.display_name, post.title, author, comment.body, datetime.datetime.fromtimestamp(comment.created).strftime('%Y-%m')])\n",
     "\n",
+    "    comments = pd.DataFrame(comments,columns=['source', 'comment_id', 'subreddit', 'title', 'author', 'text', 'date'])\n",
     "\n",
     "    # Drop empty texts or [\"deleted\"] texts\n",
     "    comments = comments[comments['text'].str.len() > 0]\n",
   },
   {
    "cell_type": "code",
+   "execution_count": 43,
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
+      "100%|██████████| 10/10 [00:40<00:00,  4.05s/it]\n"
      ]
     }
    ],
     "comments = get_relevant_subreddits(topics)"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 44,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>source</th>\n",
+       "      <th>comment_id</th>\n",
+       "      <th>subreddit</th>\n",
+       "      <th>title</th>\n",
+       "      <th>author</th>\n",
+       "      <th>text</th>\n",
+       "      <th>date</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>106m8e3</td>\n",
+       "      <td>j3hcif8</td>\n",
+       "      <td>funny</td>\n",
+       "      <td>Job search tips, update your profile.</td>\n",
+       "      <td>saltinstiens_monster</td>\n",
+       "      <td>\"Assistant Emperor?\"\\n\\n\"Assistant *to the* Emperor.\"</td>\n",
+       "      <td>2023-01-08 17:15:27</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>10q4pf6</td>\n",
+       "      <td>j6nz560</td>\n",
+       "      <td>recruitinghell</td>\n",
+       "      <td>“Job Search Tips From A Recruiter” thoughts?</td>\n",
+       "      <td>[deleted]</td>\n",
+       "      <td>1. I work in tech. 99% of the time, tech recruiters don't know anything about the skills the tea...</td>\n",
+       "      <td>2023-01-31 19:00:15</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19</th>\n",
+       "      <td>134wnu5</td>\n",
+       "      <td>jih9zzp</td>\n",
+       "      <td>blackladies</td>\n",
+       "      <td>Does anyone have any good job search tips?</td>\n",
+       "      <td>Carolinablue87</td>\n",
+       "      <td>I recommend keeping an up to date LinkedIn profile that indicates you're actively searching for ...</td>\n",
+       "      <td>2023-05-01 22:52:38</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>23</th>\n",
+       "      <td>12h0uya</td>\n",
+       "      <td>jfmy0a1</td>\n",
+       "      <td>physicaltherapy</td>\n",
+       "      <td>Job search tips</td>\n",
+       "      <td>tunaman4u2</td>\n",
+       "      <td>Indeed yes. Sell yourself and don’t take the first offer, be prepared to negotiate. Companies wi...</td>\n",
+       "      <td>2023-04-10 02:18:46</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>28</th>\n",
+       "      <td>11uekx5</td>\n",
+       "      <td>jcnvfnx</td>\n",
+       "      <td>respiratorytherapy</td>\n",
+       "      <td>Job search tips</td>\n",
+       "      <td>Crass_Cameron</td>\n",
+       "      <td>Just apply, covid has made super easy for RTs to get jobs</td>\n",
+       "      <td>2023-03-18 06:02:52</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>251</th>\n",
+       "      <td>12d4zd4</td>\n",
+       "      <td>jf4w9av</td>\n",
+       "      <td>Birmingham</td>\n",
+       "      <td>Bartender moving from outta state. Does bham restaurant industry use any specific online job sou...</td>\n",
+       "      <td>minorujco</td>\n",
+       "      <td>Croux app used to find some gigs in Birmingham before covid, but haven't used it since. I heard ...</td>\n",
+       "      <td>2023-04-06 04:13:35</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>260</th>\n",
+       "      <td>yn7bze</td>\n",
+       "      <td>iv7ekcc</td>\n",
+       "      <td>biotech</td>\n",
+       "      <td>Best listings to find a job in biotech/pharma industry?</td>\n",
+       "      <td>tomatotornado420</td>\n",
+       "      <td>Linked/indeed. Early career process engineer.</td>\n",
+       "      <td>2022-11-05 22:50:07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>269</th>\n",
+       "      <td>13bhzdm</td>\n",
+       "      <td>jjbacyo</td>\n",
+       "      <td>singapore</td>\n",
+       "      <td>Industries must show support for local training for jobs to be added to shortage occupation list...</td>\n",
+       "      <td>worldcitizensg</td>\n",
+       "      <td>Sorry.. Why ? If industry want more \"EP\" then more incentive not to train the locals so the job ...</td>\n",
+       "      <td>2023-05-08 10:34:15</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>273</th>\n",
+       "      <td>138m3nb</td>\n",
+       "      <td>jiypppa</td>\n",
+       "      <td>dataisbeautiful</td>\n",
+       "      <td>[OC] Analyzing 15,963 Job Listings to Uncover the Top Skills for Data Analysts (update)</td>\n",
+       "      <td>restore_democracy</td>\n",
+       "      <td>Excel and SQL?  We haven’t come far in 30 years.</td>\n",
+       "      <td>2023-05-05 16:02:36</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>281</th>\n",
+       "      <td>10dvylw</td>\n",
+       "      <td>j4nkg28</td>\n",
+       "      <td>vfx</td>\n",
+       "      <td>Is specializing in a specific type of VFX important to get a better chance at finding a job in t...</td>\n",
+       "      <td>redddcrow</td>\n",
+       "      <td>Most likely yes. unless you want to be a generalist in which case you need to be good at everyth...</td>\n",
+       "      <td>2023-01-17 00:42:44</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>61 rows × 7 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "      source comment_id           subreddit  \\\n",
+       "0    106m8e3    j3hcif8               funny   \n",
+       "9    10q4pf6    j6nz560      recruitinghell   \n",
+       "19   134wnu5    jih9zzp         blackladies   \n",
+       "23   12h0uya    jfmy0a1     physicaltherapy   \n",
+       "28   11uekx5    jcnvfnx  respiratorytherapy   \n",
+       "..       ...        ...                 ...   \n",
+       "251  12d4zd4    jf4w9av          Birmingham   \n",
+       "260   yn7bze    iv7ekcc             biotech   \n",
+       "269  13bhzdm    jjbacyo           singapore   \n",
+       "273  138m3nb    jiypppa     dataisbeautiful   \n",
+       "281  10dvylw    j4nkg28                 vfx   \n",
+       "\n",
+       "                                                                                                   title  \\\n",
+       "0                                                                  Job search tips, update your profile.   \n",
+       "9                                                           “Job Search Tips From A Recruiter” thoughts?   \n",
+       "19                                                            Does anyone have any good job search tips?   \n",
+       "23                                                                                       Job search tips   \n",
+       "28                                                                                       Job search tips   \n",
+       "..                                                                                                   ...   \n",
+       "251  Bartender moving from outta state. Does bham restaurant industry use any specific online job sou...   \n",
+       "260                                              Best listings to find a job in biotech/pharma industry?   \n",
+       "269  Industries must show support for local training for jobs to be added to shortage occupation list...   \n",
+       "273              [OC] Analyzing 15,963 Job Listings to Uncover the Top Skills for Data Analysts (update)   \n",
+       "281  Is specializing in a specific type of VFX important to get a better chance at finding a job in t...   \n",
+       "\n",
+       "                   author  \\\n",
+       "0    saltinstiens_monster   \n",
+       "9               [deleted]   \n",
+       "19         Carolinablue87   \n",
+       "23             tunaman4u2   \n",
+       "28          Crass_Cameron   \n",
+       "..                    ...   \n",
+       "251             minorujco   \n",
+       "260      tomatotornado420   \n",
+       "269        worldcitizensg   \n",
+       "273     restore_democracy   \n",
+       "281             redddcrow   \n",
+       "\n",
+       "                                                                                                    text  \\\n",
+       "0                                                  \"Assistant Emperor?\"\\n\\n\"Assistant *to the* Emperor.\"   \n",
+       "9    1. I work in tech. 99% of the time, tech recruiters don't know anything about the skills the tea...   \n",
+       "19   I recommend keeping an up to date LinkedIn profile that indicates you're actively searching for ...   \n",
+       "23   Indeed yes. Sell yourself and don’t take the first offer, be prepared to negotiate. Companies wi...   \n",
+       "28                                             Just apply, covid has made super easy for RTs to get jobs   \n",
+       "..                                                                                                   ...   \n",
+       "251  Croux app used to find some gigs in Birmingham before covid, but haven't used it since. I heard ...   \n",
+       "260                                                        Linked/indeed. Early career process engineer.   \n",
+       "269  Sorry.. Why ? If industry want more \"EP\" then more incentive not to train the locals so the job ...   \n",
+       "273                                                     Excel and SQL?  We haven’t come far in 30 years.   \n",
+       "281  Most likely yes. unless you want to be a generalist in which case you need to be good at everyth...   \n",
+       "\n",
+       "                   date  \n",
+       "0   2023-01-08 17:15:27  \n",
+       "9   2023-01-31 19:00:15  \n",
+       "19  2023-05-01 22:52:38  \n",
+       "23  2023-04-10 02:18:46  \n",
+       "28  2023-03-18 06:02:52  \n",
+       "..                  ...  \n",
+       "251 2023-04-06 04:13:35  \n",
+       "260 2022-11-05 22:50:07  \n",
+       "269 2023-05-08 10:34:15  \n",
+       "273 2023-05-05 16:02:36  \n",
+       "281 2023-01-17 00:42:44  \n",
+       "\n",
+       "[61 rows x 7 columns]"
+      ]
+     },
+     "execution_count": 44,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "comments"
+   ]
+  },
   {
    "attachments": {},
    "cell_type": "markdown",
   },
   {
    "cell_type": "code",
+   "execution_count": 15,
    "metadata": {},
    "outputs": [],
    "source": [
   },
   {
    "cell_type": "code",
+   "execution_count": 16,
    "metadata": {},
    "outputs": [
     {
   },
   {
    "cell_type": "code",
+   "execution_count": 17,
    "metadata": {},
    "outputs": [],
    "source": [
   },
   {
    "cell_type": "code",
+   "execution_count": 18,
    "metadata": {},
    "outputs": [],
    "source": [
   },
   {
    "cell_type": "code",
+   "execution_count": 19,
    "metadata": {},
    "outputs": [],
    "source": [
   },
   {
    "cell_type": "code",
+   "execution_count": 20,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
+       "\"\\nFinding a new job can be a daunting task, especially in the current climate. My best advice is to start by updating your resume and LinkedIn profile to make sure they are up to date and reflect your current skills and experience. You should also reach out to your network of contacts to let them know you are looking for a new job and ask if they know of any opportunities. Additionally, you should look into job search websites such as Indeed and Craigslist, as well as staffing agencies that specialize in your field. Finally, don't forget to take advantage of any career counseling services that may be available to you. Good luck!\""
       ]
      },
+     "execution_count": 20,
      "metadata": {},
      "output_type": "execute_result"
     }
   },
   {
    "cell_type": "code",
+   "execution_count": 27,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
+       "[Document(page_content=\"Would a staffing agency be able to provide me with stable income, even if any given employer doesn't decide to hire me after the temp period? It's entirely possible to still go without a job while with a staffing agency. Just depends on what they have available.\", metadata={'source': '12vixm8', 'comment_id': 'jhc0br3', 'author': 'whotiesyourshoes'}),\n",
+       " Document(page_content='Request: job search tips WAITING FOR THE RIGHT JOB WITH DECENT PAY > QUICK JOB', metadata={'source': '122mdcc', 'comment_id': 'jdv57nv', 'author': 'notenoughbeds'}),\n",
        " Document(page_content='Where to look for jobs? Online job boards? LinkedIn\\n\\nIndeed\\n\\nIf you need a... beginners job. Craigslist has a work section', metadata={'source': 'uj35l8', 'comment_id': 'i7gbuat', 'author': 'No-Statement-3019'}),\n",
        " Document(page_content='Job search tips in Canada You’re selling yourself way too cheap. Look for a senior position and then people will want to hire you more too', metadata={'source': '1156zsa', 'comment_id': 'j91mo9r', 'author': 'pxpxy'}),\n",
+       " Document(page_content='What temp agencies hire for UPS and was it a good experience? I saw they will be hiring 95K people for the holidays. I did that gig. I went on for 6 1/2 more years.    Just go down to UPS and apply.  The holiday help means you\\'ll out with the driver.  His truck is gonna be stupid full and you two will work until its done. I live in Minnesota, so mine wasn\\'t an \"easy\" experience. Multiple pants were not even keeping my ass warm.            After that I applied to become a loader. They are more apt to hire you since you did the holiday help. The driver put in a good word for me too.          The pay is good for a temp gig, and you\\'ll get good exercise.', metadata={'source': '2gnjhi', 'comment_id': 'ckks20u', 'author': 'seathian'}),\n",
        " Document(page_content='Job search tips Just apply, covid has made super easy for RTs to get jobs', metadata={'source': '11uekx5', 'comment_id': 'jcnvfnx', 'author': 'Crass_Cameron'}),\n",
        " Document(page_content='Looking for freelance opportunities >\\tWhere would be the best place to look?\\n\\nPeople you’ve worked with professionally before. Either for jobs for them or for them to refer you to people they know.', metadata={'source': '11q0h1u', 'comment_id': 'jc0w8rp', 'author': 'dataguy24'}),\n",
        " Document(page_content='Did Career Counseling services help you land a job after you graduated? I found it to be helpful in polishing my resume', metadata={'source': 'xbql4p', 'comment_id': 'io1141n', 'author': 'avo_cado'}),\n",
        " Document(page_content=\"Does anyone have any good job search tips? I recommend keeping an up to date LinkedIn profile that indicates you're actively searching for roles. I've also had luck with Indeed.\\n\\nDepending on your field, I recommend a staffing agency.  They can vouch for you and place you with a company that's a great fit.\", metadata={'source': '134wnu5', 'comment_id': 'jih9zzp', 'author': 'Carolinablue87'}),\n",
+       " Document(page_content=\"Even jobs at temp agencies aren't hiring This is a hard question to answer without knowing more about your experience but I can try and help. \\n\\nIf I’m reading this correctly there are two jobs, the first one you interviewed for and they clearly decided it wasn’t the right fit. The second one, if it can be either FT or PT, my guess is the agency is probably prioritizing finding someone full-time because they’ll get more financial return on it.\\n\\nTo answer the bigger question though, like I said it is tough without knowing more about your experience but you’ll probably need to find a agency that specializes in staffing PT work. Most firms aren’t going to work on many PT jobs for the reason above from my experience. And most companies aren’t going to pay a firm to find someone to do basic office tasks for 15-20 hours a week. Honest opinion, hope that helps.\", metadata={'source': '114q8oh', 'comment_id': 'j903ge4', 'author': 'Rhombus-Lion-1'})]"
       ]
      },
+     "execution_count": 27,
      "metadata": {},
      "output_type": "execute_result"
     }
    "source": [
     "result[\"source_documents\"]"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[]"
+      ]
+     },
+     "execution_count": 22,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "result[\"chat_history\"]"
+   ]
   }
  ],
  "metadata": {