Spaces:
Sleeping
Sleeping
adding the asking functionality
Browse files- .gitignore +2 -1
- TwitterChatBot/__init__.py +0 -0
- TwitterChatBot/chat.py +55 -0
- TwitterChatBot/gpt_3_manager.py +32 -0
- TwitterChatBot/index.py +37 -0
- TwitterChatBot/main.py +28 -0
- TwitterChatBot/prompt.py +57 -0
- TwitterChatBot/tests/chat_test.py +31 -0
- TwitterChatBot/tests/gpt_3_manager_test.py +21 -0
- TwitterChatBot/tests/index_test.py +30 -0
- TwitterChatBot/tests/prompt_test.py +62 -0
- TwitterChatBot/tests/utils_test.py +14 -0
- TwitterChatBot/utils.py +17 -0
- app.py +17 -16
- index/build_index.py +37 -0
- index/index.jsonl +0 -0
- prompts/passage_summarization.txt +4 -0
- prompts/question_answering.txt +7 -0
- requirements.txt +3 -1
.gitignore
CHANGED
@@ -1,2 +1,3 @@
|
|
1 |
.env
|
2 |
-
env/
|
|
|
|
1 |
.env
|
2 |
+
env/
|
3 |
+
__pycache__/
|
TwitterChatBot/__init__.py
ADDED
File without changes
|
TwitterChatBot/chat.py
ADDED
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import openai
|
3 |
+
from pathlib import Path
|
4 |
+
from dotenv import load_dotenv
|
5 |
+
|
6 |
+
from TwitterChatBot.index import IndexSearchEngine
|
7 |
+
from TwitterChatBot.prompt import (
|
8 |
+
QuestionAnsweringPrompt,
|
9 |
+
PassageSummarizationPrompt,
|
10 |
+
TextPromptLoader,
|
11 |
+
)
|
12 |
+
|
13 |
+
|
14 |
+
load_dotenv()
|
15 |
+
|
16 |
+
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
17 |
+
|
18 |
+
openai.api_key = OPENAI_API_KEY
|
19 |
+
|
20 |
+
|
21 |
+
class ChatBot:
    """Answers a question by retrieving similar passages from the index,
    asking GPT-3 about each, then summarizing the partial answers."""

    def __init__(
        self, index_search_engine: IndexSearchEngine, prompt_loader, gpt_manager
    ):
        self.index_search_engine = index_search_engine
        # fixed typo: attribute was previously misspelled `prompet_loader`
        self.prompt_loader = prompt_loader
        self.gpt_manager = gpt_manager

    def ask(self, question, passage_count=2):
        """Return a summarized answer to *question*.

        passage_count: how many top-ranked passages to consult
        (was a hard-coded 2; default preserves the old behavior).
        May return None if every completion request fails.
        """
        search_result = self.index_search_engine.search(
            question=question, count=passage_count
        )

        answers = []
        for result in search_result:
            question_answering_prompt = QuestionAnsweringPrompt(
                passage=result, question=question, prompt_loader=self.prompt_loader
            )
            prompt = question_answering_prompt.load(
                Path("./prompts") / "question_answering.txt"
            )

            answer = self.gpt_manager.get_completion(
                prompt=prompt, max_tokens=80, model="text-curie-001"
            )
            # get_completion returns None on failure; joining None would
            # raise TypeError below, so skip failed completions.
            if answer is not None:
                answers.append(answer)

        passage_summarization_prompt = PassageSummarizationPrompt(
            "\n".join(answers), self.prompt_loader
        )

        prompt = passage_summarization_prompt.load(
            Path("./prompts") / "passage_summarization.txt"
        )

        final_answer = self.gpt_manager.get_completion(prompt=prompt)
        return final_answer
|
TwitterChatBot/gpt_3_manager.py
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import openai
|
2 |
+
|
3 |
+
|
4 |
+
class Gpt3Manager:
    """Thin wrapper around the OpenAI completion and embedding endpoints."""

    def __init__(self, api_key):
        # NOTE(review): this sets module-global openai state, so the most
        # recently constructed manager wins — confirm that is intended.
        openai.api_key = api_key

    def get_completion(self, prompt, max_tokens=128, model="text-davinci-003"):
        """Return the completion text for *prompt*, or None on failure."""
        try:
            result = openai.Completion.create(
                prompt=prompt,
                max_tokens=max_tokens,
                model=model,
            )
            return result["choices"][0]["text"]
        except Exception as err:
            print(f"Sorry, There was a problem \n\n {err}")
            return None

    def get_embedding(self, prompt, model="text-similarity-ada-001"):
        """Return the embedding vector for *prompt*, or None on failure."""
        flattened = prompt.replace("\n", " ")
        try:
            response = openai.Embedding.create(input=[flattened], model=model)
            return response["data"][0]["embedding"]
        except Exception as err:
            print(f"Sorry, There was a problem {err}")
            return None
|
TwitterChatBot/index.py
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from abc import ABC, abstractmethod
|
2 |
+
import jsonlines
|
3 |
+
from TwitterChatBot.utils import dot_similarity
|
4 |
+
|
5 |
+
|
6 |
+
class Index(ABC):
    """Interface for anything that can load a passage index from disk."""

    @abstractmethod
    def load(self, path):
        """Return the list of index records stored at *path*."""
        pass


class JsonLinesIndex(Index):
    """Loads an index stored as one JSON object per line (.jsonl)."""

    def load(self, path):
        with jsonlines.open(path) as reader:
            return list(reader)
|
17 |
+
|
18 |
+
|
19 |
+
class IndexSearchEngine:
    """Ranks indexed passages by embedding similarity to a question."""

    def __init__(self, indexes, gpt_manager):
        self.indexes = indexes
        self.gpt_manager = gpt_manager

    def search(self, question, count=4):
        """Return the contents of the *count* passages most similar to *question*."""
        question_embedding = self.gpt_manager.get_embedding(prompt=question)

        # Score every indexed passage against the question embedding.
        scored = [
            {"index": entry, "score": dot_similarity(question_embedding, entry["embedding"])}
            for entry in self.indexes
        ]

        ranked = sorted(scored, key=lambda item: item["score"], reverse=True)

        return [hit["index"]["content"] for hit in ranked[:count]]
|
TwitterChatBot/main.py
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from pathlib import Path
|
3 |
+
from dotenv import load_dotenv
|
4 |
+
|
5 |
+
from TwitterChatBot.index import IndexSearchEngine
|
6 |
+
from TwitterChatBot.gpt_3_manager import Gpt3Manager
|
7 |
+
from TwitterChatBot.chat import ChatBot
|
8 |
+
from TwitterChatBot.index import JsonLinesIndex
|
9 |
+
from TwitterChatBot.prompt import TextPromptLoader
|
10 |
+
|
11 |
+
load_dotenv()
|
12 |
+
|
13 |
+
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
14 |
+
|
15 |
+
path = Path("./index") / "index.jsonl"
|
16 |
+
|
17 |
+
|
18 |
+
def ask(question):
    """Answer *question* using the JSONL passage index.

    The index, GPT manager and chatbot are expensive to build (the whole
    index file is read from disk), so they are constructed once on first
    use and cached on the function object for subsequent calls.
    """
    chatbot = getattr(ask, "_chatbot", None)
    if chatbot is None:
        index = JsonLinesIndex()
        loaded = index.load(path)
        gpt_manager = Gpt3Manager(api_key=OPENAI_API_KEY)

        engine = IndexSearchEngine(loaded, gpt_manager=gpt_manager)
        loader = TextPromptLoader()
        chatbot = ChatBot(engine, prompt_loader=loader, gpt_manager=gpt_manager)
        ask._chatbot = chatbot

    return chatbot.ask(question)
|
TwitterChatBot/prompt.py
ADDED
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from abc import ABC, abstractmethod
|
2 |
+
|
3 |
+
# Prompt Loaders
|
4 |
+
class PromptLoader(ABC):
    """Interface for loading a raw prompt template from disk."""

    @abstractmethod
    def load_prompt(self, path):
        """Return the template text stored at *path*."""
        pass


class TextPromptLoader(PromptLoader):
    """Loads a prompt template from a plain text file."""

    def load_prompt(self, path):
        # f.read() returns the whole file verbatim; the previous
        # "".join(f.readlines()) built an intermediate list for nothing.
        with open(path) as f:
            return f.read()
|
15 |
+
|
16 |
+
|
17 |
+
# Prompts
|
18 |
+
class Prompt(ABC):
    """Base class for fill-in-the-template prompts.

    Owns a PromptLoader for reading raw templates; subclasses implement
    load() to substitute their placeholders.
    """

    def __init__(self, prompt_loader: PromptLoader):
        self.prompt_loader = prompt_loader

    def load_prompt(self, path):
        # Delegate raw template loading to the injected loader.
        return self.prompt_loader.load_prompt(path)

    @abstractmethod
    def load(self, path):
        # Subclasses return the template at *path* with placeholders filled in.
        pass
|
28 |
+
|
29 |
+
|
30 |
+
class QuestionAnsweringPrompt(Prompt):
    """Fills the question-answering template with a passage and a question."""

    def __init__(self, passage, question, prompt_loader):
        super().__init__(prompt_loader=prompt_loader)
        self.passage = passage
        self.question = question

    def load(self, path):
        """Return the template at *path* with <<PASSAGE>> and <<QUESTION>>
        replaced by this prompt's passage and question."""
        prompt = (
            self.load_prompt(path)
            .replace("<<PASSAGE>>", self.passage)
            .replace("<<QUESTION>>", self.question)
        )
        return prompt
|
46 |
+
|
47 |
+
|
48 |
+
class PassageSummarizationPrompt(Prompt):
    """Fills the summarization template with a passage."""

    def __init__(self, passage, prompt_loader):
        super().__init__(prompt_loader=prompt_loader)
        self.passage = passage

    def load(self, path):
        """Return the template at *path* with <<PASSAGE>> replaced by this
        prompt's passage."""
        prompt = self.load_prompt(path).replace("<<PASSAGE>>", self.passage)
        return prompt
|
TwitterChatBot/tests/chat_test.py
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from pathlib import Path
|
3 |
+
|
4 |
+
from index import IndexSearchEngine
|
5 |
+
from gpt_3_manager import Gpt3Manager
|
6 |
+
|
7 |
+
from dotenv import load_dotenv
|
8 |
+
from chat import ChatBot
|
9 |
+
from index import JsonLinesIndex
|
10 |
+
|
11 |
+
from prompt import TextPromptLoader
|
12 |
+
|
13 |
+
load_dotenv()
|
14 |
+
|
15 |
+
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
16 |
+
|
17 |
+
|
18 |
+
def test_chatbot():
    """End-to-end smoke test: the chatbot produces some answer for a question."""
    path = Path("index") / "index.jsonl"

    index = JsonLinesIndex()
    loaded = index.load(path)
    gpt_manager = Gpt3Manager(api_key=OPENAI_API_KEY)

    engine = IndexSearchEngine(loaded, gpt_manager=gpt_manager)
    loader = TextPromptLoader()
    chatbot = ChatBot(engine, prompt_loader=loader, gpt_manager=gpt_manager)

    answer = chatbot.ask("What does the twitter terms of service does")

    # `is not None` is the idiomatic None check (PEP 8); `!= None` goes
    # through __eq__ and can be fooled by custom types.
    assert answer is not None
|
TwitterChatBot/tests/gpt_3_manager_test.py
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from dotenv import load_dotenv
|
3 |
+
from gpt_3_manager import Gpt3Manager
|
4 |
+
|
5 |
+
load_dotenv()
|
6 |
+
|
7 |
+
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
8 |
+
|
9 |
+
|
10 |
+
def test_gpt3_completion():
    """A completion request with a small model/token budget returns text."""
    manager = Gpt3Manager(api_key=OPENAI_API_KEY)
    completion = manager.get_completion(
        prompt="This is a testing prompt", max_tokens=10, model="text-ada-001"
    )
    # get_completion returns None on failure; PEP 8 prefers `is not None`.
    assert completion is not None


def test_gpt3_embedding():
    """An embedding request returns a vector."""
    manager = Gpt3Manager(api_key=OPENAI_API_KEY)
    embedding = manager.get_embedding(prompt="This is a testing prompt")
    assert embedding is not None
|
TwitterChatBot/tests/index_test.py
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from index import JsonLinesIndex, IndexSearchEngine
|
3 |
+
from gpt_3_manager import Gpt3Manager
|
4 |
+
from pathlib import Path
|
5 |
+
from dotenv import load_dotenv
|
6 |
+
|
7 |
+
load_dotenv()
|
8 |
+
|
9 |
+
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
10 |
+
|
11 |
+
|
12 |
+
def test_jsonlines_index():
    """The JSONL index file loads into a non-None list of records."""
    path = Path("index") / "index.jsonl"

    index = JsonLinesIndex()
    result = index.load(path)

    # PEP 8: compare to None with `is not`, never `!=`.
    assert result is not None


# NOTE(review): "serach" is a typo for "search"; the name is kept unchanged
# so any external references to this test id keep working.
def test_index_serach_engine():
    """Searching the loaded index for a question returns some results."""
    path = Path("index") / "index.jsonl"
    gpt_manager = Gpt3Manager(OPENAI_API_KEY)
    index = JsonLinesIndex()
    loaded = index.load(path)
    engine = IndexSearchEngine(loaded, gpt_manager=gpt_manager)

    results = engine.search(question="What does the twitter tos does")

    assert results is not None
|
TwitterChatBot/tests/prompt_test.py
ADDED
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pathlib import Path
|
2 |
+
from prompt import QuestionAnsweringPrompt, PassageSummarizationPrompt, TextPromptLoader
|
3 |
+
|
4 |
+
|
5 |
+
def test_text_prompt_loader():
    # The loader must return the template file's text verbatim,
    # placeholders included.
    path = Path("prompts") / "question_answering.txt"
    prompt_loader = TextPromptLoader()

    prompt = prompt_loader.load_prompt(path)
    testing_prompt = (
        "Use the passage to write a detailed answer to the following question\n"
        "\n"
        "passage: <<PASSAGE>>\n"
        "\n"
        "question: <<QUESTION>>\n"
        "\n"
        "answer:"
    )

    assert prompt == testing_prompt


def test_question_answering_prompt():
    # load() must substitute both <<PASSAGE>> and <<QUESTION>> placeholders.
    path = Path("prompts") / "question_answering.txt"

    passage = "Hi, I'm foo and I love cycling and programming"
    question = "What is foo's hobby"

    prompt_loader = TextPromptLoader()
    prompt = QuestionAnsweringPrompt(passage, question, prompt_loader)
    loaded_prompt = prompt.load(path)

    testing_prompt = (
        "Use the passage to write a detailed answer to the following question\n"
        "\n"
        "passage: Hi, I'm foo and I love cycling and programming\n"
        "\n"
        "question: What is foo's hobby\n"
        "\n"
        "answer:"
    )

    assert loaded_prompt == testing_prompt


def test_passage_summarization_prompt():
    # load() must substitute the single <<PASSAGE>> placeholder.
    path = Path("prompts") / "passage_summarization.txt"

    passage = "Hi, I'm foo and I love cycling and programming"

    prompt_loader = TextPromptLoader()
    prompt = PassageSummarizationPrompt(passage, prompt_loader)
    loaded_prompt = prompt.load(path)

    testing_prompt = (
        "Summarize the following passage in detail\n"
        "passage: Hi, I'm foo and I love cycling and programming\n"
        "\n"
        "summary:"
    )

    assert loaded_prompt == testing_prompt
|
TwitterChatBot/tests/utils_test.py
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pathlib import Path
|
2 |
+
from utils import load_prompt
|
3 |
+
|
4 |
+
|
5 |
+
def test_load_prompt_default():
    """load_prompt must return the template file's contents verbatim."""
    path = Path("prompts") / "question_answering.txt"

    # Build the expected text straight from the file.
    with open(path) as f:
        expected = f.read()

    assert load_prompt(path) == expected
|
TwitterChatBot/utils.py
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
|
3 |
+
|
4 |
+
def load_prompt(path):
    """Return the full text of the prompt template at *path*."""
    # f.read() yields the whole file in one call; the previous
    # "".join(f.readlines()) built an intermediate list for nothing.
    with open(path) as f:
        return f.read()
|
8 |
+
|
9 |
+
|
10 |
+
def cosine_similarity(emb1, emb2):
    """Cosine of the angle between two embedding vectors."""
    norm1 = np.dot(emb1, emb1) ** 0.5
    norm2 = np.dot(emb2, emb2) ** 0.5
    return np.dot(emb1, emb2) / (norm1 * norm2)
|
14 |
+
|
15 |
+
|
16 |
+
def dot_similarity(emb1, emb2):
    """Dot-product similarity between two embedding vectors."""
    return np.dot(emb1, emb2)
|
app.py
CHANGED
@@ -1,26 +1,27 @@
|
|
1 |
import os
|
2 |
import gradio as gr
|
3 |
import requests
|
4 |
-
from
|
5 |
|
6 |
-
load_dotenv()
|
7 |
|
8 |
-
|
|
|
|
|
9 |
|
10 |
|
11 |
-
def get_answer(question):
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
|
25 |
|
26 |
def predict(input, history=[]):
|
|
|
1 |
import os
|
2 |
import gradio as gr
|
3 |
import requests
|
4 |
+
from TwitterChatBot.main import ask
|
5 |
|
|
|
6 |
|
7 |
+
def get_answer(question):
    """Return a cleaned-up chatbot answer for *question*."""
    answer = ask(question=question)
    # ask() can return None when the completion request fails
    # (Gpt3Manager swallows errors and returns None); calling .strip()
    # on None would raise AttributeError in the UI.
    if answer is None:
        return "Sorry, We have a problem with our server"
    return answer.strip()
|
10 |
|
11 |
|
12 |
+
# def get_answer(question):
|
13 |
+
# try:
|
14 |
+
# answer = requests.get(
|
15 |
+
# url,
|
16 |
+
# json={"question": question},
|
17 |
+
# )
|
18 |
+
# except Exception as err:
|
19 |
+
# return f"Sorry there was a problem with {err}, please check your connection and try again."
|
20 |
+
|
21 |
+
# if answer.status_code == 200:
|
22 |
+
# return answer.json()["answer"]
|
23 |
+
|
24 |
+
# return "Sorry, We have a problem with our server"
|
25 |
|
26 |
|
27 |
def predict(input, history=[]):
|
index/build_index.py
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import re
|
3 |
+
from pathlib import Path
|
4 |
+
from dotenv import load_dotenv
|
5 |
+
|
6 |
+
import openai
|
7 |
+
import textwrap
|
8 |
+
import jsonlines
|
9 |
+
|
10 |
+
from src.utils import gpt3_embeddings
|
11 |
+
|
12 |
+
load_dotenv()
|
13 |
+
|
14 |
+
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
15 |
+
|
16 |
+
openai.api_key = OPENAI_API_KEY
|
17 |
+
|
18 |
+
path = Path("./documents")


# Build the embedding index: read the source document, normalize
# whitespace, split into ~4000-char chunks, embed each chunk, and write
# {"content", "embedding"} records to index/index.jsonl.
with open(path / "result.txt", "r") as f:
    text = f.read()
    # raw string for the regex: "\s" in a plain string is an invalid
    # escape sequence (DeprecationWarning on modern Python)
    text = re.sub(r"\s+", " ", text)  # white space normalization

result = []

chunks = textwrap.wrap(text, 4000)
for chunk in chunks:
    # NOTE(review): gpt3_embeddings is imported from src.utils while the
    # rest of the project imports from TwitterChatBot.utils (which has no
    # such function) — confirm this module path actually exists.
    embedding = gpt3_embeddings(chunk)
    info = {"content": chunk, "embedding": embedding}
    result.append(info)

result_path = Path("./index")

with jsonlines.open(result_path / "index.jsonl", "w") as writer:
    writer.write_all(result)
|
index/index.jsonl
ADDED
The diff for this file is too large to render.
See raw diff
|
|
prompts/passage_summarization.txt
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Summarize the following passage in detail
|
2 |
+
passage: <<PASSAGE>>
|
3 |
+
|
4 |
+
summary:
|
prompts/question_answering.txt
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Use the passage to write a detailed answer to the following question
|
2 |
+
|
3 |
+
passage: <<PASSAGE>>
|
4 |
+
|
5 |
+
question: <<QUESTION>>
|
6 |
+
|
7 |
+
answer:
|
requirements.txt
CHANGED
@@ -1,2 +1,4 @@
|
|
1 |
gradio
|
2 |
-
python-dotenv
|
|
|
|
|
|
1 |
gradio
|
2 |
+
python-dotenv
|
3 |
+
jsonlines
|
4 |
+
openai
|