zinoubm commited on
Commit
2d65d8a
·
1 Parent(s): 0d80d9a

adding the asking functionality

Browse files
.gitignore CHANGED
@@ -1,2 +1,3 @@
1
  .env
2
- env/
 
 
1
  .env
2
+ env/
3
+ __pycache__/
TwitterChatBot/__init__.py ADDED
File without changes
TwitterChatBot/chat.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import openai
3
+ from pathlib import Path
4
+ from dotenv import load_dotenv
5
+
6
+ from TwitterChatBot.index import IndexSearchEngine
7
+ from TwitterChatBot.prompt import (
8
+ QuestionAnsweringPrompt,
9
+ PassageSummarizationPrompt,
10
+ TextPromptLoader,
11
+ )
12
+
13
+
14
+ load_dotenv()
15
+
16
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
17
+
18
+ openai.api_key = OPENAI_API_KEY
19
+
20
+
21
class ChatBot:
    """Answers a question by retrieving relevant passages and summarizing.

    Pipeline: search the index for the top passages, generate a per-passage
    answer with GPT, then summarize the per-passage answers into one reply.
    """

    def __init__(
        self, index_search_engine: IndexSearchEngine, prompt_loader, gpt_manager
    ):
        self.index_search_engine = index_search_engine
        # NOTE: fixed from the misspelled "prompet_loader".
        self.prompt_loader = prompt_loader
        self.gpt_manager = gpt_manager

    def ask(self, question):
        """Return a summarized answer to *question*, or None if GPT fails."""
        search_result = self.index_search_engine.search(question=question, count=2)

        answers = []
        for passage in search_result:
            qa_prompt = QuestionAnsweringPrompt(
                passage=passage, question=question, prompt_loader=self.prompt_loader
            )
            prompt = qa_prompt.load(Path("./prompts") / "question_answering.txt")

            answer = self.gpt_manager.get_completion(
                prompt=prompt, max_tokens=80, model="text-curie-001"
            )
            # get_completion returns None on failure; joining None would
            # raise TypeError, so keep only successful answers.
            if answer is not None:
                answers.append(answer)

        summarization_prompt = PassageSummarizationPrompt(
            "\n".join(answers), self.prompt_loader
        )
        prompt = summarization_prompt.load(
            Path("./prompts") / "passage_summarization.txt"
        )

        final_answer = self.gpt_manager.get_completion(prompt=prompt)
        return final_answer
TwitterChatBot/gpt_3_manager.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import openai
2
+
3
+
4
class Gpt3Manager:
    """Thin wrapper around the OpenAI completion and embedding endpoints."""

    def __init__(self, api_key):
        openai.api_key = api_key

    def get_completion(self, prompt, max_tokens=128, model="text-davinci-003"):
        """Return the completion text for *prompt*, or None if the call fails."""
        text = None
        try:
            completion = openai.Completion.create(
                prompt=prompt,
                max_tokens=max_tokens,
                model=model,
            )
            text = completion["choices"][0]["text"]
        except Exception as err:
            # Best-effort: report the failure and fall through to None.
            print(f"Sorry, There was a problem \n\n {err}")
        return text

    def get_embedding(self, prompt, model="text-similarity-ada-001"):
        """Return the embedding vector for *prompt*, or None if the call fails."""
        # Newlines are known to degrade embedding quality; flatten them.
        cleaned = prompt.replace("\n", " ")
        vector = None
        try:
            response = openai.Embedding.create(input=[cleaned], model=model)
            vector = response["data"][0]["embedding"]
        except Exception as err:
            print(f"Sorry, There was a problem {err}")
        return vector
TwitterChatBot/index.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from abc import ABC, abstractmethod
2
+ import jsonlines
3
+ from TwitterChatBot.utils import dot_similarity
4
+
5
+
6
class Index(ABC):
    """Abstract source of index records."""

    @abstractmethod
    def load(self, path):
        """Load and return the index records stored at *path*."""
10
+
11
+
12
class JsonLinesIndex(Index):
    """Index backed by a JSON-Lines file (one record per line)."""

    def load(self, path):
        """Read *path* and return every record as a list."""
        with jsonlines.open(path) as reader:
            return list(reader)
17
+
18
+
19
class IndexSearchEngine:
    """Ranks indexed passages by embedding similarity to a question."""

    def __init__(self, indexes, gpt_manager):
        # indexes: records with "embedding" and "content" keys (see build_index).
        self.indexes = indexes
        self.gpt_manager = gpt_manager

    def search(self, question, count=4):
        """Return the contents of the *count* passages most similar to *question*."""
        question_embedding = self.gpt_manager.get_embedding(prompt=question)

        # Score every indexed passage against the question embedding.
        # (Fixed misspelled local name "simmilarities".)
        similarities = [
            {
                "index": record,
                "score": dot_similarity(question_embedding, record["embedding"]),
            }
            for record in self.indexes
        ]
        similarities.sort(key=lambda item: item["score"], reverse=True)

        return [hit["index"]["content"] for hit in similarities[:count]]
TwitterChatBot/main.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from pathlib import Path
3
+ from dotenv import load_dotenv
4
+
5
+ from TwitterChatBot.index import IndexSearchEngine
6
+ from TwitterChatBot.gpt_3_manager import Gpt3Manager
7
+ from TwitterChatBot.chat import ChatBot
8
+ from TwitterChatBot.index import JsonLinesIndex
9
+ from TwitterChatBot.prompt import TextPromptLoader
10
+
11
+ load_dotenv()
12
+
13
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
14
+
15
+ path = Path("./index") / "index.jsonl"
16
+
17
+
18
def ask(question):
    """Answer *question* using the index-backed ChatBot.

    The index, GPT manager and ChatBot are expensive to build (the whole
    index file is read from disk), so they are constructed once on first
    use and cached on the function object for subsequent calls.
    """
    chatbot = getattr(ask, "_chatbot", None)
    if chatbot is None:
        index = JsonLinesIndex()
        loaded = index.load(path)
        gpt_manager = Gpt3Manager(api_key=OPENAI_API_KEY)

        engine = IndexSearchEngine(loaded, gpt_manager=gpt_manager)
        loader = TextPromptLoader()
        chatbot = ChatBot(engine, prompt_loader=loader, gpt_manager=gpt_manager)
        ask._chatbot = chatbot

    return chatbot.ask(question)
TwitterChatBot/prompt.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from abc import ABC, abstractmethod
2
+
3
# Prompt Loaders
class PromptLoader(ABC):
    """Abstract strategy for reading a prompt template from storage."""

    @abstractmethod
    def load_prompt(self, path):
        """Return the prompt template stored at *path* as a string."""


class TextPromptLoader(PromptLoader):
    """Loads prompt templates from plain text files."""

    def load_prompt(self, path):
        """Return the full text content of *path*."""
        # f.read() is equivalent to "".join(f.readlines()) without the list.
        with open(path) as f:
            return f.read()
15
+
16
+
17
# Prompts
class Prompt(ABC):
    """Base class for template-backed prompts.

    Subclasses implement load(), which reads a template file and fills in
    its placeholders.
    """

    def __init__(self, prompt_loader: PromptLoader):
        self.prompt_loader = prompt_loader

    def load_prompt(self, path):
        """Delegate raw template loading to the configured loader."""
        return self.prompt_loader.load_prompt(path)

    @abstractmethod
    def load(self, path):
        """Return the fully substituted prompt read from *path*."""
28
+
29
+
30
class QuestionAnsweringPrompt(Prompt):
    """Prompt asking GPT to answer a question using a given passage.

    (Removed commented-out dead code from the original.)
    """

    def __init__(self, passage, question, prompt_loader):
        super().__init__(prompt_loader=prompt_loader)
        self.passage = passage
        self.question = question

    def load(self, path):
        """Return the template at *path* with passage and question filled in."""
        template = self.load_prompt(path)
        return template.replace("<<PASSAGE>>", self.passage).replace(
            "<<QUESTION>>", self.question
        )
46
+
47
+
48
class PassageSummarizationPrompt(Prompt):
    """Prompt asking GPT to summarize a passage.

    (Removed commented-out dead code from the original.)
    """

    def __init__(self, passage, prompt_loader):
        super().__init__(prompt_loader=prompt_loader)
        self.passage = passage

    def load(self, path):
        """Return the template at *path* with the passage filled in."""
        return self.load_prompt(path).replace("<<PASSAGE>>", self.passage)
TwitterChatBot/tests/chat_test.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from pathlib import Path
3
+
4
+ from index import IndexSearchEngine
5
+ from gpt_3_manager import Gpt3Manager
6
+
7
+ from dotenv import load_dotenv
8
+ from chat import ChatBot
9
+ from index import JsonLinesIndex
10
+
11
+ from prompt import TextPromptLoader
12
+
13
+ load_dotenv()
14
+
15
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
16
+
17
+
18
def test_chatbot():
    """End-to-end smoke test: the chatbot produces some answer."""
    path = Path("index") / "index.jsonl"

    index = JsonLinesIndex()
    loaded = index.load(path)
    gpt_manager = Gpt3Manager(api_key=OPENAI_API_KEY)

    engine = IndexSearchEngine(loaded, gpt_manager=gpt_manager)
    loader = TextPromptLoader()
    chatbot = ChatBot(engine, prompt_loader=loader, gpt_manager=gpt_manager)

    answer = chatbot.ask("What does the twitter terms of service does")

    # PEP 8: comparisons to None use "is not"; a failed GPT call yields None.
    assert answer is not None
TwitterChatBot/tests/gpt_3_manager_test.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from dotenv import load_dotenv
3
+ from gpt_3_manager import Gpt3Manager
4
+
5
+ load_dotenv()
6
+
7
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
8
+
9
+
10
def test_gpt3_completion():
    """get_completion returns text (not None) for a simple prompt."""
    manager = Gpt3Manager(api_key=OPENAI_API_KEY)
    request = manager.get_completion(
        prompt="This is a testing prompt", max_tokens=10, model="text-ada-001"
    )
    # PEP 8: comparisons to None use "is not".
    assert request is not None
16
+
17
+
18
def test_gpt3_embedding():
    """get_embedding returns a vector (not None) for a simple prompt."""
    manager = Gpt3Manager(api_key=OPENAI_API_KEY)
    request = manager.get_embedding(prompt="This is a testing prompt")
    # PEP 8: comparisons to None use "is not".
    assert request is not None
TwitterChatBot/tests/index_test.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from index import JsonLinesIndex, IndexSearchEngine
3
+ from gpt_3_manager import Gpt3Manager
4
+ from pathlib import Path
5
+ from dotenv import load_dotenv
6
+
7
+ load_dotenv()
8
+
9
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
10
+
11
+
12
def test_jsonlines_index():
    """Loading the JSON-Lines index yields a non-None result."""
    path = Path("index") / "index.jsonl"

    index = JsonLinesIndex()
    result = index.load(path)

    # PEP 8: comparisons to None use "is not".
    assert result is not None
19
+
20
+
21
def test_index_search_engine():
    """The search engine returns results for a question.

    Renamed from the misspelled test_index_serach_engine.
    """
    path = Path("index") / "index.jsonl"
    gpt_manager = Gpt3Manager(OPENAI_API_KEY)
    index = JsonLinesIndex()
    loaded = index.load(path)
    engine = IndexSearchEngine(loaded, gpt_manager=gpt_manager)

    results = engine.search(question="What does the twitter tos does")

    # PEP 8: comparisons to None use "is not".
    assert results is not None
TwitterChatBot/tests/prompt_test.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ from prompt import QuestionAnsweringPrompt, PassageSummarizationPrompt, TextPromptLoader
3
+
4
+
5
def test_text_prompt_loader():
    """The loader returns the question-answering template verbatim."""
    template_path = Path("prompts") / "question_answering.txt"
    loader = TextPromptLoader()

    loaded = loader.load_prompt(template_path)

    expected = (
        "Use the passage to write a detailed answer to the following question\n"
        "\n"
        "passage: <<PASSAGE>>\n"
        "\n"
        "question: <<QUESTION>>\n"
        "\n"
        "answer:"
    )
    assert loaded == expected
21
+
22
+
23
def test_question_answering_prompt():
    """Placeholders are replaced with the given passage and question."""
    template_path = Path("prompts") / "question_answering.txt"
    passage = "Hi, I'm foo and I love cycling and programming"
    question = "What is foo's hobby"

    qa_prompt = QuestionAnsweringPrompt(passage, question, TextPromptLoader())
    rendered = qa_prompt.load(template_path)

    expected = (
        "Use the passage to write a detailed answer to the following question\n"
        "\n"
        "passage: Hi, I'm foo and I love cycling and programming\n"
        "\n"
        "question: What is foo's hobby\n"
        "\n"
        "answer:"
    )
    assert rendered == expected
44
+
45
+
46
def test_passage_summarization_prompt():
    """The summarization template gets the passage spliced in."""
    template_path = Path("prompts") / "passage_summarization.txt"
    passage = "Hi, I'm foo and I love cycling and programming"

    summary_prompt = PassageSummarizationPrompt(passage, TextPromptLoader())
    rendered = summary_prompt.load(template_path)

    expected = (
        "Summarize the following passage in detail\n"
        "passage: Hi, I'm foo and I love cycling and programming\n"
        "\n"
        "summary:"
    )
    assert rendered == expected
TwitterChatBot/tests/utils_test.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ from utils import load_prompt
3
+
4
+
5
def test_load_prompt_default():
    """load_prompt returns exactly the raw contents of the file."""
    path = Path("prompts") / "question_answering.txt"

    with open(path) as f:
        expected = f.read()

    assert load_prompt(path) == expected
TwitterChatBot/utils.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+
3
+
4
def load_prompt(path):
    """Return the full text content of the file at *path*."""
    # f.read() is equivalent to "".join(f.readlines()) without the extra list.
    with open(path) as f:
        return f.read()
8
+
9
+
10
def cosine_similarity(emb1, emb2):
    """Return the cosine of the angle between the two embedding vectors."""
    dot = np.dot(emb1, emb2)
    norm1 = np.dot(emb1, emb1) ** 0.5
    norm2 = np.dot(emb2, emb2) ** 0.5
    return dot / (norm1 * norm2)
14
+
15
+
16
def dot_similarity(emb1, emb2):
    """Return the raw dot product of embedding vectors *emb1* and *emb2*."""
    similarity = np.dot(emb1, emb2)
    return similarity
app.py CHANGED
@@ -1,26 +1,27 @@
1
  import os
2
  import gradio as gr
3
  import requests
4
- from dotenv import load_dotenv
5
 
6
- load_dotenv()
7
 
8
- url = os.getenv("URL")
 
 
9
 
10
 
11
- def get_answer(question):
12
- try:
13
- answer = requests.get(
14
- url,
15
- json={"question": question},
16
- )
17
- except Exception as err:
18
- return f"Sorry there was a problem with {err}, please check your connection and try again."
19
-
20
- if answer.status_code == 200:
21
- return answer.json()["answer"]
22
-
23
- return "Sorry, We have a problem with our server"
24
 
25
 
26
  def predict(input, history=[]):
 
1
  import os
2
  import gradio as gr
3
  import requests
4
+ from TwitterChatBot.main import ask
5
 
 
6
 
7
def get_answer(question):
    """Answer *question* via the local ChatBot pipeline.

    Gpt3Manager.get_completion returns None when the OpenAI call fails,
    so guard before calling .strip() to avoid an AttributeError.
    """
    answer = ask(question=question)
    if answer is None:
        return "Sorry, there was a problem generating an answer. Please try again."
    return answer.strip()
10
 
11
 
12
+ # def get_answer(question):
13
+ # try:
14
+ # answer = requests.get(
15
+ # url,
16
+ # json={"question": question},
17
+ # )
18
+ # except Exception as err:
19
+ # return f"Sorry there was a problem with {err}, please check your connection and try again."
20
+
21
+ # if answer.status_code == 200:
22
+ # return answer.json()["answer"]
23
+
24
+ # return "Sorry, We have a problem with our server"
25
 
26
 
27
  def predict(input, history=[]):
index/build_index.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ from pathlib import Path
4
+ from dotenv import load_dotenv
5
+
6
+ import openai
7
+ import textwrap
8
+ import jsonlines
9
+
10
+ from src.utils import gpt3_embeddings
11
+
12
+ load_dotenv()
13
+
14
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
15
+
16
+ openai.api_key = OPENAI_API_KEY
17
+
18
path = Path("./documents")


with open(path / "result.txt", "r") as f:
    # f.read() replaces the readlines/join pattern.
    text = f.read()
    # Raw string: "\s+" is an invalid escape sequence, r"\s+" is the
    # intended regex. Collapses all whitespace runs to single spaces.
    text = re.sub(r"\s+", " ", text)

result = []

# Split into ~4000-character chunks so each fits in one embedding request.
chunks = textwrap.wrap(text, 4000)
for chunk in chunks:
    embedding = gpt3_embeddings(chunk)
    result.append({"content": chunk, "embedding": embedding})

result_path = Path("./index")

with jsonlines.open(result_path / "index.jsonl", "w") as writer:
    writer.write_all(result)
index/index.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
prompts/passage_summarization.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ Summarize the following passage in detail
2
+ passage: <<PASSAGE>>
3
+
4
+ summary:
prompts/question_answering.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ Use the passage to write a detailed answer to the following question
2
+
3
+ passage: <<PASSAGE>>
4
+
5
+ question: <<QUESTION>>
6
+
7
+ answer:
requirements.txt CHANGED
@@ -1,2 +1,4 @@
1
  gradio
2
- python-dotenv
 
 
 
1
  gradio
2
+ python-dotenv
3
+ jsonlines
4
+ openai