# NOTE(review): lines above the code were Hugging Face Spaces page-scrape
# residue (runtime-error banner, file size 5,981 bytes, commit 2b5ef14,
# line-number gutter); converted to this comment so the file parses.
from haystack.document_stores.memory import InMemoryDocumentStore
from haystack.nodes import TfidfRetriever, FARMReader
from google.colab import drive
drive.mount('/content/drive')
import pickle
pickle_file = '/content/drive/MyDrive/Group13_NLP_Project/knowledge_graph.pickle'
# Load the pre-built knowledge graph from the pickle file on Drive.
# NOTE(review): unpickling is only safe because we created this file
# ourselves -- never pickle.load untrusted data.
with open(pickle_file, 'rb') as f:
    knowledge_graph = pickle.load(f)
print("Knowledge graph loaded from ", pickle_file)
document_store = InMemoryDocumentStore()
node_sentences = {}  # node -> its generated two-hop context sentence
documents = []

# Materialize the edge list once instead of re-scanning knowledge_graph.edges()
# inside the loop (the original was O(V*E) per node). More importantly, the
# original searched grandparents/grandchildren only among edges that touch the
# *current* node, so two-hop relatives were almost always missed; searching the
# full edge list fixes that.
all_edges = list(knowledge_graph.edges())

nodes = [node for node in knowledge_graph.nodes() if node is not None]
for node in nodes:
    # Direct parents (edges pointing at the node), then their parents.
    parents = [src for src, dst in all_edges if dst == node]
    grandparents = []
    for parent in parents:
        grandparents.extend(src for src, dst in all_edges if dst == parent)
    # Direct children (edges leaving the node), then their children.
    children = [dst for src, dst in all_edges if src == node]
    grandchildren = []
    for child in children:
        grandchildren.extend(dst for src, dst in all_edges if src == child)
    # Combine the two-hop neighbourhood into one pseudo-sentence.
    sentence = ' '.join(grandparents + parents + [node] + children + grandchildren)
    node_sentences[node] = sentence
    # 'content' is the field Haystack retrievers search; the node itself is
    # kept in 'text' (matches the original document schema).
    documents.append({'text': node, 'content': sentence})

document_store.write_documents(documents)
#Initialize the retriever
# TF-IDF is a sparse retriever: it scores the stored documents against the
# query text with no neural model, so it needs no GPU.
retriever = TfidfRetriever(document_store=document_store)
#Initialize the reader
# Multilingual BERT fine-tuned for extractive QA, run on CPU (use_gpu=False).
model_name = "primasr/multilingualbert-for-eqa-finetuned"
reader = FARMReader(model_name_or_path=model_name, use_gpu=False)
#Create pipeline with the component of retriever and reader
from haystack.pipelines import Pipeline
# Standard retriever -> reader extractive-QA topology: the retriever narrows
# the candidates, the reader extracts answer spans from them.
pipeline = Pipeline()
pipeline.add_node(component=retriever, name="Retriever", inputs=["Query"])
pipeline.add_node(component=reader, name="Reader", inputs=["Retriever"])
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
# Targeted to Translate English queries to Malay Language
# NOTE(review): "opus-mt-en-id" / "opus-mt-id-en" are Helsinki-NLP
# English<->Indonesian models, used here as a stand-in for Malay -- confirm
# translation quality is acceptable for Bahasa Malaysia.
# Initialize the tokenizer
en_id_tokenizer = AutoTokenizer.from_pretrained("Helsinki-NLP/opus-mt-en-id")
# Initialize the model
en_id_model = AutoModelForSeq2SeqLM.from_pretrained("Helsinki-NLP/opus-mt-en-id")
# Targeted to Translate Malay Answer to English Language
# Initialize the tokenizer
id_en_tokenizer = AutoTokenizer.from_pretrained("Helsinki-NLP/opus-mt-id-en")
# Initialize the model
id_en_model = AutoModelForSeq2SeqLM.from_pretrained("Helsinki-NLP/opus-mt-id-en")
# Canned chatbot responses for the "wrong answer" retry flow.
# Each entry: [trigger phrase, "here is another answer" prefix, give-up message].
# Index 0 is English, index 1 is Malay.
pairs = [
    [
        "your answer is wrong",
        "Sorry for providing wrong answer, here is the newest answer:\n\n",
        "I am sorry that I can't actually answer your question =("
    ],
    [
        "jawapan anda adalah salah",
        "Maaf sedangkan memberi jawapan yang salah. Berikut adalah jawapan yang baru:\n\n",
        "Minta Maaf, saya tidak boleh menemukan soalan anda =("
    ]]


def checkReiterateQuery(query, lang):
    """Detect whether *query* asks to retry the previous question.

    Returns ``(True, pair_index)`` when the query matches one of the trigger
    phrases in ``pairs`` -- index 0 for an English-detected query, 1
    otherwise (Malay).  Returns ``(False, 3)`` for any other query; the 3 is
    a sentinel that callers never use as an index.
    """
    if query in (pairs[0][0], pairs[1][0]):
        return True, 0 if lang == 'en' else 1
    return False, 3
import gradio as gr
from langdetect import detect
import warnings
warnings.filterwarnings('ignore')
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
# Conversation state shared across calls to the Gradio callback.
chat_history = []
# Index into the previous result's answer list; bumped on each
# "your answer is wrong" retry, reset to 0 on every new question.
answer_counter = 0
def chatbot_interface(message):
    """Gradio callback: answer *message* and return (answer, full chat log).

    Uses module-level state: ``chat_history``, ``answer_counter``, and the
    last pipeline ``result`` (cached so "your answer is wrong" can surface
    the next-best answer without re-running retrieval).
    """
    global answer_counter
    global result
    # Record the user's message in the running transcript.
    chat_history.append(message)
    lang = detect(message)
    reiterate, j = checkReiterateQuery(message, lang)
    #If user want to re-iterate the answer for same question
    if reiterate:
        # Step to the next-ranked answer from the cached result instead of
        # querying the pipeline again.
        answer_counter = answer_counter + 1
        if answer_counter < 5:  # Reader top_k=5, so indices 1..4 are the alternatives
            retrieved_main_answer = pairs[j][1] + result['answers'][answer_counter].answer
            retrieved_main_context = result['answers'][answer_counter].context
        else:
            # Ran out of candidate answers -- give up politely.
            retrieved_main_answer = pairs[j][2]
            retrieved_main_context = ""
    else:
        answer_counter = 0
        #if language is english then convert it to malay language
        if lang == "en":
            # NOTE(review): prepare_seq2seq_batch is deprecated in recent
            # transformers releases -- calling the tokenizer directly is the
            # modern equivalent; confirm against the pinned version.
            tokenized_text = en_id_tokenizer.prepare_seq2seq_batch([message], return_tensors='pt')
            translation = en_id_model.generate(**tokenized_text)
            message = en_id_tokenizer.batch_decode(translation, skip_special_tokens=True)[0]
        result = pipeline.run(query=message.lower(), params={
            "Retriever": {"top_k": 5},
            "Reader": {"top_k": 5}})
        retrieved_main_answer = result['answers'][answer_counter].answer
        retrieved_main_context = result['answers'][answer_counter].context
    response = retrieved_main_answer + ", " + retrieved_main_context
    #Convert the response to english if user ask question in english
    if lang == "en":
        tokenized_text = id_en_tokenizer.prepare_seq2seq_batch([response.lower()], return_tensors='pt')
        translation = id_en_model.generate(**tokenized_text)
        response = id_en_tokenizer.batch_decode(translation, skip_special_tokens=True)[0]
    # Append the response to the chat history
    chat_history.append(response)
    # Join the chat history with blank lines for the history textbox.
    chat_history_text = "\n\n".join(chat_history)
    return response, chat_history_text
# Create a Gradio interface wiring the callback to one input textbox and two
# output textboxes (latest answer + full transcript).
# NOTE(review): gr.inputs / gr.outputs is the legacy pre-3.x Gradio namespace
# and allow_flagging takes a string ("never") in newer releases -- confirm the
# pinned gradio version before upgrading this call.
iface = gr.Interface(
    fn=chatbot_interface,
    inputs=gr.inputs.Textbox(label="Please Type Your Question Here: "),
    outputs=[gr.outputs.Textbox(label="Answers"), gr.outputs.Textbox(label="Chat History")],
    description="## Question Answering system\n\nIt supports **English** and **Bahasa Malaysia**.",
    allow_flagging = False
)
#Demo for the chatbot
# inline=False opens the app in a separate tab rather than embedding in the notebook.
iface.launch(inline = False)
# (end of file; trailing scrape artifact removed)