Charles Chan committed on
Commit
51c0f15
·
1 Parent(s): 10b5e55
Files changed (2) hide show
  1. app.py +6 -11
  2. requirements.txt +0 -1
app.py CHANGED
@@ -4,10 +4,8 @@ from langchain_community.llms import HuggingFaceHub
4
  from langchain_community.embeddings import SentenceTransformerEmbeddings
5
  from langchain_community.vectorstores import FAISS
6
  from datasets import load_dataset
7
- from opencc import OpenCC
8
 
9
- # 使用 進擊的巨人 数据集
10
- # 原数据集是是繁体中文,为了调试方便,将其转换成简体中文之后使用
11
  if "data_list" not in st.session_state:
12
  st.session_state.data_list = []
13
  st.session_state.answer_list = []
@@ -15,15 +13,12 @@ if "data_list" not in st.session_state:
15
  if not st.session_state.data_list:
16
  try:
17
  with st.spinner("正在读取数据库..."):
18
- converter = OpenCC('tw2s') # 'tw2s.json' 表示繁体中文到简体中文的转换
19
- dataset = load_dataset("rorubyy/attack_on_titan_wiki_chinese")
20
  data_list = []
21
  answer_list = []
22
  for example in dataset["train"]:
23
- converted_answer = converter.convert(example["Answer"])
24
- converted_question = converter.convert(example["Question"])
25
- answer_list.append(converted_answer)
26
- data_list.append({"Question": converted_question, "Answer": converted_answer})
27
  st.session_state.answer_list = answer_list
28
  st.session_state.data_list = data_list
29
  st.success("数据库读取完成!")
@@ -112,7 +107,7 @@ def answer_question(repo_id, temperature, max_length, question):
112
  return {"prompt": "", "answer": "An error occurred during the answering process.", "pure_answer": ""}
113
 
114
  # Streamlit 界面
115
- st.title("進擊的巨人 知识库问答系统")
116
 
117
  col1, col2 = st.columns(2)
118
  with col1:
@@ -154,7 +149,7 @@ with col3:
154
  generate_answer(gemma, float(temperature), int(max_length), random_question)
155
 
156
  with col4:
157
- question = st.text_area("请输入问题", "《进击的巨人》中都有哪些主要角色?")
158
  if st.button("提交输入的问题"):
159
  if not question:
160
  st.warning("请输入问题!")
 
4
  from langchain_community.embeddings import SentenceTransformerEmbeddings
5
  from langchain_community.vectorstores import FAISS
6
  from datasets import load_dataset
 
7
 
8
+ # 使用 假知识 数据集
 
9
  if "data_list" not in st.session_state:
10
  st.session_state.data_list = []
11
  st.session_state.answer_list = []
 
13
  if not st.session_state.data_list:
14
  try:
15
  with st.spinner("正在读取数据库..."):
16
+ dataset = load_dataset("zeerd/fake_knowledge")
 
17
  data_list = []
18
  answer_list = []
19
  for example in dataset["train"]:
20
+ answer_list.append(example["Answer"])
21
+ data_list.append({"Question": example["Question"], "Answer": example["Answer"]})
 
 
22
  st.session_state.answer_list = answer_list
23
  st.session_state.data_list = data_list
24
  st.success("数据库读取完成!")
 
107
  return {"prompt": "", "answer": "An error occurred during the answering process.", "pure_answer": ""}
108
 
109
  # Streamlit 界面
110
+ st.title("假知识库问答系统")
111
 
112
  col1, col2 = st.columns(2)
113
  with col1:
 
149
  generate_answer(gemma, float(temperature), int(max_length), random_question)
150
 
151
  with col4:
152
+ question = st.text_area("请输入问题", "谁是潜水员?")
153
  if st.button("提交输入的问题"):
154
  if not question:
155
  st.warning("请输入问题!")
requirements.txt CHANGED
@@ -6,4 +6,3 @@ langchain-huggingface
6
  sentence_transformers
7
  faiss-cpu
8
  datasets
9
- opencc-python-reimplemented
 
6
  sentence_transformers
7
  faiss-cpu
8
  datasets