Charles Chan
committed on
Commit
·
a054c10
1
Parent(s):
630d3f4
coding
Browse files
- app.py +5 -6
- requirements.txt +1 -0
app.py
CHANGED
@@ -4,14 +4,13 @@ from langchain_community.llms import HuggingFaceHub
|
|
4 |
from langchain_community.embeddings import SentenceTransformerEmbeddings
|
5 |
from langchain_community.vectorstores import FAISS
|
6 |
from datasets import load_dataset
|
7 |
-
from
|
8 |
|
9 |
# 使用 進擊的巨人 数据集
|
10 |
try:
|
11 |
-
converter =
|
12 |
dataset = load_dataset("rorubyy/attack_on_titan_wiki_chinese")
|
13 |
-
answer_list = [converter(example["Answer"])
|
14 |
-
|
15 |
except Exception as e:
|
16 |
st.error(f"读取数据集失败:{e}")
|
17 |
st.stop()
|
@@ -82,9 +81,9 @@ with col3:
|
|
82 |
random_index = random.randint(0, dataset_size - 1)
|
83 |
# 读取随机问题
|
84 |
random_question = dataset["train"][random_index]["Question"]
|
85 |
-
random_question = converter(random_question)
|
86 |
origin_answer = dataset["train"][random_index]["Answer"]
|
87 |
-
origin_answer = converter(origin_answer)
|
88 |
print('[]' + str(random_index) + '/' + str(dataset_size) + ']random_question: ' + random_question)
|
89 |
print('origin_answer: ' + origin_answer)
|
90 |
|
|
|
4 |
from langchain_community.embeddings import SentenceTransformerEmbeddings
|
5 |
from langchain_community.vectorstores import FAISS
|
6 |
from datasets import load_dataset
|
7 |
+
from opencc import OpenCC
|
8 |
|
9 |
# 使用 進擊的巨人 数据集
|
10 |
try:
|
11 |
+
converter = OpenCC('tw2s.json') # 'tw2s.json' 表示繁体中文到简体中文的转换
|
12 |
dataset = load_dataset("rorubyy/attack_on_titan_wiki_chinese")
|
13 |
+
answer_list = [converter.convert(example["Answer"]) for example in dataset["train"]]
|
|
|
14 |
except Exception as e:
|
15 |
st.error(f"读取数据集失败:{e}")
|
16 |
st.stop()
|
|
|
81 |
random_index = random.randint(0, dataset_size - 1)
|
82 |
# 读取随机问题
|
83 |
random_question = dataset["train"][random_index]["Question"]
|
84 |
+
random_question = converter.convert(random_question)
|
85 |
origin_answer = dataset["train"][random_index]["Answer"]
|
86 |
+
origin_answer = converter.convert(origin_answer)
|
87 |
print('[]' + str(random_index) + '/' + str(dataset_size) + ']random_question: ' + random_question)
|
88 |
print('origin_answer: ' + origin_answer)
|
89 |
|
requirements.txt
CHANGED
@@ -6,3 +6,4 @@ langchain-huggingface
|
|
6 |
sentence_transformers
|
7 |
faiss-cpu
|
8 |
datasets
|
|
|
|
6 |
sentence_transformers
|
7 |
faiss-cpu
|
8 |
datasets
|
9 |
+
opencc-python-reimplemented
|