Charles Chan
committed on
Commit
·
21d443e
1
Parent(s):
de611e2
coding
Browse files
app.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
import streamlit as st
|
2 |
import random
|
3 |
from langchain_community.llms import HuggingFaceHub
|
4 |
-
from langchain_community.embeddings import SentenceTransformerEmbeddings
|
5 |
from langchain_community.vectorstores import FAISS
|
6 |
from datasets import load_dataset
|
7 |
from opencc import OpenCC
|
@@ -35,8 +35,12 @@ if "vector_created" not in st.session_state:
|
|
35 |
if not st.session_state.vector_created:
|
36 |
try:
|
37 |
with st.spinner("正在构建向量数据库..."):
|
38 |
-
|
39 |
-
|
|
|
|
|
|
|
|
|
40 |
st.success("向量数据库构建完成!")
|
41 |
print("向量数据库构建完成!")
|
42 |
except Exception as e:
|
@@ -69,10 +73,10 @@ def answer_question(repo_id, temperature, max_length, question):
|
|
69 |
# 获取答案
|
70 |
try:
|
71 |
with st.spinner("正在筛选本地数据集..."):
|
72 |
-
question_embedding = embeddings.embed_query(question)
|
73 |
question_embedding_str = " ".join(map(str, question_embedding))
|
74 |
# print('question_embedding: ' + question_embedding_str)
|
75 |
-
docs_and_scores = db.similarity_search_with_score(question_embedding_str)
|
76 |
|
77 |
context = "\n".join([doc.page_content for doc, _ in docs_and_scores])
|
78 |
print('context: ' + context)
|
|
|
1 |
import streamlit as st
|
2 |
import random
|
3 |
from langchain_community.llms import HuggingFaceHub
|
4 |
+
from langchain_community.embeddings import SentenceTransformerEmbeddings
|
5 |
from langchain_community.vectorstores import FAISS
|
6 |
from datasets import load_dataset
|
7 |
from opencc import OpenCC
|
|
|
35 |
if not st.session_state.vector_created:
|
36 |
try:
|
37 |
with st.spinner("正在构建向量数据库..."):
|
38 |
+
# all-mpnet-base-v2 是一个由 Sentence Transformers 库提供的预训练模型,
|
39 |
+
# 专门用于生成高质量的句子嵌入(sentence embeddings)。
|
40 |
+
# all-mpnet-base-v2 在多个自然语言处理任务上表现出色,包括语义相似度计算、
|
41 |
+
# 文本检索、聚类等。它能够有效地捕捉句子的语义信息,并生成具有代表性的向量表示。
|
42 |
+
st.session_state.embeddings = SentenceTransformerEmbeddings(model_name="all-mpnet-base-v2")
|
43 |
+
st.session_state.db = FAISS.from_texts(st.session_state.answer_list, st.session_state.embeddings)
|
44 |
st.success("向量数据库构建完成!")
|
45 |
print("向量数据库构建完成!")
|
46 |
except Exception as e:
|
|
|
73 |
# 获取答案
|
74 |
try:
|
75 |
with st.spinner("正在筛选本地数据集..."):
|
76 |
+
question_embedding = st.session_state.embeddings.embed_query(question)
|
77 |
question_embedding_str = " ".join(map(str, question_embedding))
|
78 |
# print('question_embedding: ' + question_embedding_str)
|
79 |
+
docs_and_scores = st.session_state.db.similarity_search_with_score(question_embedding_str)
|
80 |
|
81 |
context = "\n".join([doc.page_content for doc, _ in docs_and_scores])
|
82 |
print('context: ' + context)
|