Charles Chan committed on
Commit
21d443e
·
1 Parent(s): de611e2
Files changed (1) hide show
  1. app.py +9 -5
app.py CHANGED
@@ -1,7 +1,7 @@
1
  import streamlit as st
2
  import random
3
  from langchain_community.llms import HuggingFaceHub
4
- from langchain_community.embeddings import SentenceTransformerEmbeddings
5
  from langchain_community.vectorstores import FAISS
6
  from datasets import load_dataset
7
  from opencc import OpenCC
@@ -35,8 +35,12 @@ if "vector_created" not in st.session_state:
35
  if not st.session_state.vector_created:
36
  try:
37
  with st.spinner("正在构建向量数据库..."):
38
- embeddings = SentenceTransformerEmbeddings(model_name="all-mpnet-base-v2")
39
- db = FAISS.from_texts(st.session_state.answer_list, embeddings)
 
 
 
 
40
  st.success("向量数据库构建完成!")
41
  print("向量数据库构建完成!")
42
  except Exception as e:
@@ -69,10 +73,10 @@ def answer_question(repo_id, temperature, max_length, question):
69
  # 获取答案
70
  try:
71
  with st.spinner("正在筛选本地数据集..."):
72
- question_embedding = embeddings.embed_query(question)
73
  question_embedding_str = " ".join(map(str, question_embedding))
74
  # print('question_embedding: ' + question_embedding_str)
75
- docs_and_scores = db.similarity_search_with_score(question_embedding_str)
76
 
77
  context = "\n".join([doc.page_content for doc, _ in docs_and_scores])
78
  print('context: ' + context)
 
1
  import streamlit as st
2
  import random
3
  from langchain_community.llms import HuggingFaceHub
4
+ from langchain_community.st.session_state.embeddings import SentenceTransformerEmbeddings
5
  from langchain_community.vectorstores import FAISS
6
  from datasets import load_dataset
7
  from opencc import OpenCC
 
35
  if not st.session_state.vector_created:
36
  try:
37
  with st.spinner("正在构建向量数据库..."):
38
+ # all-mpnet-base-v2 是一个由 Sentence Transformers 库提供的预训练模型,
39
+ # 专门用于生成高质量的句子嵌入(sentence embeddings)。
40
+ # all-mpnet-base-v2 在多个自然语言处理任务上表现出色,包括语义相似度计算、
41
+ # 文本检索、聚类等。它能够有效地捕捉句子的语义信息,并生成具有代表性的向量表示。
42
+ st.session_state.embeddings = SentenceTransformerEmbeddings(model_name="all-mpnet-base-v2")
43
+ st.session_state.db = FAISS.from_texts(st.session_state.answer_list, st.session_state.embeddings)
44
  st.success("向量数据库构建完成!")
45
  print("向量数据库构建完成!")
46
  except Exception as e:
 
73
  # 获取答案
74
  try:
75
  with st.spinner("正在筛选本地数据集..."):
76
+ question_embedding = st.session_state.embeddings.embed_query(question)
77
  question_embedding_str = " ".join(map(str, question_embedding))
78
  # print('question_embedding: ' + question_embedding_str)
79
+ docs_and_scores = st.session_state.db.similarity_search_with_score(question_embedding_str)
80
 
81
  context = "\n".join([doc.page_content for doc, _ in docs_and_scores])
82
  print('context: ' + context)