Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| # To make things easier later, we're also importing numpy and pandas for | |
| # working with sample data. | |
| import numpy as np | |
| import pandas as pd | |
| import faiss | |
| from sentence_transformers import SentenceTransformer | |
def _load_model():
    """Load the m3e-base sentence-embedding model."""
    return SentenceTransformer('moka-ai/m3e-base')


# Streamlit re-executes this script from the top on every widget interaction,
# so building the model at module level reloads it on each rerun.  Cache the
# loader when the installed Streamlit offers a resource cache; otherwise fall
# back to the plain (uncached) load.
_cache_resource = (
    getattr(st, 'cache_resource', None)
    or getattr(st, 'experimental_singleton', None)
)
model = _cache_resource(_load_model)() if _cache_resource else _load_model()

# Sample sentences used as a quick check that the model can encode text.
sentences = [
    '* Moka 此文本嵌入模型由 MokaAI 训练并开源,训练脚本使用 uniem',
    '* Massive 此文本嵌入模型通过**千万级**的中文句对数据集进行训练',
    '* Mixed 此文本嵌入模型支持中英双语的同质文本相似度计算,异质文本检索等功能,未来还会支持代码检索,ALL in one',
]

# Sentences are encoded by calling model.encode().
# NOTE: `embeddings` is reassigned further down when the knowledge base is
# embedded, so this result only matters for the debug output below.
embeddings = model.encode(sentences)

# Uncomment to inspect the sample embeddings in the app:
# for sentence, embedding in zip(sentences, embeddings):
#     st.write("Sentence:", sentence)
#     st.write("Embedding:", embedding)
#     st.write("")
def get_embedding(text_content):
    """Encode ``text_content`` with the module-level embedding model.

    The argument is handed to ``model.encode`` unchanged and that call's
    result is returned as-is.
    """
    encoded = model.encode(text_content)
    return encoded
# Load the knowledge base: one entry per non-blank line of the text file.
knowledge_file = 'knowledge.txt'
with open(knowledge_file, 'r', encoding='utf-8') as file:
    stripped_lines = (line.strip() for line in file)
    # Blank lines carry no content; indexing them would let empty strings
    # show up as search results.
    knowledge = [text for text in stripped_lines if text]

if not knowledge:
    # Without entries there is no embedding dimension to size the index
    # with (`embeddings.shape[1]` would raise), so report and halt this run.
    st.error("No knowledge entries found in '{}'.".format(knowledge_file))
    st.stop()

# Embed every entry with a single batched call instead of one call per line.
# Faiss operates on float32 matrices, hence the explicit dtype.
embeddings = np.asarray(get_embedding(knowledge), dtype=np.float32)

# Create a flat index that scores by inner product (IP) and add all entries.
# NOTE(review): unless the model normalizes its outputs, inner product is not
# the same as cosine similarity -- confirm this is the intended measure.
index = faiss.IndexFlatIP(embeddings.shape[1])
index.add(embeddings)
# Get user input for a question
question = st.text_input("Enter your question: ")

k = 5  # Maximum number of results to retrieve
# Never ask for more neighbours than the index holds: Faiss pads missing
# results with label -1, which would silently index the last knowledge entry.
top_k = min(k, index.ntotal)

# The text box is empty on first render; only search once there is a query.
if question.strip() and top_k > 0:
    # Perform embedding for the question
    question_embedding = get_embedding(question)

    # Search the index for the most similar content.  D holds the scores and
    # I the positions of the matches within `knowledge`.
    D, I = index.search(np.array([question_embedding]), top_k)

    # Display the results as "<rank>: <text> : <position in knowledge>".
    st.write("Top {} similar content:".format(top_k))
    for rank, idx in enumerate(I[0], start=1):
        # All three values go to format(); previously the third was passed
        # to st.write instead, leaving a placeholder unfilled (IndexError).
        st.write("{}: {} : {}".format(rank, knowledge[idx], idx))
# --- Streamlit tutorial widgets -------------------------------------------
st.title('My first app')
st.write("Here's our first attempt at using data to create a table:")

# Small static table; its first column also feeds the select box below.
df = pd.DataFrame(
    {
        'first column': [1, 2, 3, 4],
        'second column': [10, 20, 30, 40],
    }
)
st.write(df)

# Optional table of random values, shown only while the box is ticked.
show_random_table = st.checkbox('Show dataframe')
if show_random_table:
    random_values = np.random.randn(20, 3)
    chart_data = pd.DataFrame(random_values, columns=['a', 'b', 'c'])
    # Bare expression on purpose: this relies on Streamlit's "magic"
    # rendering of standalone expressions to display the frame.
    chart_data

option = st.selectbox('Which number do you like best?', df['first column'])
st.write('You selected: ', option)

text1 = st.text('This is some text.')

# The button is true only on the run triggered by the click.
greeting = 'Why hello there' if st.button('Say hello') else 'Goodbye'
st.write(greeting)

agree = st.checkbox('I agree')
if agree:
    st.write('Great!')

# Slider arguments: label, minimum, maximum, initial value.
age = st.slider('How old are you?', 0, 130, 25)
st.write("I'm ", age, 'years old')

title = st.text_input('Movie title', 'Life of Brian')
st.write('The current movie title is', title)

number = st.number_input('Insert a number')
st.write('The current number is ', number)