Spaces:

dengkane
/

learn-streamlit

Sleeping

App Files Files Community

learn-streamlit / app.py

dengkane

Update app.py

2fa8790 over 2 years ago

raw

history blame

3.07 kB

	import streamlit as st
	# To make things easier later, we're also importing numpy and pandas for
	# working with sample data.
	import numpy as np
	import pandas as pd
	import faiss

	from sentence_transformers import SentenceTransformer

	# Load the 'moka-ai/m3e-base' sentence-embedding model used by the whole script.
	model = SentenceTransformer("moka-ai/m3e-base")

	# Sample sentences that demonstrate the encoder.
	sentences = [
	    "* Moka 此文本嵌入模型由 MokaAI 训练并开源，训练脚本使用 uniem",
	    "* Massive 此文本嵌入模型通过千万级的中文句对数据集进行训练",
	    "* Mixed 此文本嵌入模型支持中英双语的同质文本相似度计算，异质文本检索等功能，未来还会支持代码检索，ALL in one",
	]

	# model.encode() maps the list of sentences to their embeddings.
	# NOTE: `embeddings` is rebound further down when the knowledge file is
	# embedded, so this value only serves the disabled debug dump below.
	embeddings = model.encode(sentences)

	# Debug dump of every sentence/embedding pair (disabled):
	# for sentence, embedding in zip(sentences, embeddings):
	#     st.write("Sentence:", sentence)
	#     st.write("Embedding:", embedding)
	#     st.write("")


	def get_embedding(text_content):
	    """Encode ``text_content`` with the module-level ``model``.

	    Args:
	        text_content: Input forwarded unchanged to ``model.encode``.

	    Returns:
	        Whatever ``model.encode`` returns for that input.
	    """
	    encoded = model.encode(text_content)
	    return encoded

	# Load the knowledge base: every non-blank line of the file is one passage.
	knowledge_file = 'knowledge.txt'
	with open(knowledge_file, 'r', encoding='utf-8') as file:
	    # Blank lines are dropped so that empty strings are never embedded or
	    # returned as search hits.
	    knowledge = [text for text in (line.strip() for line in file) if text]

	# Embed every passage; row i of `embeddings` belongs to knowledge[i].
	# Assumes get_embedding() returns one 1-D vector per passage -- TODO confirm.
	embeddings = np.array([get_embedding(text) for text in knowledge])

	# Build the index with Inner Product (IP) as the similarity measure.
	# NOTE(review): inner product equals cosine similarity only for unit-length
	# vectors; confirm whether the model's embeddings are normalized.
	index = None
	if knowledge:
	    index = faiss.IndexFlatIP(embeddings.shape[1])
	    index.add(embeddings)
	else:
	    # Without passages there is no embedding dimension to size the index
	    # with, so report the problem instead of crashing on `shape[1]`.
	    st.warning("No knowledge entries found in {}.".format(knowledge_file))

	# Get user input for a question
	question = st.text_input("Enter your question: ")

	# Search only when there is something to search for and something to search in.
	if index is not None and question.strip():
	    # Perform embedding for the question
	    question_embedding = get_embedding(question)

	    # Never request more neighbours than there are passages; otherwise the
	    # result would contain placeholder indices (presumably -1 in faiss) and
	    # knowledge[-1] would silently show the wrong passage.
	    k = 5  # Maximum number of results to retrieve
	    k = min(k, len(knowledge))
	    D, I = index.search(np.array([question_embedding]), k)

	    # Display the results as "<rank>: <passage> : <index into knowledge>".
	    st.write("Top {} similar content:".format(k))
	    for rank, idx in enumerate(I[0], start=1):
	        # All three values go into format(); previously the third one was
	        # passed outside the call, which raised IndexError.
	        st.write("{}: {} : {}".format(rank, knowledge[idx], idx))

	# ---- Streamlit widget tour ----
	st.title('My first app')

	st.write("Here's our first attempt at using data to create a table:")

	# Small two-column table; its first column also feeds the select box below.
	df = pd.DataFrame({
	    'first column': [1, 2, 3, 4],
	    'second column': [10, 20, 30, 40],
	})

	st.write(df)

	# Checkbox-gated random data: a 20 x 3 frame of np.random.randn samples.
	if st.checkbox('Show dataframe'):
	    chart_data = pd.DataFrame(np.random.randn(20, 3), columns=list('abc'))

	    # Bare expression kept on purpose; presumably displayed through
	    # Streamlit's "magic" handling of standalone expressions -- confirm
	    # that magic is enabled for this app.
	    chart_data


	option = st.selectbox('Which number do you like best?', df['first column'])

	st.write('You selected: ', option)

	text1 = st.text('This is some text.')

	# The return value of st.button picks which message is written.
	st.write('Why hello there' if st.button('Say hello') else 'Goodbye')


	agree = st.checkbox('I agree')
	if agree:
	    st.write('Great!')

	age = st.slider('How old are you?', 0, 130, 25)
	st.write("I'm ", age, 'years old')

	title = st.text_input('Movie title', 'Life of Brian')
	st.write('The current movie title is', title)

	number = st.number_input('Insert a number')
	st.write('The current number is ', number)