"""Streamlit demo: semantic search over sample sentences, plus a widget tour.

The first half embeds a handful of sentences with the ``moka-ai/m3e-base``
model, stores them in a Faiss inner-product index and shows the nearest
sentences for a fixed query.  The second half exercises basic Streamlit
widgets (table, checkbox, selectbox, button, slider, text/number input).
"""
import faiss
import numpy as np
import pandas as pd
import streamlit as st
import torch
from transformers import AutoModel, AutoTokenizer

# Load the embedding model and tokenizer.
# NOTE: Streamlit re-executes this script on every widget interaction, so the
# model is re-instantiated on each rerun; consider caching it if that is slow.
model_name = "moka-ai/m3e-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)


def _embed(text):
    """Return one fixed-size float32 embedding vector for *text*.

    The model yields one hidden vector per token, so the token axis is
    averaged away; otherwise texts with different token counts would produce
    arrays of different shapes that cannot be stacked into a Faiss matrix.
    NOTE(review): mean pooling is assumed to be the intended pooling for
    m3e-base -- confirm against the model card.
    """
    input_ids = tokenizer.encode(text, return_tensors="pt")
    with torch.no_grad():
        # model(...)[0] is the last hidden state; [0] selects the single
        # sequence in the batch, leaving a (tokens, hidden) matrix.
        token_vectors = model(input_ids)[0][0]
    return token_vectors.mean(dim=0).numpy().astype(np.float32)


# Sample documents to index.
texts = [
    "This is the first document.",
    "This is the second document.",
    "And this is the third one.",
    "Is this the first document?",
]

# Convert the documents to a (n_documents, dim) float32 matrix for Faiss.
embeddings = np.ascontiguousarray(
    np.stack([_embed(text) for text in texts]), dtype=np.float32
)

# Create a Faiss index that ranks by inner product (dot product) similarity.
d = embeddings.shape[1]  # Dimension of the embeddings
index = faiss.IndexFlatIP(d)

# Add the embeddings to the index.
index.add(embeddings)

# Search for documents similar to the query.
query = "This is a new document."
query_embedding = _embed(query)
query_matrix = query_embedding.reshape(1, -1)  # Faiss expects a 2-D batch

# Never request more neighbours than the index holds: Faiss pads missing
# results with label -1, which would silently index the last document.
k = min(2, index.ntotal)
scores, neighbours = index.search(query_matrix, k)

# Show the ranked results.
st.write(f"Query: {query}")
for rank, (doc_id, score) in enumerate(zip(neighbours[0], scores[0]), start=1):
    st.write(f"Rank {rank}: {texts[doc_id]} (similarity score: {score})")

# Search the index again for a larger result list.
# The original fragment referenced `question_embedding` and `knowledge`,
# which are not defined in this script; the query embedding and `texts`
# are the in-scope equivalents.
k = min(5, index.ntotal)
scores, neighbours = index.search(query_matrix, k)

# Display the results.
st.write("Top {} similar content:".format(k))
for rank, doc_id in enumerate(neighbours[0], start=1):
    st.write("{}: {} : {}".format(rank, texts[doc_id], doc_id))

# ---------------------------------------------------------------------------
# Streamlit widget tour
# ---------------------------------------------------------------------------
st.title('My first app')

st.write("Here's our first attempt at using data to create a table:")
df = pd.DataFrame({
    'first column': [1, 2, 3, 4],
    'second column': [10, 20, 30, 40],
})
st.write(df)

if st.checkbox('Show dataframe'):
    chart_data = pd.DataFrame(
        np.random.randn(20, 3),
        columns=['a', 'b', 'c'],
    )
    # Explicit equivalent of the bare `chart_data` "magic" expression.
    st.write(chart_data)

option = st.selectbox(
    'Which number do you like best?',
    df['first column'],
)
st.write('You selected: ', option)

text1 = st.text('This is some text.')

if st.button('Say hello'):
    st.write('Why hello there')
else:
    st.write('Goodbye')

agree = st.checkbox('I agree')
if agree:
    st.write('Great!')

age = st.slider('How old are you?', 0, 130, 25)
st.write("I'm ", age, 'years old')

title = st.text_input('Movie title', 'Life of Brian')
st.write('The current movie title is', title)

number = st.number_input('Insert a number')
st.write('The current number is ', number)