"""Streamlit page for inspecting and editing knowledge bases.

The page lets the user:

* list the existing knowledge bases,
* load every split of one knowledge base into a table,
* remove a single split by id,
* add content to the knowledge base from a URL, an uploaded PDF, or a CSV
  file of links.
"""

import os
from tempfile import NamedTemporaryFile

import pandas as pd
import streamlit as st

# Local imports keep their original relative order in case importing ``app``
# has side effects the other modules rely on.
from retrieve_kb import get_current_knowledge_bases, get_knowledge_base_information
from generate_kb import add_links_to_knowledge_base
from app import client, default_embedding_function

# Placeholder shown when a split has no value for a metadata key.  The
# spelling is kept as-is because it is the value the page has always shown.
MISSING_METADATA = "unkown"


def _metadata_field(metadata, key):
    """Return ``metadata[key]``, or the placeholder if it is unavailable.

    Non-dict metadata (for example ``None``) yields the placeholder instead
    of raising.
    """
    if isinstance(metadata, dict):
        return metadata.get(key, MISSING_METADATA)
    return MISSING_METADATA


st.title("Get knowledge boxes")

if st.button("Get current knowledge bases"):
    kbs = get_current_knowledge_bases(client=client)
    st.json(kbs)

collection_name = st.text_input(label="knowledge base name")

# The loaded table lives in session state so it survives Streamlit reruns.
if "df" not in st.session_state:
    st.session_state["df"] = pd.DataFrame()

# NOTE(review): these columns are created but never populated.
col1, col2 = st.columns(2)

if st.button("Get All"):
    collection_info, coll = get_knowledge_base_information(
        client=client,
        embedding_function=default_embedding_function,
        kb_name=collection_name,
    )
    # Keep the collection handle so the "remove" action can use it later.
    st.session_state["collection"] = coll

    df = pd.DataFrame.from_records(collection_info)
    df["source"] = df["metadatas"].apply(lambda meta: _metadata_field(meta, "source"))
    df["title"] = df["metadatas"].apply(lambda meta: _metadata_field(meta, "title"))
    st.session_state["df"] = df[["documents", "source", "title", "ids"]]

if len(st.session_state["df"]) != 0:
    st.dataframe(st.session_state["df"], width=3_000)
    unique_df = st.session_state["df"]["source"].unique()
    st.text(f"unique urls: {len(unique_df)}")
    st.dataframe(unique_df)

st.header("Remove a split")
split_id = st.text_input("Insert a split id")
if st.button("Remove Id from collection"):
    loaded = st.session_state["df"]
    # Before "Get All" the cached frame is empty and has no "ids" column.
    known_ids = loaded["ids"].values.tolist() if "ids" in loaded.columns else []
    stored_collection = st.session_state.get("collection")
    if split_id in known_ids and stored_collection is not None:
        # Pass the actual id; the previous code sent the literal string "id".
        stored_collection.delete(ids=[split_id])
        # Drop the row from the cached table so it matches the collection.
        st.session_state["df"] = loaded[loaded["ids"] != split_id]
        st.success(f"id {split_id} deleted")
    else:
        st.error(f"id {split_id} not in kb")

st.header("Add url to existing collection")
url_text = st.text_input("Insert a url link")
if st.button("add url to collection"):
    urls = [url_text]  # the helper expects a list, even for a single link
    res = add_links_to_knowledge_base(client=client, kb_name=collection_name, urls=urls)
    st.write(res)

st.header("Add pdf to existing collection")
uploaded_pdf = st.file_uploader("Choose a PDF file", type="pdf")
if st.button("add pdf"):
    if uploaded_pdf is None:
        st.error("Please upload a PDF file first")
    else:
        # The upload is written to a temporary file and its path is handed
        # to the ingestion helper.  The file is closed before ingestion so
        # that its contents are fully flushed to disk.
        with NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
            tmp_file.write(uploaded_pdf.getvalue())
            tmp_path = tmp_file.name
        try:
            print("PATH: ", tmp_path)
            res = add_links_to_knowledge_base(
                client=client, kb_name=collection_name, urls=[tmp_path]
            )
            st.write(res)
        finally:
            # Always remove the temporary file, even if ingestion fails.
            os.remove(tmp_path)

st.header("Add csv to existing collection")
uploaded_csv = st.file_uploader("Choose a CSV file", type=["csv"])
if uploaded_csv is not None:
    new_df = None
    try:
        new_df = pd.read_csv(uploaded_csv)
    except Exception as e:
        # Show the parsing problem on the page instead of crashing it.
        st.error(str(e))
    else:
        st.write("DataFrame:")
        st.write(new_df)

    if st.button("add csv urls to collection"):
        if new_df is None:
            st.error("The CSV file could not be read")
        else:
            # NOTE(review): ``values.tolist()`` yields one list per CSV row,
            # so ``urls`` is a list of lists; confirm the helper expects that
            # rather than a flat list of strings.
            urls = new_df.values.tolist()
            st.write(urls)
            res = add_links_to_knowledge_base(
                client=client, kb_name=collection_name, urls=urls
            )
            st.write(res)