# Spaces: brianknowsai / collection-manager (Running)
# File size: 3,453 Bytes

import streamlit as st
from retrieve_kb import get_current_knowledge_bases, get_knowledge_base_information
from generate_kb import add_links_to_knowledge_base
from app import client, default_embedding_function
import pandas as pd
from tempfile import NamedTemporaryFile
import os

# Page title, followed by an on-demand dump of whatever
# get_current_knowledge_bases returns for the shared client.
st.title("Get knowledge boxes")

show_all_kbs = st.button("Get current knowledge bases")
if show_all_kbs:
    # Render the helper's result directly as JSON.
    st.json(get_current_knowledge_bases(client=client))

# Seed the session with an empty table the first time the key is missing so
# that later reads of st.session_state["df"] always find a DataFrame.
if "df" not in st.session_state:
    st.session_state["df"] = pd.DataFrame()

# Name of the knowledge base that the actions further down operate on.
collection_name = st.text_input(label="knowledge base name")

# Placeholders kept for the rest of the script.
info, collection = {}, None

# Two-column layout; the column handles are not used in this section.
col1, col2 = st.columns(2)

# "Get All": fetch the entries of the named knowledge base and cache them in
# the session as a flat table with the columns documents/source/title/ids.
if st.button("Get All"):
    collection_info, coll = get_knowledge_base_information(
        client=client,
        embedding_function=default_embedding_function,
        kb_name=collection_name,
    )
    # Keep the collection handle: the "Remove a split" action deletes through it.
    st.session_state["collection"] = coll
    df = pd.DataFrame.from_records(collection_info)
    if df.empty or "metadatas" not in df.columns:
        # An empty result has no "metadatas" column to flatten; indexing it
        # would raise KeyError, so store an empty table and tell the user.
        st.session_state["df"] = pd.DataFrame()
        st.warning(f"knowledge base '{collection_name}' returned no entries")
    else:
        # Entries without a metadata dict (e.g. None) fall back to the same
        # placeholder that is used for a missing key.
        df["source"] = df["metadatas"].apply(
            lambda x: x.get("source", "unkown") if isinstance(x, dict) else "unkown"
        )
        df["title"] = df["metadatas"].apply(
            lambda x: x.get("title", "unkown") if isinstance(x, dict) else "unkown"
        )
        df = df[["documents", "source", "title", "ids"]]
        st.session_state["df"] = df


# Show the cached table (only when it has rows) together with the distinct
# values of its "source" column.
cached_df = st.session_state["df"]
if len(cached_df) > 0:
    st.dataframe(cached_df, width=3000)
    distinct_sources = cached_df["source"].unique()
    st.text(f"unique urls:  {len(distinct_sources)}")
    st.dataframe(distinct_sources)
# Delete a single split from the loaded collection, identified by its id.
st.header("Remove a split")
split_id = st.text_input("Insert a split id")
if st.button("Remove Id from collection"):
    cached_df = st.session_state["df"]
    # The cached table only has an "ids" column once "Get All" has loaded a
    # collection; before that, treat every id as unknown instead of crashing.
    known_ids = cached_df["ids"].tolist() if "ids" in cached_df.columns else []
    if split_id in known_ids:
        # Pass the id the user typed (the previous code sent the literal
        # string "id", so nothing matching was ever deleted).
        st.session_state["collection"].delete(ids=[split_id])
        # Keep the cached table in sync so the id is not reported as present again.
        st.session_state["df"] = cached_df[cached_df["ids"] != split_id]
        st.success(f"id {split_id} deleted")
    else:
        st.error(f"id {split_id} not in kb")

# Pass a single URL to add_links_to_knowledge_base for the knowledge base
# whose name was entered in the "knowledge base name" box.
st.header("Add url to existing collection")
url_text = st.text_input("Insert a url link")
add_url_clicked = st.button("add url to collection")
if add_url_clicked:
    # The helper receives a list of links, so the one URL is wrapped in a list.
    st.write(
        add_links_to_knowledge_base(
            client=client, kb_name=collection_name, urls=[url_text]
        )
    )

# Upload a PDF, spill it to a temporary file and hand that file's path to
# add_links_to_knowledge_base for the knowledge base named above.
st.header("Add pdf to existing collection")
uploaded_pdf = st.file_uploader("Choose a PDF file", type="pdf")
if st.button("add pdf"):
    if uploaded_pdf is None:
        # No file selected yet: there is nothing to read.
        st.error("Please choose a PDF file first")
    else:
        # Write the upload to disk. Leaving the `with` block closes the file,
        # which flushes the bytes and allows it to be reopened by name (some
        # platforms forbid reopening a NamedTemporaryFile while it is open).
        with NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
            tmp_file.write(uploaded_pdf.getvalue())
            tmp_path = tmp_file.name
        try:
            print("PATH: ", tmp_path)
            res = add_links_to_knowledge_base(
                client=client, kb_name=collection_name, urls=[tmp_path]
            )
            st.write(res)
        finally:
            # delete=False means cleanup is our job; do it even when the
            # helper raises so temporary files do not pile up.
            os.remove(tmp_path)

# if st.button("add pdf"):
#     urls = [url_text]  # put in a list even if only one
#     res = add_links_to_knowledge_base(client=client, kb_name=collection_name, urls=urls)
#     st.write(res)


# Upload a CSV, preview it, and on request pass its cell values as links to
# add_links_to_knowledge_base for the knowledge base named above.
st.header("Add csv to existing collection")
uploaded_file = st.file_uploader("Choose a CSV file", type=["csv"])

if uploaded_file is not None:
    try:
        new_df = pd.read_csv(uploaded_file)
    except Exception as e:
        # Show the parse error in the page; the "add" button is only offered
        # when parsing succeeded, so it can never run without a DataFrame.
        st.error(str(e))
    else:
        st.write("DataFrame:")
        st.write(new_df)
        if st.button("add csv urls to collection"):
            # The other calls in this file pass a flat list of strings, so
            # flatten the table row by row and skip empty cells rather than
            # sending a list of row-lists.
            # NOTE(review): assumes every non-empty cell is a link - confirm
            # against the expected CSV layout.
            urls = [
                str(cell) for cell in new_df.to_numpy().ravel() if pd.notna(cell)
            ]
            st.write(urls)
            res = add_links_to_knowledge_base(
                client=client, kb_name=collection_name, urls=urls
            )
            st.write(res)