Spaces:
Running
Running
import streamlit as st | |
from retrieve_kb import get_current_knowledge_bases, get_knowledge_base_information | |
from generate_kb import add_links_to_knowledge_base | |
from app import client, default_embedding_function | |
import pandas as pd | |
from tempfile import NamedTemporaryFile | |
import os | |
# Page heading.
st.title("Get knowledge boxes")

# On demand, fetch the knowledge bases known to the shared client and render
# the result as JSON.
if st.button("Get current knowledge bases"):
    st.json(get_current_knowledge_bases(client=client))
# Name of the knowledge base (collection) that the sections below operate on.
collection_name = st.text_input(label="knowledge base name")
info = {}
collection = None

# The fetched table lives in session state so that later button handlers
# (e.g. "Remove Id from collection") can still read it.
if "df" not in st.session_state:
    st.session_state["df"] = pd.DataFrame()

col1, col2 = st.columns(2)

if st.button("Get All"):
    collection_info, coll = get_knowledge_base_information(
        client=client,
        embedding_function=default_embedding_function,
        kb_name=collection_name,
    )
    # Keep the collection handle around: the delete handler needs it.
    st.session_state["collection"] = coll
    collection = coll

    df = pd.DataFrame.from_records(collection_info)
    if df.empty or "metadatas" not in df.columns:
        # Nothing to flatten: indexing "metadatas" on an empty result would
        # raise KeyError. Reset the cached table so stale rows from a
        # previously loaded knowledge base are not shown.
        st.session_state["df"] = pd.DataFrame()
        st.warning(f"knowledge base {collection_name} has no entries")
    else:
        # Entries without a metadata dict (None / missing) fall back to the
        # same placeholder as entries whose dict lacks the key.
        metadatas = df["metadatas"].apply(
            lambda meta: meta if isinstance(meta, dict) else {}
        )
        df["source"] = metadatas.apply(lambda meta: meta.get("source", "unkown"))
        df["title"] = metadatas.apply(lambda meta: meta.get("title", "unkown"))
        df = df[["documents", "source", "title", "ids"]]
        st.session_state["df"] = df

# Show the cached table plus the distinct sources it was built from.
if len(st.session_state["df"]) != 0:
    st.dataframe(st.session_state["df"], width=3_000)
    unique_df = st.session_state["df"]["source"].unique()
    st.text(f"unique urls: {len(unique_df)}")
    st.dataframe(unique_df)
# Delete a single split (one stored chunk) from the loaded collection by id.
st.header("Remove a split")
split_id = st.text_input("Insert a split id")
if st.button("Remove Id from collection"):
    loaded_df = st.session_state["df"]
    # Before "Get All" has run the cached frame is empty and has no "ids"
    # column, so treat that case as "no known ids" instead of raising KeyError.
    known_ids = loaded_df["ids"].tolist() if "ids" in loaded_df.columns else []
    if split_id in known_ids:
        # Pass the id the user typed (the previous code sent the literal
        # string "id", so the requested split was never removed).
        st.session_state["collection"].delete(ids=[split_id])
        # Drop the row from the cached table so it matches the collection.
        st.session_state["df"] = loaded_df[loaded_df["ids"] != split_id]
        st.success(f"id {split_id} deleted")
    else:
        st.error(f"id {split_id} not in kb")
# Ingest one web page into the knowledge base named in `collection_name`.
st.header("Add url to existing collection")
url_text = st.text_input("Insert a url link")
if st.button("add url to collection"):
    # The helper is given a list of links, so the single URL is wrapped in one.
    result = add_links_to_knowledge_base(
        client=client,
        kb_name=collection_name,
        urls=[url_text],
    )
    st.write(result)
# Ingest an uploaded PDF into the knowledge base named in `collection_name`.
st.header("Add pdf to existing collection")
uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")
if st.button("add pdf"):
    if uploaded_file is None:
        # No upload yet: calling .getvalue() on None would raise.
        st.error("Please choose a PDF file first")
    else:
        # The ingestion helper takes paths/links, so spill the upload to disk.
        # delete=False keeps the file after the `with` closes it; closing first
        # guarantees the bytes are flushed before the path is handed on.
        with NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
            tmp_file.write(uploaded_file.getvalue())
            tmp_path = tmp_file.name
        print("PATH: ", tmp_path)
        try:
            res = add_links_to_knowledge_base(
                client=client, kb_name=collection_name, urls=[tmp_path]
            )
            st.write(res)
        finally:
            # Always remove the temporary file, even if ingestion failed.
            os.remove(tmp_path)
# Ingest the links listed in an uploaded CSV into the knowledge base named in
# `collection_name`.
st.header("Add csv to existing collection")
uploaded_file = st.file_uploader("Choose a CSV file", type=["csv"])

# Stays None until a CSV has been uploaded AND parsed successfully; the button
# handler checks it instead of hitting an unbound name.
new_df = None
if uploaded_file is not None:
    try:
        new_df = pd.read_csv(uploaded_file)
    except Exception as e:
        # UI boundary: surface any parse failure to the user rather than crash.
        st.error(str(e))
    else:
        st.write("DataFrame:")
        st.write(new_df)

if st.button("add csv urls to collection"):
    if new_df is None:
        st.error("Please upload a valid CSV file first")
    else:
        # NOTE(review): .values.tolist() yields one inner list per CSV row,
        # whereas the url/pdf sections pass a flat list of strings — confirm
        # which shape add_links_to_knowledge_base expects.
        urls = new_df.values.tolist()
        st.write(urls)
        res = add_links_to_knowledge_base(
            client=client, kb_name=collection_name, urls=urls
        )
        st.write(res)