Spaces:
Runtime error
Runtime error
File size: 6,488 Bytes
01b8e8e b8acde7 dd7488f 01b8e8e 39503cb 01b8e8e f456ef3 acb72cc 710a34d 39503cb 01b8e8e 5634055 01b8e8e 843bc9e 5634055 dd7488f 39503cb 01b8e8e 6a6afbf 843bc9e 42468fb 101be32 42468fb 01b8e8e 39503cb 843bc9e 01b8e8e 6a6afbf 01b8e8e 6a6afbf 843bc9e 1b47089 843bc9e 1b47089 6a6afbf 1b47089 dbcf2e8 46323da 6a6afbf dbcf2e8 6a6afbf 843bc9e 4107940 843bc9e 4107940 dbcf2e8 710a34d dbcf2e8 4107940 6a6afbf 4107940 dbcf2e8 46323da 6a6afbf dbcf2e8 6a6afbf 843bc9e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 |
import streamlit as st
from interface.utils import (
get_pipelines,
extract_text_from_url,
extract_text_from_file,
reset_vars_data,
)
from interface.draw_pipelines import get_pipeline_graph
def component_select_pipeline(container):
    """Draw the pipeline selector and keep the active pipeline in session state.

    A select box lists the pipelines returned by ``get_pipelines()``
    ("Keyword Search" is pre-selected when present), followed by one input
    widget per parameter of the chosen pipeline. The pipeline is (re)built and
    stored in ``st.session_state["pipeline"]`` when no pipeline is stored yet,
    when the selection or any parameter value differs from what is stored, or
    when the stored pipeline is "Keyword Search".

    Args:
        container: Streamlit container (used as a context manager) in which
            the widgets are drawn.
    """
    names, builders, params_per_pipeline = get_pipelines()
    if "Keyword Search" in names:
        default_index = names.index("Keyword Search")
    else:
        default_index = 0

    with st.spinner("Loading Pipeline..."), container:
        selected_pipeline = st.selectbox(
            "Select pipeline", names, index=default_index
        )
        index_pipe = names.index(selected_pipeline)

        st.write("---")
        st.header("Pipeline Parameters")
        params = params_per_pipeline[index_pipe]
        for parameter, current in params.items():
            # The widget type follows the type of the current value; bool is
            # tested before int because bool is a subclass of int.
            if isinstance(current, str):
                chosen = st.text_input(parameter, current)
            elif isinstance(current, bool):
                chosen = st.checkbox(parameter, current)
            elif isinstance(current, int):
                chosen = int(st.number_input(parameter, value=current))
            elif isinstance(current, float):
                chosen = float(st.number_input(parameter, value=current))
            else:
                # Values of any other type get no widget and stay as they are.
                chosen = current
            params[parameter] = chosen

        stored = st.session_state["pipeline"]
        # Evaluated left to right with short-circuiting, so the stored
        # parameters are only inspected once a pipeline is known to exist.
        changed = (
            stored is None
            or stored["name"] != selected_pipeline
            or list(
                st.session_state["pipeline_func_parameters"][index_pipe].values()
            )
            != list(params.values())
        )

        # TODO: Use elasticsearch and remove this workaround for TFIDF
        # Keyword Search is rebuilt on every run, even when nothing changed.
        if changed or stored["name"] == "Keyword Search":
            st.session_state["pipeline_func_parameters"] = params_per_pipeline
            builder = builders[index_pipe]
            search_pipeline, index_pipeline = builder(**params)
            st.session_state["pipeline"] = {
                "name": selected_pipeline,
                "search_pipeline": search_pipeline,
                "index_pipeline": index_pipeline,
                "doc": builder.__doc__,
            }
            if changed:
                # Only a real change of pipeline or parameters resets the
                # stored variables; the Keyword Search reload does not.
                reset_vars_data()
def component_show_pipeline(pipeline, pipeline_name):
    """Draw the pipeline graph, and its documentation, inside an expander.

    Args:
        pipeline: Mapping holding the pipeline documentation under ``"doc"``
            (may be ``None``) and the pipeline object under ``pipeline_name``.
        pipeline_name: Key of the pipeline object inside ``pipeline`` whose
            graph is drawn.
    """
    doc = pipeline["doc"]
    has_doc = doc is not None
    # A warning sign is appended to the label when the doc mentions "BUG".
    suffix = " ⚠️" if has_doc and "BUG" in doc else ""

    with st.expander("Show pipeline" + suffix):
        if has_doc:
            st.markdown(doc)
        graph = get_pipeline_graph(pipeline[pipeline_name])
        st.plotly_chart(graph, use_container_width=True)
def component_show_search_result(container, results):
    """Render every search match inside ``container``.

    Each match shows its 1-based position, text and document id. The chunk id
    (``meta["_split_id"]``), the score (three decimals) and an audio player
    (``content_audio``) are shown only when the document carries them.

    Args:
        container: Streamlit container (used as a context manager) in which
            the matches are drawn.
        results: Iterable of mappings with the keys ``"text"``, ``"id"`` and
            ``"meta"``, and optionally ``"score"`` and ``"content_audio"``.
    """
    with container:
        for position, document in enumerate(results, start=1):
            st.markdown(f"### Match {position}")
            st.markdown(f"**Text**: {document['text']}")
            st.markdown(f"**Document**: {document['id']}")

            meta = document["meta"]
            if "_split_id" in meta:
                st.markdown(f"**Document Chunk**: {meta['_split_id']}")
            if "score" in document:
                st.markdown(f"**Score**: {document['score']:.3f}")
            if "content_audio" in document:
                audio_source = str(document["content_audio"])
                st.audio(audio_source)

            # Horizontal rule separating consecutive matches.
            st.markdown("---")
def component_text_input(container, doc_id):
    """Draw the Text Input widget and collect the documents typed in it.

    Text inputs are drawn one after another, each keyed by the current
    ``doc_id``, until one of them is empty. Every non-empty input becomes a
    document and advances ``doc_id`` by one.

    Args:
        container: Streamlit container (used as a context manager) in which
            the widget is drawn.
        doc_id: Id given to the first document; also used as the widget key.

    Returns:
        Tuple ``(corpus, doc_id)``: the list of ``{"text", "id"}`` documents
        and the next unused document id.
    """
    corpus = []
    with container:
        with st.expander("Enter documents"):
            while True:
                entered = st.text_input(f"Document {doc_id}", key=doc_id)
                if entered == "":
                    # The first empty box ends the form.
                    break
                corpus.append({"text": entered, "id": doc_id})
                doc_id += 1
                st.markdown("---")
    return corpus, doc_id
def component_article_url(container, doc_id):
    """Draw the Article URL widget and collect the text behind each URL.

    URL inputs are drawn one after another, each keyed by the current
    ``doc_id``, until one of them is empty. The text of every entered URL is
    obtained with ``extract_text_from_url`` and shown in its own preview
    expander below the input form.

    Args:
        container: Streamlit container (used as a context manager) in which
            the widget is drawn.
        doc_id: Id given to the first document; also used as the widget key.

    Returns:
        Tuple ``(corpus, doc_id)``: the list of ``{"text", "id"}`` documents
        and the next unused document id.
    """
    corpus = []
    with container:
        with st.expander("Enter URLs"):
            while True:
                url = st.text_input(f"URL {doc_id}", key=doc_id)
                if url == "":
                    # The first empty box ends the form.
                    break
                corpus.append({"text": extract_text_from_url(url), "id": doc_id})
                doc_id += 1
                st.markdown("---")

        # Previews are numbered from 0 in the order the URLs were entered.
        for position, entry in enumerate(corpus):
            with st.expander(f"Preview URL {position}"):
                st.write(entry["text"])
    return corpus, doc_id
def component_file_input(container, doc_id):
    """Draw the file upload widget and collect the text extracted from files.

    File uploaders are drawn one after another, each keyed by the current
    ``doc_id``. The loop stops at the first uploader that holds no file, or at
    the first file for which ``extract_text_from_file`` returns ``None``.
    Every successfully extracted text becomes a document, advances ``doc_id``
    by one and is shown in its own preview expander below the upload form.

    Args:
        container: Streamlit container (used as a context manager) in which
            the widget is drawn.
        doc_id: Id given to the first document; also used as the widget key.

    Returns:
        Tuple ``(corpus, doc_id)``: the list of ``{"text", "id"}`` documents
        and the next unused document id.
    """
    corpus = []
    with container:
        with st.expander("Enter Files"):
            while True:
                file = st.file_uploader(
                    "Upload a .txt, .pdf, .csv, image file, audio file", key=doc_id
                )
                # Identity checks against None (PEP 8) instead of ``!= None``,
                # which would go through the object's own comparison methods.
                if file is None:
                    break
                extracted_text = extract_text_from_file(file)
                if extracted_text is None:
                    # NOTE(review): presumably None means the file could not
                    # be read or is unsupported; confirm against
                    # extract_text_from_file.
                    break
                corpus.append({"text": extracted_text, "id": doc_id})
                doc_id += 1
                st.markdown("---")

        # Previews are numbered from 0 in the order the files were uploaded.
        for position, entry in enumerate(corpus):
            with st.expander(f"Preview File {position}"):
                st.write(entry["text"])
    return corpus, doc_id
|