File size: 5,202 Bytes
01b8e8e
4107940
dd7488f
01b8e8e
39503cb
01b8e8e
f456ef3
01b8e8e
 
39503cb
101be32
39503cb
 
 
01b8e8e
f456ef3
 
 
 
 
 
 
 
 
 
 
 
 
f65e26a
 
 
304cf45
 
f65e26a
f456ef3
304cf45
 
 
6c3736e
f65e26a
 
 
5634055
6c3736e
39503cb
01b8e8e
5634055
01b8e8e
39503cb
5634055
 
 
dd7488f
39503cb
 
01b8e8e
 
 
 
 
 
1d466d7
101be32
1d466d7
 
01b8e8e
 
39503cb
01b8e8e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39503cb
01b8e8e
39503cb
1b47089
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dbcf2e8
46323da
 
 
dbcf2e8
1b47089
 
 
 
4107940
 
 
 
 
 
 
 
 
dbcf2e8
 
 
4107940
 
 
 
 
 
 
 
 
 
dbcf2e8
46323da
 
 
dbcf2e8
4107940
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
import streamlit as st
from interface.utils import get_pipelines, extract_text_from_url, extract_text_from_file
from interface.draw_pipelines import get_pipeline_graph


def component_select_pipeline(container):
    """Draw the pipeline selector and parameter widgets, rebuilding the pipeline on change.

    The available pipelines come from ``get_pipelines()``, which is unpacked
    into three parallel sequences: names, factory callables and per-pipeline
    parameter dicts. One widget is drawn per parameter, chosen by the type of
    its default value (str, bool, int or float); values of any other type are
    kept unchanged and get no widget.

    The selected factory is called again, and the result stored in
    ``st.session_state["pipeline"]``, when no pipeline is stored yet, when a
    different pipeline name is selected, or when any parameter value differs
    from the ones stored in ``st.session_state["pipeline_func_parameters"]``.

    Args:
        container: Streamlit container (used as a context manager) in which
            the widgets are drawn.

    Note:
        ``st.session_state["pipeline"]`` is read by key, so it is expected to
        have been initialised (possibly to ``None``) before this is called.
    """
    pipeline_names, pipeline_funcs, pipeline_func_parameters = get_pipelines()
    with container:
        selected_pipeline = st.selectbox(
            "Select pipeline",
            pipeline_names,
            index=pipeline_names.index("Keyword Search")
            if "Keyword Search" in pipeline_names
            else 0,
        )
        index_pipe = pipeline_names.index(selected_pipeline)
        st.write("---")
        st.header("Pipeline Parameters")
        for parameter, value in pipeline_func_parameters[index_pipe].items():
            if isinstance(value, str):
                value = st.text_input(parameter, value)
            # bool must be tested before int: bool is a subclass of int.
            elif isinstance(value, bool):
                value = st.checkbox(parameter, value)
            elif isinstance(value, int):
                # Pass the default as ``value=``: the second positional
                # parameter of st.number_input is ``min_value``, which would
                # turn the default into a lower bound the user cannot go below.
                value = int(st.number_input(parameter, value=value))
            elif isinstance(value, float):
                value = float(st.number_input(parameter, value=value))
            # Only existing keys are reassigned, so the dict size is stable
            # while it is being iterated.
            pipeline_func_parameters[index_pipe][parameter] = value
        if (
            st.session_state["pipeline"] is None
            or st.session_state["pipeline"]["name"] != selected_pipeline
            or list(st.session_state["pipeline_func_parameters"][index_pipe].values())
            != list(pipeline_func_parameters[index_pipe].values())
        ):
            st.session_state["pipeline_func_parameters"] = pipeline_func_parameters
            # The factory returns a (search_pipeline, index_pipeline) pair.
            (search_pipeline, index_pipeline,) = pipeline_funcs[
                index_pipe
            ](**pipeline_func_parameters[index_pipe])
            st.session_state["pipeline"] = {
                "name": selected_pipeline,
                "search_pipeline": search_pipeline,
                "index_pipeline": index_pipeline,
                "doc": pipeline_funcs[index_pipe].__doc__,
            }


def component_show_pipeline(pipeline, pipeline_name):
    """Render a "Show pipeline" expander with the pipeline description and graph.

    Args:
        pipeline: Mapping holding a ``"doc"`` entry (markdown text or ``None``)
            and the pipeline object stored under ``pipeline_name``.
        pipeline_name: Key in ``pipeline`` of the pipeline object to draw.
    """
    with st.expander("Show pipeline"):
        description = pipeline["doc"]
        # The description is optional; skip it when there is none.
        if description is not None:
            st.markdown(description)
        st.plotly_chart(
            get_pipeline_graph(pipeline[pipeline_name]),
            use_container_width=True,
        )


def component_show_search_result(container, results):
    """Write every search hit into ``container`` as a markdown section.

    Args:
        container: Streamlit container (used as a context manager) that
            receives the output.
        results: Iterable of mappings. Each must provide ``"text"`` and
            ``"id"``; ``"score"`` and ``"content_audio"`` are rendered only
            when present.
    """
    with container:
        # Matches are numbered from 1 in the headings.
        for position, hit in enumerate(results, start=1):
            st.markdown(f"### Match {position}")
            st.markdown(f"**Text**: {hit['text']}")
            st.markdown(f"**Document**: {hit['id']}")
            if "score" in hit:
                st.markdown(f"**Score**: {hit['score']:.3f}")
            if "content_audio" in hit:
                st.audio(str(hit["content_audio"]))
            st.markdown("---")


def component_text_input(container):
    """Draw text boxes for typing documents and return them as a corpus.

    A new text box appears after each non-empty entry; the first empty box
    ends the sequence.

    Args:
        container: Streamlit container (used as a context manager) in which
            the widgets are drawn.

    Returns:
        A list of ``{"text": ..., "id": ...}`` dicts, one per entered
        document, with ids counted from 0 in entry order.
    """
    with container:
        entered = []
        with st.expander("Enter documents"):
            slot = 1
            text = st.text_input(f"Document {slot}", key=slot)
            # Keep offering another box for as long as the last one was filled.
            while text != "":
                entered.append(text)
                st.markdown("---")
                slot += 1
                text = st.text_input(f"Document {slot}", key=slot)
        return [
            {"text": text, "id": index} for index, text in enumerate(entered)
        ]


def component_article_url(container):
    """Draw URL input boxes, preview the extracted text and return a corpus.

    A new URL box appears after each non-empty entry; the first empty box
    ends the sequence. Each entered URL is passed to
    ``extract_text_from_url`` and the result is shown in its own preview
    expander.

    Args:
        container: Streamlit container (used as a context manager) in which
            the widgets are drawn.

    Returns:
        A list of ``{"text": ..., "id": ...}`` dicts, one per entered URL,
        with ids counted from 0 in entry order.
    """
    with container:
        pages = []
        with st.expander("Enter URLs"):
            slot = 1
            url = st.text_input(f"URL {slot}", key=slot)
            # Keep offering another box for as long as the last one was filled.
            while url != "":
                pages.append({"text": extract_text_from_url(url)})
                st.markdown("---")
                slot += 1
                url = st.text_input(f"URL {slot}", key=slot)

        # Preview numbering is 0-based, matching the ids in the returned corpus.
        for number, page in enumerate(pages):
            with st.expander(f"Preview URL {number}"):
                st.write(page)

        return [
            {"text": page["text"], "id": number}
            for number, page in enumerate(pages)
        ]


def component_file_input(container):
    """Draw file uploaders, preview the extracted text and return a corpus.

    A new uploader appears after each file whose text was extracted. The
    sequence ends at the first uploader left empty, or at the first file for
    which ``extract_text_from_file`` returns ``None``.

    Args:
        container: Streamlit container (used as a context manager) in which
            the widgets are drawn.

    Returns:
        A list of ``{"text": ..., "id": ...}`` dicts, one per extracted file,
        with ids counted from 0 in upload order.
    """
    with container:
        files = []
        doc_id = 1
        with st.expander("Enter Files"):
            while True:
                file = st.file_uploader(
                    "Upload a .txt, .pdf, .csv, image file", key=doc_id
                )
                # Identity checks against None: they cannot be affected by
                # any custom __eq__/__ne__ on the uploaded file or the result.
                if file is None:
                    break
                extracted_text = extract_text_from_file(file)
                if extracted_text is None:
                    break
                files.append({"text": extracted_text})
                doc_id += 1
                st.markdown("---")

        # Preview numbering is 0-based, matching the ids in the returned corpus.
        for idx, doc in enumerate(files):
            with st.expander(f"Preview File {idx}"):
                st.write(doc)

        corpus = [
            {"text": doc["text"], "id": idx} for idx, doc in enumerate(files)
        ]
        return corpus