File size: 6,488 Bytes
01b8e8e
b8acde7
 
 
 
 
 
dd7488f
01b8e8e
39503cb
01b8e8e
f456ef3
acb72cc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
710a34d
 
 
 
 
 
 
 
 
 
 
 
 
39503cb
01b8e8e
5634055
01b8e8e
843bc9e
 
 
 
5634055
 
 
dd7488f
39503cb
 
01b8e8e
 
 
 
 
 
6a6afbf
843bc9e
42468fb
101be32
42468fb
 
01b8e8e
 
39503cb
843bc9e
01b8e8e
 
 
 
 
 
 
6a6afbf
01b8e8e
 
 
 
6a6afbf
843bc9e
1b47089
 
843bc9e
1b47089
 
 
 
 
 
 
6a6afbf
1b47089
 
 
 
dbcf2e8
46323da
 
6a6afbf
dbcf2e8
6a6afbf
843bc9e
4107940
 
843bc9e
4107940
 
 
 
 
dbcf2e8
710a34d
dbcf2e8
4107940
 
 
6a6afbf
4107940
 
 
 
 
 
dbcf2e8
46323da
 
6a6afbf
dbcf2e8
6a6afbf
843bc9e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
import streamlit as st
from interface.utils import (
    get_pipelines,
    extract_text_from_url,
    extract_text_from_file,
    reset_vars_data,
)
from interface.draw_pipelines import get_pipeline_graph


def component_select_pipeline(container):
    """Draw the pipeline selector and its parameter widgets.

    Renders a select box with the available pipelines plus one input widget
    per parameter of the chosen pipeline, then (re)builds the pipeline and
    stores it in ``st.session_state["pipeline"]`` when needed.

    The pipeline is rebuilt and ``reset_vars_data()`` is called when no
    pipeline is stored yet, when a different pipeline was selected, or when
    any parameter value differs from the stored ones. The "Keyword Search"
    pipeline is additionally rebuilt on every call (without resetting).
    """
    pipeline_names, pipeline_funcs, pipeline_func_parameters = get_pipelines()
    with st.spinner("Loading Pipeline..."), container:
        if "Keyword Search" in pipeline_names:
            default_index = pipeline_names.index("Keyword Search")
        else:
            default_index = 0
        selected_pipeline = st.selectbox(
            "Select pipeline", pipeline_names, index=default_index
        )
        index_pipe = pipeline_names.index(selected_pipeline)
        chosen_params = pipeline_func_parameters[index_pipe]

        st.write("---")
        st.header("Pipeline Parameters")
        # One widget per parameter; the widget type follows the default's type.
        # bool is tested before int because bool is a subclass of int.
        for parameter, value in chosen_params.items():
            if isinstance(value, str):
                value = st.text_input(parameter, value)
            elif isinstance(value, bool):
                value = st.checkbox(parameter, value)
            elif isinstance(value, int):
                value = int(st.number_input(parameter, value=value))
            elif isinstance(value, float):
                value = float(st.number_input(parameter, value=value))
            chosen_params[parameter] = value

        current = st.session_state["pipeline"]
        # "Stale" means the stored pipeline no longer matches the UI state.
        stale = (
            current is None
            or current["name"] != selected_pipeline
            or list(st.session_state["pipeline_func_parameters"][index_pipe].values())
            != list(chosen_params.values())
        )
        # TODO: Use elasticsearch and remove this workaround for TFIDF
        # Keyword Search is reloaded on every run even when nothing changed.
        if stale or current["name"] == "Keyword Search":
            st.session_state["pipeline_func_parameters"] = pipeline_func_parameters
            build_pipeline = pipeline_funcs[index_pipe]
            search_pipeline, index_pipeline = build_pipeline(**chosen_params)
            st.session_state["pipeline"] = {
                "name": selected_pipeline,
                "search_pipeline": search_pipeline,
                "index_pipeline": index_pipeline,
                "doc": build_pipeline.__doc__,
            }
            # Only a real change invalidates the previously collected data.
            if stale:
                reset_vars_data()


def component_show_pipeline(pipeline, pipeline_name):
    """Draw the pipeline graph inside a collapsible expander.

    ``pipeline`` is a mapping holding a ``"doc"`` entry (may be ``None``) and
    the pipeline object stored under the key ``pipeline_name``. When the doc
    text contains "BUG", a warning sign is appended to the expander label.
    """
    doc = pipeline["doc"]
    has_doc = doc is not None

    label = "Show pipeline"
    if has_doc and "BUG" in doc:
        label = label + "  ⚠️"

    with st.expander(label):
        if has_doc:
            st.markdown(doc)
        st.plotly_chart(
            get_pipeline_graph(pipeline[pipeline_name]),
            use_container_width=True,
        )


def component_show_search_result(container, results):
    """Render every search hit in ``results`` inside ``container``.

    Each result is indexed by ``"text"``, ``"id"`` and ``"meta"``; the chunk
    id (``meta["_split_id"]``), ``"score"`` and ``"content_audio"`` entries
    are shown only when present.
    """
    with container:
        for rank, hit in enumerate(results, start=1):
            st.markdown(f"### Match {rank}")
            st.markdown(f"**Text**: {hit['text']}")
            st.markdown(f"**Document**: {hit['id']}")

            meta = hit["meta"]
            if "_split_id" in meta:
                st.markdown(f"**Document Chunk**: {meta['_split_id']}")
            if "score" in hit:
                st.markdown(f"**Score**: {hit['score']:.3f}")
            if "content_audio" in hit:
                st.audio(str(hit["content_audio"]))

            st.markdown("---")


def component_text_input(container, doc_id):
    """Draw the Text Input widget

    Keeps adding text inputs, starting at ``doc_id``, until one is left
    empty. Returns the collected ``{"text", "id"}`` entries together with
    the next unused document id.
    """
    with container:
        corpus = []
        with st.expander("Enter documents"):
            while True:
                entered = st.text_input(f"Document {doc_id}", key=doc_id)
                # The first empty field ends the chain of inputs.
                if entered == "":
                    break
                corpus.append({"text": entered, "id": doc_id})
                doc_id += 1
                st.markdown("---")
        return corpus, doc_id


def component_article_url(container, doc_id):
    """Draw the Article URL widget

    Keeps adding URL inputs, starting at ``doc_id``, until one is left
    empty. The text extracted from each URL is shown in a preview expander.
    Returns the collected ``{"text", "id"}`` entries together with the next
    unused document id.
    """
    with container:
        corpus = []
        with st.expander("Enter URLs"):
            while True:
                address = st.text_input(f"URL {doc_id}", key=doc_id)
                # The first empty field ends the chain of inputs.
                if address == "":
                    break
                corpus.append({"text": extract_text_from_url(address), "id": doc_id})
                doc_id += 1
                st.markdown("---")

        # Previews are numbered by position in this batch, not by document id.
        for position, entry in enumerate(corpus):
            with st.expander(f"Preview URL {position}"):
                st.write(entry["text"])

        return corpus, doc_id


def component_file_input(container, doc_id):
    """Draw the extract text from file widget

    Keeps adding file uploaders, starting at ``doc_id``, until one is left
    empty or ``extract_text_from_file`` returns ``None`` for an upload. The
    text extracted from each accepted file is shown in a preview expander.

    Args:
        container: Streamlit container the widgets are drawn into.
        doc_id: Id given to the first uploaded document; also used as the
            widget key of the uploader.

    Returns:
        A ``(corpus, doc_id)`` tuple where ``corpus`` is a list of
        ``{"text": ..., "id": ...}`` dicts and ``doc_id`` is the next unused
        document id.
    """
    with container:
        files = []
        with st.expander("Enter Files"):
            while True:
                file = st.file_uploader(
                    "Upload a .txt, .pdf, .csv, image file, audio file", key=doc_id
                )
                # Compare to None by identity (PEP 8); ``!=`` would dispatch to
                # the object's own comparison methods.
                if file is None:
                    break
                extracted_text = extract_text_from_file(file)
                # No text for this upload: stop offering further uploaders.
                if extracted_text is None:
                    break
                files.append({"text": extracted_text, "doc_id": doc_id})
                doc_id += 1
                st.markdown("---")

        # Previews are numbered by position in this batch, not by document id.
        for idx, doc in enumerate(files):
            with st.expander(f"Preview File {idx}"):
                st.write(doc["text"])

        corpus = [{"text": doc["text"], "id": doc["doc_id"]} for doc in files]
        return corpus, doc_id