File size: 6,303 Bytes
37c1830
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81beec9
37c1830
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81beec9
37c1830
 
 
 
 
 
 
 
81beec9
 
 
 
 
37c1830
81beec9
 
37c1830
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81beec9
0658fcd
81beec9
 
 
 
 
37c1830
81beec9
37c1830
 
 
 
 
f7e4967
37c1830
f7e4967
 
37c1830
f7e4967
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
import asyncio
import os.path
import tempfile
import uuid
from typing import List

import gradio
import gradio as gr
import openai
import pandas as pd
from autorag.evaluator import Evaluator

from src.data.chunk import chunk
from src.data.parse import parse_pdf
from src.runner import GradioStreamRunner
from gradio import ChatMessage

# Absolute directory of this file; config paths below are resolved against it.
root_dir = os.path.dirname(os.path.realpath(__file__))

# YAML config for the initial "pseudo trial" Evaluator runs at ingest time.
pseudo_trial_yaml_path = os.path.join(root_dir, "config", "init_project_for_pseudo_trial.yaml")
# YAML config used to build the streaming runner that powers the chatbot.
init_run_yaml = os.path.join(root_dir, "config", "init_project_for_run.yaml")

# Module-level singleton set by set_runner(); stays None until a file is ingested.
gradio_runner = None

# Code for Task 1
def file_ingest(input_files: List[str], temp_project_dir, progress=gr.Progress()):
    """Parse, chunk, and ingest uploaded PDFs, then run a pseudo trial.

    This is a generator that Gradio streams into the status textbox, so every
    user-facing message must be *yielded*: a ``return <value>`` inside a
    generator is swallowed by StopIteration and never reaches the UI (the
    original returned its error strings, which were therefore never shown).

    Args:
        input_files: Paths of the uploaded files.
        temp_project_dir: AutoRAG project directory to ingest into.
        progress: Gradio progress tracker (injected by Gradio).

    Yields:
        Status strings for the "Status update" textbox.
    """
    if os.getenv("OPENAI_API_KEY") is None:
        yield "Please submit your OpenAI API key first."
        return
    if not input_files:
        yield "Please upload a file first."
        return
    progress(0.05)
    # do parse
    raw_df = parse_pdf(file_lists=input_files)
    progress(0.3)
    # do chunk
    corpus_df = chunk(raw_df, method="recursivecharacter",
                      lang="en", chunk_size=512, chunk_overlap=128)
    progress(0.5)
    # NOTE: the original called `asyncio.sleep(0.5)` without awaiting it in a
    # sync function; the coroutine never ran (no delay ever happened, only a
    # "coroutine was never awaited" warning), so the call is removed.

    # Build a one-row placeholder QA set so Evaluator can run a pseudo trial.
    empty_qa_df = make_empty_qa(corpus_df=corpus_df)
    with tempfile.TemporaryDirectory() as temp_data_dir:
        empty_qa_df.to_parquet(os.path.join(temp_data_dir, "empty_qa.parquet"))
        corpus_df.to_parquet(os.path.join(temp_data_dir, "corpus.parquet"))

        evaluator = Evaluator(qa_data_path=os.path.join(temp_data_dir, "empty_qa.parquet"),
                              corpus_data_path=os.path.join(temp_data_dir, "corpus.parquet"),
                              project_dir=temp_project_dir)
        evaluator.start_trial(pseudo_trial_yaml_path, skip_validation=True)
        yield "Setting up"
        progress(0.9)
        set_runner(temp_project_dir)
        progress(1.0)
        yield "File uploaded complete. You can use it at chatbot now."


def make_empty_qa(corpus_df: pd.DataFrame) -> pd.DataFrame:
    """Build a single-row placeholder QA frame referencing the first corpus doc.

    The dummy question/answer pair exists only so that an AutoRAG trial has a
    syntactically valid QA dataset to run against.
    """
    first_doc_id = corpus_df["doc_id"].iloc[0]
    placeholder_row = {
        "qid": str(uuid.uuid4()),
        "query": ["Who is Kai Havertz?"],
        "retrieval_gt": [[[first_doc_id]]],
        "generation_gt": [["Havertz is the greatest footballer."]],
    }
    return pd.DataFrame(placeholder_row)


def on_submit_openai_key(openai_key):
    """Validate the user's OpenAI API key and store it in the environment.

    Sends a tiny smoke-test completion to verify the key works.

    Args:
        openai_key: The API key entered in the password textbox.

    Returns:
        Status string for the "OpenAI API status" textbox:
        "Setting complete." on success, "Not Set" on failure.
    """
    os.environ["OPENAI_API_KEY"] = openai_key
    # Test openai key
    try:
        client = openai.OpenAI()
        response = client.chat.completions.create(
            messages=[
                {"role": "user", "content": "What is the capital of France?"},
            ],
            model="gpt-4o-mini",
            max_tokens=3,
        )
        assert isinstance(response.choices[0].message.content, str)
        gr.Info("OpenAI API key submitted.", duration=3)
        return "Setting complete."
    except openai.AuthenticationError:
        # `gr.Error` is an exception class: the original constructed it without
        # raising, which displays nothing. `gr.Warning` is a display call, so
        # the popup actually shows while still returning the status string.
        os.environ.pop("OPENAI_API_KEY", None)  # don't keep an invalid key set
        gr.Warning("OpenAI API key is invalid.", duration=3)
        return "Not Set"
    except AssertionError:
        os.environ.pop("OPENAI_API_KEY", None)
        gr.Warning("OpenAI server is not working properly.", duration=3)
        return "Not Set"


def set_runner(project_dir):
    """Build a GradioStreamRunner for *project_dir* and install it as the module-level runner."""
    global gradio_runner
    gradio_runner = GradioStreamRunner.from_yaml(
        yaml_path=init_run_yaml, project_dir=project_dir
    )


def get_response(history):
    """Stream the runner's answer to the latest user turn into *history*.

    Yields the growing message list so Gradio re-renders the chatbot on every
    streamed chunk. Bails out with a warning popup if the runner or the API
    key has not been set up yet.
    """
    global gradio_runner
    if gradio_runner is None:
        gradio.Warning("Please set the AutoRAG server first.")
        return
    if os.getenv("OPENAI_API_KEY", None) is None:
        gradio.Warning("Please submit your OpenAI API key first.")
        return

    # The last entry is the user's message; grab it before appending the
    # empty assistant placeholder that the stream fills in.
    user_query = history[-1]["content"]
    history.append({"role": "assistant", "content": ""})
    for output in gradio_runner.stream_run(user_query):
        history[-1]["content"] = output[0]
        yield history

def user(user_message, history: list):
    """Append the submitted message as a user turn and clear the input box.

    Returns a 2-tuple: ("" to reset the textbox, the extended history list).
    The input *history* is not mutated; a new list is returned.
    """
    extended = history + [{"role": "user", "content": user_message}]
    return "", extended

# interface one
with gr.Blocks(theme="earneleh/paris") as demo:
    # Create a scratch project directory that outlives UI definition. The
    # original used `with tempfile.TemporaryDirectory() as project_dir:`,
    # which deleted the directory the moment the UI *definition* finished —
    # before demo.launch() ran and before any button callback could fire — so
    # file_ingest would have ingested into a path that no longer existed.
    # mkdtemp() persists for the lifetime of the process.
    project_dir = tempfile.mkdtemp()

    # Define components
    with gr.Row():
        with gr.Column(scale=3):
            textbox = gr.Textbox(label="Please input your OpenAI API key and press Enter.", type="password",
                     info="You can get your API key from https://platform.openai.com/account/api-keys\n"
                     "AutoRAG do not store your API key.",
                                 autofocus=True)
            api_key_status_box = gr.Textbox(label="OpenAI API status", value="Not Set", interactive=False)

            gr.Markdown("## Ingest Your Data")

            file_input = gr.File(label="Upload Files", type="filepath", file_count="multiple")
            button = gr.Button("Submit file")
            text_output = gr.Textbox(label="Status update", interactive=False)

            # Define layout and interactions
            textbox.submit(on_submit_openai_key, inputs=[textbox], outputs=api_key_status_box)
            button.click(file_ingest, inputs=[file_input, gr.State(project_dir)], outputs=[text_output])

        with gr.Column(scale=7):
            gr.Markdown("## This is your Naive RAG Chatbot 🚀")
            chatbot = gr.Chatbot(type="messages", height=600)
            chat_input = gr.Textbox()
            clear = gr.Button(value="Clear Chat🗑️")

            # Submitting appends the user turn first, then streams the
            # assistant reply into the chatbot.
            chat_input.submit(user, [chat_input, chatbot], outputs=[chat_input, chatbot], queue=False).then(
                get_response, inputs=chatbot, outputs=[chatbot]
            )
            clear.click(lambda: None, None, chatbot, queue=False)

    gr.Markdown("## Do you like the result?\n\nIf you don't like it, try to optimize it with AutoRAG. Press below button and go to make evaluation data and optimize it. Both on the Huggingface space so you don't need to install anything.")
    with gr.Row():
        open_data_creation = gr.Button(value="1️⃣ : Data Creation",
                                       link="https://huggingface.co/spaces/AutoRAG/AutoRAG-data-creation")
        open_optimize = gr.Button(value="2️⃣ : Optimize", link="https://huggingface.co/spaces/AutoRAG/RAG-Pipeline-Optimization")

demo.launch(share=False, debug=False)