File size: 4,910 Bytes
908499d
e9f685e
 
 
 
 
 
 
908499d
e9f685e
 
 
908499d
e9f685e
 
 
 
908499d
 
 
 
 
 
 
 
 
e9f685e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
908499d
 
 
 
 
 
 
 
 
 
 
 
e9f685e
 
 
908499d
 
 
 
 
e9f685e
 
 
 
908499d
 
 
 
 
 
 
 
e9f685e
 
 
 
908499d
e9f685e
908499d
 
 
 
 
 
 
 
e9f685e
 
908499d
 
e9f685e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
908499d
 
e9f685e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
import gradio as gr
from openai import OpenAI
import os
import numpy as np
from src.document_processing.processor import DocumentProcessor
from src.rag.retriever import Retriever
from src.rag.generator import Generator
from src.api.openai_api import OpenAIAPI

# Initialize OpenAI client
# The key is read from the environment; falls back to "" if unset, so the
# app still imports but API calls will fail at request time.
api_key = os.environ.get("OPENAI_API_KEY", "")
openai_api = OpenAIAPI(api_key=api_key)

# Initialize RAG components with OpenAI API
# All three components share the same OpenAIAPI wrapper instance.
document_processor = DocumentProcessor(api_client=openai_api)
retriever = Retriever(api_client=openai_api)
generator = Generator(api_client=openai_api)

def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a chat reply, optionally answered via RAG.

    If the user message contains "bruk dokumenter" or "bruk rag"
    (case-insensitive), the retriever/generator pipeline is used and the
    full answer is yielded once. Otherwise (or if the RAG path raises),
    the reply is streamed incrementally from the GPT-4o chat API.

    Args:
        message: Latest user message.
        history: Prior (user, assistant) message pairs from the chat UI.
        system_message: System prompt placed first in the conversation.
        max_tokens: Cap on generated tokens for the standard path.
        temperature: Sampling temperature (passed to both paths).
        top_p: Nucleus-sampling parameter for the standard path.

    Yields:
        The accumulated response text, growing as chunks arrive.
    """
    # RAG is opt-in via a keyword in the user's message.
    use_rag = "bruk dokumenter" in message.lower() or "bruk rag" in message.lower()

    if use_rag:
        try:
            # Retrieve relevant chunks, then answer grounded in them.
            retrieved_chunks = retriever.retrieve(message)
            response = generator.generate(
                query=message,
                retrieved_chunks=retrieved_chunks,
                temperature=temperature,
            )
            yield response
            return
        except Exception as e:
            # Best-effort fallback: any RAG failure drops through to the
            # standard GPT-4o path below instead of surfacing an error.
            print(f"RAG failed: {str(e)}, falling back to standard GPT-4o")

    # Standard GPT-4o approach: rebuild the full conversation for the API.
    client = OpenAI(api_key=api_key)
    messages = [{"role": "system", "content": system_message}]

    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})

    messages.append({"role": "user", "content": message})

    response = ""

    for chunk in client.chat.completions.create(
        model="gpt-4o",
        messages=messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        # Some stream events (e.g. a trailing usage chunk) carry no choices;
        # indexing them unguarded would raise IndexError.
        if not chunk.choices:
            continue
        content = chunk.choices[0].delta.content
        if content:
            response += content
            yield response


"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(
            value="Du er en hjelpsom assistent som svarer på norsk. Bruk kunnskapen din til å svare på spørsmål. Hvis brukeren skriver 'bruk dokumenter' eller 'bruk RAG', vil du bruke Retrieval-Augmented Generation for å svare basert på opplastede dokumenter.", 
            label="System message"
        ),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
    title="Norwegian RAG Chatbot with GPT-4o",
    description="En chatbot basert på Retrieval-Augmented Generation (RAG) for norsk språk med GPT-4o. Skriv 'bruk dokumenter' eller 'bruk RAG' i meldingen din for å aktivere RAG-funksjonalitet.",
)

# Create the document upload interface
# Document tab: upload files, process them into chunks, and list what has
# been indexed so far.
with gr.Blocks() as document_upload:
    with gr.Tab("Last opp dokumenter"):
        with gr.Row():
            with gr.Column(scale=2):
                file_output = gr.File(label="Opplastede dokumenter")
                # Gradio expects file extensions with a leading dot
                # (e.g. ".pdf"); without it the browser file-picker
                # filter does not match any files.
                upload_button = gr.UploadButton(
                    "Klikk for å laste opp dokument",
                    file_types=[".pdf", ".txt", ".html"],
                    file_count="multiple"
                )
            
            with gr.Column(scale=3):
                documents_list = gr.Dataframe(
                    headers=["Dokument ID", "Filnavn", "Dato", "Chunks"],
                    label="Dokumentliste",
                    interactive=False
                )
        
        process_status = gr.Textbox(label="Status", interactive=False)
        refresh_btn = gr.Button("Oppdater dokumentliste")
        
        # Set up event handlers
        # Uploading feeds the files straight into the processor, which is
        # expected to return (status_text, documents_table).
        upload_button.upload(
            fn=document_processor.process_document,
            inputs=[upload_button],
            outputs=[process_status, documents_list]
        )
        
        # Rebuild the table rows from the processor's document registry;
        # missing metadata fields fall back to "N/A" / 0.
        refresh_btn.click(
            fn=lambda: [[doc_id, meta.get("filename", "N/A"), meta.get("processed_date", "N/A"), meta.get("chunk_count", 0)] 
                        for doc_id, meta in document_processor.get_all_documents().items()],
            inputs=None,
            outputs=[documents_list]
        )

# Combine the interfaces
# Chat and document-management UIs become two tabs of a single app.
app = gr.TabbedInterface([demo, document_upload], ["Chat", "Dokumenter"])

if __name__ == "__main__":
    app.launch()