Spaces:

derek-thomas
/

RAGDemo

Runtime error

File size: 3,317 Bytes

37d119f
 
 
 
5b6661f
 
9e6b8ed
 
83e218d
629069a
9e6b8ed
83e218d
9e6b8ed
 
629069a
5b6661f
 
 
629069a
 
5b6661f
629069a
 
 
 
9e6b8ed
 
 
 
 
 
 
 
 
 
 
 
5638045
 
 
 
 
 
 
 
 
 
5b6661f
9e6b8ed
 
 
 
629069a
9e6b8ed
 
 
 
 
 
 
5b6661f
 
9e6b8ed
 
 
 
 
 
 
 
 
 
 
 
 
 
629069a
9e6b8ed
 
629069a
9e6b8ed
 
 
 
 
 
629069a
9e6b8ed
 
 
 
7beacaa
9e6b8ed

import subprocess
import sys

# Install the exact transformers release this app expects before anything imports it.
# pip is invoked as a module of the *running* interpreter (`python -m pip`) rather than
# whichever `pip` executable happens to be first on PATH, so the package lands in the
# environment this process actually imports from.
# Best-effort: a non-zero pip exit status is not raised here.
subprocess.run(
    [sys.executable, "-m", "pip", "install", "--upgrade", "transformers[torch,sentencepiece]==4.34.1"]
)

import logging
from pathlib import Path
from time import perf_counter

import gradio as gr
from jinja2 import Environment, FileSystemLoader

from backend.query_llm import generate
from backend.semantic_search import qd_retriever

# Configure root logging at INFO and create this module's logger
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Directory containing this file; the Jinja templates live in its 'templates' subfolder
proj_dir = Path(__file__).parent
env = Environment(loader=FileSystemLoader(proj_dir / 'templates'))

# Load both templates once at import time:
#   template      -> rendered into the prompt passed to generate()
#   template_html -> rendered into the HTML shown next to the chat
template, template_html = (
    env.get_template(template_name) for template_name in ('template.j2', 'template_html.j2')
)


def add_text(history, text):
    """Append the user's message to the chat history as a turn with no reply yet.

    Args:
        history: Existing chat turns, or ``None`` when the chat is empty.
        text: The message the user just entered.

    Returns:
        A 2-tuple of (new history list ending with ``(text, None)``,
        a ``gr.Textbox`` with an empty value and ``interactive=False``).
        The incoming ``history`` list is not mutated.
    """
    if history is None:
        history = []
    # Concatenation builds a fresh list, leaving the caller's list untouched
    updated_history = history + [(text, None)]
    locked_textbox = gr.Textbox(value="", interactive=False)
    return updated_history, locked_textbox


def bot(history, system_prompt=""):
    """Stream the reply for the most recent user message in ``history``.

    Retrieves the top documents for the latest query, renders the text and
    HTML prompt templates with them, then yields the updated history each time
    ``generate`` produces a new value.

    Args:
        history: Chat turns as ``(user_message, bot_message)`` pairs. The last
            pair holds the query to answer; the list must be non-empty.
        system_prompt: Not used by this function; kept so the signature stays
            compatible with existing callers.

    Yields:
        ``(history, prompt_html)`` where the last turn of ``history`` carries
        the latest value produced by ``generate`` and ``prompt_html`` is the
        rendered HTML prompt.
    """
    top_k = 5
    query = history[-1][0]

    logger.warning('Retrieving documents...')
    # Retrieve documents relevant to query
    document_start = perf_counter()
    documents = qd_retriever.retrieve(query, top_k=top_k)
    # Elapsed time is end minus start, so the logged duration is positive
    document_time = perf_counter() - document_start
    logger.warning('Finished Retrieving documents in %s seconds...', round(document_time, 2))

    # Create Prompt
    prompt = template.render(documents=documents, query=query)
    prompt_html = template_html.render(documents=documents, query=query)
    logger.warning(prompt)

    # Replace the whole turn rather than assigning to index 1, so this works
    # even when the last turn is an immutable tuple (as add_text creates it).
    history[-1] = [query, ""]
    # Each yielded value is stored as the entire reply (assigned, not appended),
    # so generate() is presumably yielding the cumulative text so far.
    for partial_response in generate(prompt, history[:-1]):
        history[-1] = [query, partial_response]
        yield history, prompt_html


with gr.Blocks() as demo:
    with gr.Tab("Application"):
        # Chat window: first avatar is the user, second is the bot
        avatar_urls = (
            'https://aui.atlassian.com/aui/8.8/docs/images/avatar-person.svg',
            'https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo.svg',
        )
        chatbot = gr.Chatbot(
            [],
            elem_id="chatbot",
            avatar_images=avatar_urls,
            bubble_full_width=False,
            show_copy_button=True,
            show_share_button=True,
        )

        # Input row: free-text box plus an explicit submit button
        with gr.Row():
            txt = gr.Textbox(
                scale=3,
                show_label=False,
                placeholder="Enter text and press enter",
                container=False,
            )
            txt_btn = gr.Button(value="Submit text", scale=1)

        # Shows the rendered HTML prompt produced by bot()
        prompt_html = gr.HTML()

        # Clicking the button and pressing enter run the same pipeline:
        #   1. add_text appends the message and disables the textbox,
        #   2. bot streams the reply and the rendered prompt,
        #   3. the textbox is made interactive again.
        for trigger in (txt_btn.click, txt.submit):
            txt_msg = trigger(add_text, [chatbot, txt], [chatbot, txt], queue=False).then(
                bot, chatbot, [chatbot, prompt_html])
            txt_msg.then(lambda: gr.Textbox(interactive=True), None, [txt], queue=False)

    # Clickable sample questions that populate the textbox
    gr.Examples(
        examples=['What is the capital of China, I think its Shanghai?', 'Who won the mens world cup in 2014?'],
        inputs=txt,
    )

# Enable queuing, then start the app
demo.queue()
demo.launch(debug=True)