|
import gradio as gr |
|
import requests |
|
import re |
|
import html |
|
import traceback |
|
import logging |
|
|
|
|
|
# Root-logger setup: timestamped INFO-level lines for request/response tracing.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# OpenAI-compatible completions endpoint of the local VLLM server.
VLLM_URL = "http://localhost:6002/v1/completions"
# Model name sent with every request; "lora" presumably selects a served
# LoRA adapter on the VLLM side -- confirm against the server launch config.
MODEL = "lora"
|
|
|
def flexible_parse(text):
    """Extract ``(tag, content)`` pairs from loosely XML-like text.

    Matches, in document order, both paired tags (``<tag ...>body</tag>``)
    and bare opening tags with no closer (``<tag ...>``). Content stops at
    the first re-opening of the same tag name, so same-name nesting is not
    supported; tag attributes are accepted but discarded.

    Args:
        text: Raw model output that may contain tag markup.

    Returns:
        A list of ``(tag_name, content)`` tuples. Paired tags yield their
        stripped inner text; bare tags yield ``''``.
    """
    pattern = r'<(\w+)(?:\s+[^>]*)?>((?:(?!<\1).)*?)</\1>|<(\w+)(?:\s+[^>]*)?>'
    result = []

    # re.DOTALL is required so '.' crosses newlines: without it, any tag
    # whose body spanned multiple lines failed the paired-tag branch and
    # was mis-reported as an empty bare tag.
    for match in re.finditer(pattern, text, re.DOTALL):
        tag, content, single_tag = match.groups()
        if single_tag:
            result.append((single_tag, ''))
        elif tag:
            result.append((tag, content.strip() if content else ''))

    return result
|
|
|
def format_as_collapsible_markdown(parsed_content):
    """Render ``(tag, content)`` pairs as collapsible HTML-in-Markdown.

    Each pair with non-empty content becomes a ``<details>``/``<summary>``
    section whose summary is the bolded tag name; pairs with empty content
    become a bare bold heading. Both tag names and content are HTML-escaped.
    """
    sections = []
    for name, body in parsed_content:
        heading = html.escape(name)
        if body:
            sections.append(
                f'<details>\n<summary><strong>{heading}</strong></summary>'
                f'\n\n{html.escape(body)}\n\n</details>\n\n'
            )
        else:
            sections.append(f'<strong>{heading}</strong>\n\n')
    return "".join(sections)
|
|
|
def get_completion(title: str, prompt: str, timeout: float = 30000) -> str:
    """Request a completion from the VLLM server and return the generated text.

    The title and prompt are wrapped in ``<title>``/``<content>`` tags to
    match the fine-tuned model's expected input format.

    Args:
        title: Document title, placed in ``<title>`` tags.
        prompt: Document body, placed in ``<content>`` tags.
        timeout: Per-request timeout in seconds passed to ``requests.post``.
            NOTE(review): the historical default of 30000 s (~8 h) looks
            like a milliseconds/seconds mix-up -- confirm the intended
            limit; it is kept as the default for backward compatibility.

    Returns:
        The completion text on success, otherwise a human-readable error
        string (callers display the return value directly, so errors are
        returned rather than raised).
    """
    full_prompt = f"<title>{title}</title>\n<content>\n{prompt}\n</content>"
    try:
        # Lazy %-style args so the message is only formatted if emitted.
        logging.info("Sending request to VLLM server: %s", VLLM_URL)
        response = requests.post(
            VLLM_URL,
            json={
                "prompt": full_prompt,
                "max_tokens": 6000,
                "temperature": 1,
                "model": MODEL,
            },
            timeout=timeout,
        )
        response.raise_for_status()
        logging.info("Successfully received response from VLLM server")
        return response.json()["choices"][0]["text"]
    except requests.exceptions.RequestException as e:
        # logging.exception records the stack trace alongside the message.
        logging.exception("Error connecting to VLLM server: %s", e)
        return f"Error connecting to VLLM server: {str(e)}"
    except Exception as e:
        # Boundary catch-all (e.g. malformed JSON / missing "choices" key):
        # the UI expects a string back, never an exception.
        logging.exception("Unexpected error in get_completion: %s", e)
        return f"Unexpected error: {str(e)}\n{traceback.format_exc()}"
|
|
|
def gradio_interface(title, prompt):
    """Handle one UI request: generate a completion and render it two ways.

    Produces a single Markdown string containing the raw model output in a
    fenced code block, followed by a collapsible view of its parsed tag
    structure. Any failure is returned as text so the UI always renders
    something instead of raising.
    """
    try:
        logging.info(f"Received request - Title: {title}, Prompt: {prompt}")
        model_text = get_completion(title, prompt)
        parsed_view = format_as_collapsible_markdown(flexible_parse(model_text))

        rendered = f"""
## Raw Response:

```
{model_text}
```

## Parsed Structure:

{parsed_view}
"""
        logging.info("Successfully processed request")
        return rendered
    except Exception as e:
        logging.error(f"Error in gradio_interface: {str(e)}")
        return f"Error in gradio_interface: {str(e)}\n{traceback.format_exc()}"
|
|
|
# Top-level UI definition: a two-field text form whose output is rendered
# as Markdown (required for the <details> collapsible sections to work).
iface = gr.Interface(
    fn=gradio_interface,
    inputs=[
        gr.Textbox(label="Title"),
        gr.Textbox(label="Prompt", lines=5)
    ],
    outputs=gr.Markdown(label="Response and Parsed Structure"),
    title="VLLM Completion Client with Raw Response and Collapsible View",
    description=f"Enter a title and prompt to generate a completion using the {MODEL} model. The raw response and a collapsible view of the parsed structure will be displayed."
)
|
|
|
# Script entry point: start the Gradio web server (blocks until shutdown).
if __name__ == "__main__":
    logging.info("Starting Gradio interface")
    iface.launch()