# instruct-evolve-xml-gem2b-adapter / gradio_gemma2-2b.py
import gradio as gr
import requests
import re
import html
import traceback
import logging

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

VLLM_URL = "http://localhost:6002/v1/completions"
MODEL = "lora"
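
# Assumption: a vLLM OpenAI-compatible server is already running on port 6002
# with a LoRA adapter registered under the name "lora". A typical (hypothetical)
# launch command would look something like:
#   python -m vllm.entrypoints.openai.api_server --port 6002 \
#       --model <base_model> --enable-lora --lora-modules lora=<adapter_path>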


def flexible_parse(text):
    """Extract (tag, content) pairs from loosely structured XML-like text.

    Handles both paired tags (<tag>...</tag>) and bare opening tags
    (<tag>) that are never closed, which model output often contains.
    """
    pattern = r'<(\w+)(?:\s+[^>]*)?>((?:(?!<\1).)*?)</\1>|<(\w+)(?:\s+[^>]*)?>'
    result = []
    # re.DOTALL lets a tag's content span multiple lines; without it, any
    # multi-line tag body fails to match and is silently dropped.
    for match in re.finditer(pattern, text, re.DOTALL):
        tag, content, single_tag = match.groups()
        if single_tag:
            result.append((single_tag, ''))
        elif tag:
            result.append((tag, content.strip() if content else ''))
    return result
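
# Example (a quick sanity check of flexible_parse):
#   flexible_parse("<thought>plan</thought>\n<answer>42</answer>\n<eos>")
#   -> [('thought', 'plan'), ('answer', '42'), ('eos', '')]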


def format_as_collapsible_markdown(parsed_content):
    """Render (tag, content) pairs as collapsible HTML <details> blocks."""
    markdown = ""
    for tag, content in parsed_content:
        if content:
            markdown += f'<details>\n<summary><strong>{html.escape(tag)}</strong></summary>\n\n{html.escape(content)}\n\n</details>\n\n'
        else:
            markdown += f'<strong>{html.escape(tag)}</strong>\n\n'
    return markdown
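
# For a pair like ('answer', '42'), this emits a collapsible block:
#   <details>
#   <summary><strong>answer</strong></summary>
#
#   42
#
#   </details>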


def get_completion(title: str, prompt: str):
    """Send the wrapped prompt to the vLLM server and return the generated text."""
    full_prompt = f"<title>{title}</title>\n<content>\n{prompt}\n</content>"
    try:
        logging.info(f"Sending request to vLLM server: {VLLM_URL}")
        response = requests.post(
            VLLM_URL,
            json={
                "prompt": full_prompt,
                "max_tokens": 6000,
                "temperature": 1,
                "model": MODEL
            },
            timeout=300  # seconds; note that requests takes seconds, not milliseconds
        )
        response.raise_for_status()  # Raise an exception for bad status codes
        logging.info("Successfully received response from vLLM server")
        return response.json()["choices"][0]["text"]
    except requests.exceptions.RequestException as e:
        logging.error(f"Error connecting to vLLM server: {str(e)}")
        return f"Error connecting to vLLM server: {str(e)}"
    except Exception as e:
        logging.error(f"Unexpected error in get_completion: {str(e)}")
        return f"Unexpected error: {str(e)}\n{traceback.format_exc()}"


def gradio_interface(title, prompt):
    """Gradio handler: query the model, then show the raw and parsed views."""
    try:
        logging.info(f"Received request - Title: {title}, Prompt: {prompt}")
        raw_response = get_completion(title, prompt)
        parsed_content = flexible_parse(raw_response)
        collapsible_view = format_as_collapsible_markdown(parsed_content)
        combined_output = f"""
## Raw Response:
```
{raw_response}
```
## Parsed Structure:
{collapsible_view}
"""
        logging.info("Successfully processed request")
        return combined_output
    except Exception as e:
        logging.error(f"Error in gradio_interface: {str(e)}")
        return f"Error in gradio_interface: {str(e)}\n{traceback.format_exc()}"


iface = gr.Interface(
    fn=gradio_interface,
    inputs=[
        gr.Textbox(label="Title"),
        gr.Textbox(label="Prompt", lines=5)
    ],
    outputs=gr.Markdown(label="Response and Parsed Structure"),
    title="vLLM Completion Client with Raw Response and Collapsible View",
    description=f"Enter a title and prompt to generate a completion using the {MODEL} model. The raw response and a collapsible view of the parsed structure will be displayed."
)

if __name__ == "__main__":
    logging.info("Starting Gradio interface")
    iface.launch()