"""FastAPI service that streams chat completions from Qwen2.5 Coder.

Exposes ``POST /api/chat`` (streamed model output) and ``GET /`` (landing
page), and starts uvicorn when the file is executed as a script.
"""

import uvicorn
from fastapi import FastAPI, HTTPException, Request
from fastapi.concurrency import run_in_threadpool
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import HTMLResponse, StreamingResponse
from huggingface_hub import InferenceClient
from pydantic import BaseModel

app = FastAPI()

# Configure CORS
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

client = InferenceClient("Qwen/Qwen2.5-Coder-32B-Instruct")

# Returned by ``next()`` when the model stream ends before producing anything.
_STREAM_END = object()


class ChatRequest(BaseModel):
    """JSON body accepted by ``POST /api/chat``.

    Every field is required; none has a default.
    """

    # Newest user message; sent as the final "user" entry.
    message: str
    # Earlier turns as (user message, assistant message) pairs, oldest first.
    history: list[tuple[str, str]]
    # Content of the leading "system" entry.
    system_message: str
    # The three values below are forwarded unchanged to ``chat_completion``.
    max_tokens: int
    temperature: float
    top_p: float


def generate_response(messages, max_tokens, temperature, top_p):
    """Yield the text pieces of a streamed chat completion.

    Args:
        messages: List of ``{"role": ..., "content": ...}`` dicts.
        max_tokens: Forwarded to ``client.chat_completion``.
        temperature: Forwarded to ``client.chat_completion``.
        top_p: Forwarded to ``client.chat_completion``.

    Yields:
        The ``delta.content`` of the first choice of each chunk, or ``""``
        when that content is empty/``None``.

    Nothing is requested from the client until the generator is first
    advanced, so any exception from the call is raised at iteration time.
    """
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        # ``choices[0]`` on an empty list would raise IndexError and abort
        # the whole stream; a chunk without choices has no text to forward.
        if not chunk.choices:
            continue
        yield chunk.choices[0].delta.content or ""


def _resume_stream(first_piece, remaining):
    """Yield ``first_piece`` (unless the stream was empty), then ``remaining``."""
    if first_piece is _STREAM_END:
        return
    yield first_piece
    yield from remaining


@app.post("/api/chat")
async def chat_stream(request: ChatRequest):
    """Stream the model's reply to ``request.message``.

    The conversation is assembled as: system message, each history pair as a
    user/assistant turn, then the new user message.

    Returns:
        A ``StreamingResponse`` whose body is the concatenated text pieces.

    Raises:
        HTTPException: 500, with the error text as detail, if producing the
            first piece of the reply fails.
    """
    messages = [{"role": "system", "content": request.system_message}]
    for user_msg, assistant_msg in request.history:
        messages.extend([
            {"role": "user", "content": user_msg},
            {"role": "assistant", "content": assistant_msg}
        ])
    messages.append({"role": "user", "content": request.message})

    pieces = generate_response(
        messages=messages,
        max_tokens=request.max_tokens,
        temperature=request.temperature,
        top_p=request.top_p
    )

    # The generator is lazy: wrapping only its construction in try/except can
    # never observe an inference failure, because the request is issued when
    # the body is iterated, after this handler has returned. Pull the first
    # piece here so such failures become a proper 500. ``next`` blocks on the
    # network, so run it in the threadpool instead of on the event loop.
    try:
        first_piece = await run_in_threadpool(next, pieces, _STREAM_END)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e

    # NOTE(review): the body is raw text pieces, not "data:"-framed events;
    # the media type is kept as-is because the consumer is not visible here.
    return StreamingResponse(
        _resume_stream(first_piece, pieces),
        media_type="text/event-stream"
    )


@app.get("/", response_class=HTMLResponse)
async def read_root():
    """Return the landing page as an ``HTMLResponse``."""
    # Serve the HTML content directly
    # NOTE(review): the literal below holds plain text with no markup --
    # confirm the page's tags were not lost before this file was committed.
    html_content = """ Qwen2.5 Coder - AI Assistant

Chat with Qwen2.5 Coder

Qwen2.5 Coder

Hello! I'm Qwen2.5 Coder, a 32B parameter AI assistant specialized in coding and technical questions. How can I help you today?

Qwen2.5 Coder may produce inaccurate information about people, places, or facts.

"""
    return HTMLResponse(content=html_content)


if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)