Abhaykoul committed on
Commit c29cacb
1 Parent(s): ee5c64f

Update app.py

Files changed (1)
  1. app.py +265 -255
app.py CHANGED
@@ -1,294 +1,304 @@
- from fastapi import FastAPI, HTTPException, Request
  from fastapi.middleware.cors import CORSMiddleware
- from pydantic import BaseModel
- from typing import List, Dict, Optional, Union
- from fastapi.responses import StreamingResponse
- import logging
- import uuid
- import time
  import json
- import asyncio
- import random
- import httpx
- from fake_useragent import UserAgent
-
- app = FastAPI()
-
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
-
- app.add_middleware(
-     CORSMiddleware,
-     allow_origins=["*"],
-     allow_credentials=True,
-     allow_methods=["*"],
-     allow_headers=["*"],
- )
- MODEL_MAPPING = {
-     "keyless-gpt-4o-mini": "gpt-4o-mini",
-     "keyless-claude-3-haiku": "claude-3-haiku-20240307",
-     "keyless-mixtral-8x7b": "mistralai/Mixtral-8x7B-Instruct-v0.1",
-     "keyless-meta-Llama-3.1-70B-Instruct-Turbo": "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo"
- }
-
- class ModelInfo(BaseModel):
-     id: str
-     object: str = "model"
-     created: int = int(time.time())
-     owned_by: str = "custom"

  class ChatMessage(BaseModel):
-     role: str
-     content: str
-
- class ChatCompletionRequest(BaseModel):
-     model: str
-     messages: List[ChatMessage]
-     temperature: Optional[float] = 1.0
-     top_p: Optional[float] = 1.0
-     n: Optional[int] = 1
-     stream: Optional[bool] = False
-     stop: Optional[Union[str, List[str]]] = None
-     max_tokens: Optional[int] = None
-     presence_penalty: Optional[float] = 0.0
-     frequency_penalty: Optional[float] = 0.0
-     logit_bias: Optional[Dict[str, float]] = None
-     user: Optional[str] = None
-
- class ChatCompletionResponseChoice(BaseModel):
      index: int
      message: ChatMessage
      finish_reason: Optional[str] = None

- class ChatCompletionResponseUsage(BaseModel):
-     prompt_tokens: int
-     completion_tokens: int
-     total_tokens: int

  class ChatCompletionResponse(BaseModel):
      id: str
-     object: str = "chat.completion"
      created: int
      model: str
-     choices: List[ChatCompletionResponseChoice]
-     usage: ChatCompletionResponseUsage
-
- class DeltaMessage(BaseModel):
-     role: Optional[str] = None
-     content: Optional[str] = None
-
- class ChatCompletionStreamResponseChoice(BaseModel):
      index: int
-     delta: DeltaMessage
      finish_reason: Optional[str] = None

- class ChatCompletionStreamResponse(BaseModel):
      id: str
-     object: str = "chat.completion.chunk"
      created: int
      model: str
-     choices: List[ChatCompletionStreamResponseChoice]
-
- # Store active conversations
- conversations: Dict[str, List[ChatMessage]] = {}
-
- ua = UserAgent()
-
- def get_next_user_agent():
-     return ua.random
-
- async def update_vqd_token(user_agent):
-     async with httpx.AsyncClient() as client:
-         try:
-             await client.get("https://duckduckgo.com/country.json", headers={"User-Agent": user_agent})
-             headers = {"x-vqd-accept": "1", "User-Agent": user_agent}
-             response = await client.get("https://duckduckgo.com/duckchat/v1/status", headers=headers)
-             if response.status_code == 200:
-                 vqd_token = response.headers.get("x-vqd-4", "")
-                 logging.info(f"Fetched new x-vqd-4 token: {vqd_token}")
-                 return vqd_token
-             else:
-                 logging.warning(f"Failed to fetch x-vqd-4 token. Status code: {response.status_code}")
-                 return ""
-         except Exception as e:
-             logging.error(f"Error fetching x-vqd-4 token: {str(e)}")
-             return ""
-
- async def chat_with_duckduckgo(query: str, model: str, conversation_history: List[ChatMessage]):
-     original_model = MODEL_MAPPING.get(model, model)
-     user_agent = get_next_user_agent()
-     vqd_token = await update_vqd_token(user_agent)
-     if not vqd_token:
-         raise HTTPException(status_code=500, detail="Failed to obtain VQD token")
-
-     # If there is a system message, prepend it to the first user message (DDG AI doesn't let us send system messages, so this is a workaround -- fundamentally, it works the same way as setting a system prompt)
-     system_message = next((msg for msg in conversation_history if msg.role == "system"), None)
-     user_messages = [{"role": msg.role, "content": msg.content} for msg in conversation_history if msg.role == "user"]
-
-     if system_message and user_messages:
-         user_messages[0]["content"] = f"{system_message.content}\n\n{user_messages[0]['content']}"
-
-     payload = {
-         "messages": user_messages,
-         "model": original_model
-     }
-
-     headers = {
-         "x-vqd-4": vqd_token,
-         "Content-Type": "application/json",
-         "User-Agent": user_agent
-     }
-
-     logging.info(f"Sending payload to DuckDuckGo with User-Agent: {user_agent}")

-     async with httpx.AsyncClient() as client:
          try:
-             response = await client.post("https://duckduckgo.com/duckchat/v1/chat", json=payload, headers=headers)
-             if response.status_code == 200:
-                 full_response = ""
-                 async for line in response.aiter_lines():
-                     if line.startswith("data: "):
-                         data = line[6:].strip()
-                         if data == "[DONE]":
-                             break
                          try:
-                             json_data = json.loads(data)
-                             message = json_data.get("message", "")
-                             full_response += message
-                             yield message
-                         except json.JSONDecodeError:
-                             logging.warning(f"Failed to parse JSON: {data}")
-             elif response.status_code == 429:
-                 logging.warning("Rate limit exceeded. Changing User-Agent and retrying.")
-                 for attempt in range(5):  # Try up to 5 times
-                     user_agent = get_next_user_agent()
-                     vqd_token = await update_vqd_token(user_agent)
-                     headers["User-Agent"] = user_agent
-                     headers["x-vqd-4"] = vqd_token
-                     logging.info(f"Retrying with new User-Agent: {user_agent}")
-                     response = await client.post("https://duckduckgo.com/duckchat/v1/chat", json=payload, headers=headers)
-                     if response.status_code == 200:
-                         async for line in response.aiter_lines():
                              if line.startswith("data: "):
-                                 data = line[6:].strip()
-                                 if data == "[DONE]":
                                      break
-                                 try:
-                                     json_data = json.loads(data)
-                                     message = json_data.get("message", "")
-                                     yield message
-                                 except json.JSONDecodeError:
-                                     logging.warning(f"Failed to parse JSON: {data}")
-                         break
-                     else:
-                         raise HTTPException(status_code=429, detail="Rate limit exceeded. Please try again later.")
-             else:
-                 logging.error(f"Error response from DuckDuckGo. Status code: {response.status_code}")
-                 raise HTTPException(status_code=response.status_code, detail=f"Error communicating with DuckDuckGo: {response.text}")
-         except httpx.HTTPStatusError as e:
-             logging.error(f"HTTP error occurred: {str(e)}")
-             raise HTTPException(status_code=e.response.status_code, detail=str(e))
-         except httpx.RequestError as e:
-             logging.error(f"Request error occurred: {str(e)}")
-             raise HTTPException(status_code=500, detail=str(e))
          except Exception as e:
-             logging.error(f"Unexpected error in chat_with_duckduckgo: {str(e)}")
-             raise HTTPException(status_code=500, detail=f"Unexpected error: {str(e)}")
-
- @app.get("/v1/models")
- async def list_models():
-     logging.info("Listing available models")
-     models = [ModelInfo(id=model_id) for model_id in MODEL_MAPPING.keys()]
-     return {"data": models, "object": "list"}
-
- @app.post("/v1/chat/completions")
- async def chat_completion(request: ChatCompletionRequest):
-     conversation_id = str(uuid.uuid4())

-     logging.info(f"Received chat completion request for conversation {conversation_id}")
-     logging.info(f"Request: {request.model_dump()}")

-     conversation_history = conversations.get(conversation_id, [])
-     conversation_history.extend(request.messages)

-     async def generate():
          try:
-             full_response = ""
-             async for chunk in chat_with_duckduckgo(" ".join([msg.content for msg in request.messages]), request.model, conversation_history):
-                 full_response += chunk
-
-                 response = ChatCompletionStreamResponse(
-                     id=conversation_id,
-                     created=int(time.time()),
-                     model=request.model,
-                     choices=[
-                         ChatCompletionStreamResponseChoice(
-                             index=0,
-                             delta=DeltaMessage(content=chunk),
-                             finish_reason=None
                          )
-                     ]
-                 )
-                 yield f"data: {response.model_dump_json()}\n\n"
-                 await asyncio.sleep(random.uniform(0.05, 0.1))
-
-             final_response = ChatCompletionStreamResponse(
-                 id=conversation_id,
-                 created=int(time.time()),
-                 model=request.model,
-                 choices=[
-                     ChatCompletionStreamResponseChoice(
-                         index=0,
-                         delta=DeltaMessage(),
-                         finish_reason="stop"
-                     )
-                 ]
              )
-             yield f"data: {final_response.model_dump_json()}\n\n"
-             yield "data: [DONE]\n\n"
          except Exception as e:
-             logging.error(f"Error during streaming: {str(e)}")
-             yield f"data: {json.dumps({'error': str(e)})}\n\n"

-     if request.stream:
-         return StreamingResponse(generate(), media_type="text/event-stream")
-     else:
-         full_response = ""
-         async for chunk in chat_with_duckduckgo(" ".join([msg.content for msg in request.messages]), request.model, conversation_history):
-             full_response += chunk

-         response = ChatCompletionResponse(
-             id=conversation_id,
-             created=int(time.time()),
-             model=request.model,
-             choices=[
-                 ChatCompletionResponseChoice(
-                     index=0,
-                     message=ChatMessage(role="assistant", content=full_response),
-                     finish_reason="stop"
-                 )
-             ],
-             usage=ChatCompletionResponseUsage(
-                 prompt_tokens=sum(len(msg.content.split()) for msg in conversation_history),
-                 completion_tokens=len(full_response.split()),
-                 total_tokens=sum(len(msg.content.split()) for msg in conversation_history) + len(full_response.split())
-             )
-         )

-         conversation_history.append(ChatMessage(role="assistant", content=full_response))
-         conversations[conversation_id] = conversation_history
-
-         return response

- @app.delete("/v1/conversations/{conversation_id}")
- async def end_conversation(conversation_id: str):
-     if conversation_id in conversations:
-         del conversations[conversation_id]
-         logging.info(f"Conversation {conversation_id} ended and context cleared")
-         return {"message": f"Conversation {conversation_id} ended and context cleared."}
-     else:
-         logging.warning(f"Attempt to end non-existent conversation {conversation_id}")
-         raise HTTPException(status_code=404, detail="Conversation not found")

  if __name__ == "__main__":
      import uvicorn
-     uvicorn.run(app, host="0.0.0.0", port=8000)
+ from fastapi import FastAPI, HTTPException, Depends
  from fastapi.middleware.cors import CORSMiddleware
+ from sse_starlette.sse import EventSourceResponse
+ from pydantic import BaseModel, Field
+ from typing import AsyncGenerator, Optional, List, Dict, Any
+ from enum import Enum
+ from datetime import datetime
  import json
+ import aiohttp
+ from functools import lru_cache
+ import os
+ class Role(str, Enum):
+     SYSTEM = "system"
+     USER = "user"
+     ASSISTANT = "assistant"

  class ChatMessage(BaseModel):
+     role: Optional[Role] = None
+     content: Optional[str] = None
+
+     def to_dict(self) -> Dict[str, Any]:
+         message_dict = {}
+         if self.role is not None:
+             message_dict['role'] = self.role
+         if self.content is not None:
+             message_dict['content'] = self.content
+         return message_dict
+
+ class UsageInfo(BaseModel):
+     prompt_tokens: Optional[int] = None
+     completion_tokens: Optional[int] = None
+     total_tokens: Optional[int] = None
+     estimated_cost: Optional[float] = None
+
+ class ChatCompletionChoice(BaseModel):
      index: int
      message: ChatMessage
      finish_reason: Optional[str] = None

+     def to_dict(self) -> Dict[str, Any]:
+         return {
+             'index': self.index,
+             'message': self.message.to_dict(),
+             'finish_reason': self.finish_reason
+         }

  class ChatCompletionResponse(BaseModel):
      id: str
+     object: str
      created: int
      model: str
+     choices: List[ChatCompletionChoice]
+     usage: Optional[Dict[str, Any]] = None
+
+     def to_dict(self) -> Dict[str, Any]:
+         return {
+             'id': self.id,
+             'object': self.object,
+             'created': self.created,
+             'model': self.model,
+             'choices': [choice.to_dict() for choice in self.choices],
+             'usage': self.usage
+         }
+
+ class ChatCompletionChunkChoice(BaseModel):
      index: int
+     delta: ChatMessage
      finish_reason: Optional[str] = None

+     def to_dict(self) -> Dict[str, Any]:
+         return {
+             'index': self.index,
+             'delta': self.delta.to_dict(),
+             'finish_reason': self.finish_reason
+         }
+
+ class ChatCompletionChunk(BaseModel):
      id: str
+     object: str
      created: int
      model: str
+     choices: List[ChatCompletionChunkChoice]
+
+     def to_dict(self) -> Dict[str, Any]:
+         return {
+             'id': self.id,
+             'object': self.object,
+             'created': self.created,
+             'model': self.model,
+             'choices': [choice.to_dict() for choice in self.choices]
+         }
+
+ class ChatRequest(BaseModel):
+     messages: List[ChatMessage]
+     model: str = Field(default="meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo")
+     temperature: float = Field(default=0.7, ge=0.0, le=2.0)
+     max_tokens: Optional[int] = Field(default=2048)
+     stream: bool = Field(default=False)
+     response_format: Optional[Dict[str, str]] = None
+
+ class DeepInfraClient:
+     def __init__(self, api_key: Optional[str] = None):
+         self.url = "https://api.deepinfra.com/v1/openai/chat/completions"
+         self.headers = {
+             "Accept": "text/event-stream, application/json",
+             "Content-Type": "application/json"
+         }
+         if api_key:
+             self.headers["Authorization"] = f"Bearer {api_key}"
+
+     def _prepare_messages(self, messages: List[ChatMessage]) -> List[Dict[str, Any]]:
+         return [message.to_dict() for message in messages]
+
+     async def generate_stream(self, request: ChatRequest) -> AsyncGenerator:
+         payload = {
+             "model": request.model,
+             "messages": self._prepare_messages(request.messages),
+             "temperature": request.temperature,
+             "max_tokens": request.max_tokens,
+             "stream": True
+         }
+
+         if request.response_format:
+             payload["response_format"] = request.response_format

+         timeout = aiohttp.ClientTimeout(total=300)
+
          try:
+             async with aiohttp.ClientSession(timeout=timeout) as session:
+                 async with session.post(
+                     self.url,
+                     headers=self.headers,
+                     json=payload,
+                     chunked=True
+                 ) as response:
+                     if response.status != 200:
+                         error_msg = await response.text()
+                         raise HTTPException(
+                             status_code=response.status,
+                             detail=f"API request failed: {error_msg}"
+                         )
+
+                     async for line in response.content:
+                         if not line:
+                             continue
+
                          try:
+                             line = line.decode('utf-8').strip()
+                             if not line:
+                                 continue
+
                              if line.startswith("data: "):
+                                 json_str = line[6:]
+                                 if json_str == "[DONE]":
+                                     yield {"data": "[DONE]"}
                                      break
+
+                                 chunk = json.loads(json_str)
+                                 chunk_obj = ChatCompletionChunk(
+                                     id=chunk["id"],
+                                     object="chat.completion.chunk",
+                                     created=int(datetime.now().timestamp()),
+                                     model=request.model,
+                                     choices=[
+                                         ChatCompletionChunkChoice(
+                                             index=choice["index"],
+                                             delta=ChatMessage(**choice.get("delta", {})),
+                                             finish_reason=choice.get("finish_reason")
+                                         )
+                                         for choice in chunk["choices"]
+                                     ]
+                                 )
+                                 yield {"data": json.dumps(chunk_obj.to_dict())}
+                         except json.JSONDecodeError:
+                             continue
+                         except Exception as e:
+                             raise HTTPException(
+                                 status_code=500,
+                                 detail=f"Stream processing error: {str(e)}"
+                             )
+
+         except aiohttp.ClientError as e:
+             raise HTTPException(
+                 status_code=500,
+                 detail=f"Connection error: {str(e)}"
+             )
          except Exception as e:
+             raise HTTPException(
+                 status_code=500,
+                 detail=f"Unexpected error: {str(e)}"
+             )

+     async def generate(self, request: ChatRequest) -> ChatCompletionResponse:
+         payload = {
+             "model": request.model,
+             "messages": self._prepare_messages(request.messages),
+             "temperature": request.temperature,
+             "max_tokens": request.max_tokens,
+             "stream": False
+         }
+
+         if request.response_format:
+             payload["response_format"] = request.response_format

+         timeout = aiohttp.ClientTimeout(total=300)

          try:
+             async with aiohttp.ClientSession(timeout=timeout) as session:
+                 async with session.post(
+                     self.url,
+                     headers=self.headers,
+                     json=payload
+                 ) as response:
+                     if response.status != 200:
+                         error_msg = await response.text()
+                         raise HTTPException(
+                             status_code=response.status,
+                             detail=f"API request failed: {error_msg}"
                          )
+
+                     try:
+                         response_data = await response.json()
+                         if not isinstance(response_data, dict):
+                             raise HTTPException(
+                                 status_code=500,
+                                 detail="Invalid response format from API"
+                             )
+
+                         # Handle usage data
+                         if 'usage' in response_data:
+                             usage_data = response_data['usage']
+                             for key in ['prompt_tokens', 'completion_tokens', 'total_tokens']:
+                                 if key in usage_data and isinstance(usage_data[key], float):
+                                     usage_data[key] = int(usage_data[key])
+
+                         # Ensure required fields are present
+                         response_data.setdefault('id', str(datetime.now().timestamp()))
+                         response_data.setdefault('object', 'chat.completion')
+                         response_data.setdefault('created', int(datetime.now().timestamp()))
+                         response_data.setdefault('model', request.model)
+
+                         return ChatCompletionResponse(**response_data)
+                     except json.JSONDecodeError as e:
+                         raise HTTPException(
+                             status_code=500,
+                             detail=f"Failed to parse API response: {str(e)}"
+                         )
+         except aiohttp.ClientError as e:
+             raise HTTPException(
+                 status_code=500,
+                 detail=f"Connection error: {str(e)}"
              )
          except Exception as e:
+             raise HTTPException(
+                 status_code=500,
+                 detail=f"Unexpected error: {str(e)}"
+             )

+ app = FastAPI(title="DeepInfra OpenAI Compatible API")

+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=["*"],
+     allow_credentials=True,
+     allow_methods=["*"],
+     allow_headers=["*"],
+ )

+ @lru_cache()
+ def get_client():
+     return DeepInfraClient()
+
+ @app.post("/v1/chat/completions")
+ async def create_chat_completion(
+     request: ChatRequest,
+     client: DeepInfraClient = Depends(get_client)
+ ):
+     try:
+         if request.stream:
+             return EventSourceResponse(client.generate_stream(request))
+         return await client.generate(request)
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=str(e))

+ @app.get("/v1/models")
+ async def list_models():
+     models = os.getenv("MODELS", "").split(",")

+     current_timestamp = int(datetime.now().timestamp())
+
+     return {
+         "data": [
+             {
+                 "id": model_id,
+                 "object": "model",
+                 "created": current_timestamp,
+                 "owned_by": "deepinfra"
+             }
+             for model_id in models
+         ]
+     }
  if __name__ == "__main__":
      import uvicorn
+     uvicorn.run(app, host="0.0.0.0", port=8000)
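For reference, a minimal client sketch against the updated server (not part of the commit): it assumes the app is running locally on port 8000, uses the default model from ChatRequest, and that MODELS is set for /v1/models (e.g. MODELS=meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo). The streaming branch parses the "data: {...}" lines emitted by EventSourceResponse and stops at the "[DONE]" sentinel, mirroring how the server itself consumes DeepInfra's stream.

# Hypothetical client for the server above; BASE_URL is an assumption.
import asyncio
import json

import aiohttp

BASE_URL = "http://localhost:8000"

async def main():
    payload = {
        "messages": [{"role": "user", "content": "Say hello in one sentence."}],
        "model": "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
        "stream": False,
    }
    async with aiohttp.ClientSession() as session:
        # Non-streaming: /v1/chat/completions returns a ChatCompletionResponse as JSON.
        async with session.post(f"{BASE_URL}/v1/chat/completions", json=payload) as resp:
            data = await resp.json()
            print(data["choices"][0]["message"]["content"])

        # Streaming: the same route emits server-sent events,
        # one "data: {...}" line per chunk, terminated by "data: [DONE]".
        payload["stream"] = True
        async with session.post(f"{BASE_URL}/v1/chat/completions", json=payload) as resp:
            async for raw_line in resp.content:
                line = raw_line.decode("utf-8").strip()
                if not line.startswith("data: "):
                    continue
                body = line[6:]
                if body == "[DONE]":
                    break
                delta = json.loads(body)["choices"][0]["delta"]
                print(delta.get("content", ""), end="", flush=True)

asyncio.run(main())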