Abhaykoul committed on
Commit c29cacb
1 Parent(s): ee5c64f

Update app.py

Files changed (1)
  1. app.py +265 -255
app.py CHANGED
@@ -1,294 +1,304 @@
- from fastapi import FastAPI, HTTPException, Request
  from fastapi.middleware.cors import CORSMiddleware
- from pydantic import BaseModel
- from typing import List, Dict, Optional, Union
- from fastapi.responses import StreamingResponse
- import logging
- import uuid
- import time
  import json
- import asyncio
- import random
- import httpx
- from fake_useragent import UserAgent
-
- app = FastAPI()
-
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
-
- app.add_middleware(
-     CORSMiddleware,
-     allow_origins=["*"],
-     allow_credentials=True,
-     allow_methods=["*"],
-     allow_headers=["*"],
- )
- MODEL_MAPPING = {
-     "keyless-gpt-4o-mini": "gpt-4o-mini",
-     "keyless-claude-3-haiku": "claude-3-haiku-20240307",
-     "keyless-mixtral-8x7b": "mistralai/Mixtral-8x7B-Instruct-v0.1",
-     "keyless-meta-Llama-3.1-70B-Instruct-Turbo": "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo"
- }
-
- class ModelInfo(BaseModel):
-     id: str
-     object: str = "model"
-     created: int = int(time.time())
-     owned_by: str = "custom"

  class ChatMessage(BaseModel):
-     role: str
-     content: str
-
- class ChatCompletionRequest(BaseModel):
-     model: str
-     messages: List[ChatMessage]
-     temperature: Optional[float] = 1.0
-     top_p: Optional[float] = 1.0
-     n: Optional[int] = 1
-     stream: Optional[bool] = False
-     stop: Optional[Union[str, List[str]]] = None
-     max_tokens: Optional[int] = None
-     presence_penalty: Optional[float] = 0.0
-     frequency_penalty: Optional[float] = 0.0
-     logit_bias: Optional[Dict[str, float]] = None
-     user: Optional[str] = None
-
- class ChatCompletionResponseChoice(BaseModel):
      index: int
      message: ChatMessage
      finish_reason: Optional[str] = None

- class ChatCompletionResponseUsage(BaseModel):
-     prompt_tokens: int
-     completion_tokens: int
-     total_tokens: int

  class ChatCompletionResponse(BaseModel):
      id: str
-     object: str = "chat.completion"
      created: int
      model: str
-     choices: List[ChatCompletionResponseChoice]
-     usage: ChatCompletionResponseUsage
-
- class DeltaMessage(BaseModel):
-     role: Optional[str] = None
-     content: Optional[str] = None
-
- class ChatCompletionStreamResponseChoice(BaseModel):
      index: int
-     delta: DeltaMessage
      finish_reason: Optional[str] = None

- class ChatCompletionStreamResponse(BaseModel):
      id: str
-     object: str = "chat.completion.chunk"
      created: int
      model: str
-     choices: List[ChatCompletionStreamResponseChoice]
-
- # Store active conversations
- conversations: Dict[str, List[ChatMessage]] = {}
-
- ua = UserAgent()
-
- def get_next_user_agent():
-     return ua.random
-
- async def update_vqd_token(user_agent):
-     async with httpx.AsyncClient() as client:
-         try:
-             await client.get("https://duckduckgo.com/country.json", headers={"User-Agent": user_agent})
-             headers = {"x-vqd-accept": "1", "User-Agent": user_agent}
-             response = await client.get("https://duckduckgo.com/duckchat/v1/status", headers=headers)
-             if response.status_code == 200:
-                 vqd_token = response.headers.get("x-vqd-4", "")
-                 logging.info(f"Fetched new x-vqd-4 token: {vqd_token}")
-                 return vqd_token
-             else:
-                 logging.warning(f"Failed to fetch x-vqd-4 token. Status code: {response.status_code}")
-                 return ""
-         except Exception as e:
-             logging.error(f"Error fetching x-vqd-4 token: {str(e)}")
-             return ""
-
- async def chat_with_duckduckgo(query: str, model: str, conversation_history: List[ChatMessage]):
-     original_model = MODEL_MAPPING.get(model, model)
-     user_agent = get_next_user_agent()
-     vqd_token = await update_vqd_token(user_agent)
-     if not vqd_token:
-         raise HTTPException(status_code=500, detail="Failed to obtain VQD token")
-
-     # If there is a system message, prepend it to the first user message (DDG AI doesn't let us send system messages, so this is a workaround -- fundamentally, it works the same way as setting a system prompt)
-     system_message = next((msg for msg in conversation_history if msg.role == "system"), None)
-     user_messages = [{"role": msg.role, "content": msg.content} for msg in conversation_history if msg.role == "user"]
-
-     if system_message and user_messages:
-         user_messages[0]["content"] = f"{system_message.content}\n\n{user_messages[0]['content']}"
-
-     payload = {
-         "messages": user_messages,
-         "model": original_model
-     }
-
-     headers = {
-         "x-vqd-4": vqd_token,
-         "Content-Type": "application/json",
-         "User-Agent": user_agent
-     }
-
-     logging.info(f"Sending payload to DuckDuckGo with User-Agent: {user_agent}")

-     async with httpx.AsyncClient() as client:
          try:
-             response = await client.post("https://duckduckgo.com/duckchat/v1/chat", json=payload, headers=headers)
-             if response.status_code == 200:
-                 full_response = ""
-                 async for line in response.aiter_lines():
-                     if line.startswith("data: "):
-                         data = line[6:].strip()
-                         if data == "[DONE]":
-                             break
                          try:
-                             json_data = json.loads(data)
-                             message = json_data.get("message", "")
-                             full_response += message
-                             yield message
-                         except json.JSONDecodeError:
-                             logging.warning(f"Failed to parse JSON: {data}")
-             elif response.status_code == 429:
-                 logging.warning("Rate limit exceeded. Changing User-Agent and retrying.")
-                 for attempt in range(5):  # Try up to 5 times
-                     user_agent = get_next_user_agent()
-                     vqd_token = await update_vqd_token(user_agent)
-                     headers["User-Agent"] = user_agent
-                     headers["x-vqd-4"] = vqd_token
-                     logging.info(f"Retrying with new User-Agent: {user_agent}")
-                     response = await client.post("https://duckduckgo.com/duckchat/v1/chat", json=payload, headers=headers)
-                     if response.status_code == 200:
-                         async for line in response.aiter_lines():
                              if line.startswith("data: "):
-                                 data = line[6:].strip()
-                                 if data == "[DONE]":
                                      break
-                                 try:
-                                     json_data = json.loads(data)
-                                     message = json_data.get("message", "")
-                                     yield message
-                                 except json.JSONDecodeError:
-                                     logging.warning(f"Failed to parse JSON: {data}")
-                         break
-                     else:
-                         raise HTTPException(status_code=429, detail="Rate limit exceeded. Please try again later.")
-             else:
-                 logging.error(f"Error response from DuckDuckGo. Status code: {response.status_code}")
-                 raise HTTPException(status_code=response.status_code, detail=f"Error communicating with DuckDuckGo: {response.text}")
-         except httpx.HTTPStatusError as e:
-             logging.error(f"HTTP error occurred: {str(e)}")
-             raise HTTPException(status_code=e.response.status_code, detail=str(e))
-         except httpx.RequestError as e:
-             logging.error(f"Request error occurred: {str(e)}")
-             raise HTTPException(status_code=500, detail=str(e))
          except Exception as e:
-             logging.error(f"Unexpected error in chat_with_duckduckgo: {str(e)}")
-             raise HTTPException(status_code=500, detail=f"Unexpected error: {str(e)}")
-
- @app.get("/v1/models")
- async def list_models():
-     logging.info("Listing available models")
-     models = [ModelInfo(id=model_id) for model_id in MODEL_MAPPING.keys()]
-     return {"data": models, "object": "list"}
-
- @app.post("/v1/chat/completions")
- async def chat_completion(request: ChatCompletionRequest):
-     conversation_id = str(uuid.uuid4())

-     logging.info(f"Received chat completion request for conversation {conversation_id}")
-     logging.info(f"Request: {request.model_dump()}")

-     conversation_history = conversations.get(conversation_id, [])
-     conversation_history.extend(request.messages)

-     async def generate():
          try:
-             full_response = ""
-             async for chunk in chat_with_duckduckgo(" ".join([msg.content for msg in request.messages]), request.model, conversation_history):
-                 full_response += chunk
-
-                 response = ChatCompletionStreamResponse(
-                     id=conversation_id,
-                     created=int(time.time()),
-                     model=request.model,
-                     choices=[
-                         ChatCompletionStreamResponseChoice(
-                             index=0,
-                             delta=DeltaMessage(content=chunk),
-                             finish_reason=None
                          )
-                     ]
-                 )
-                 yield f"data: {response.model_dump_json()}\n\n"
-                 await asyncio.sleep(random.uniform(0.05, 0.1))
-
-             final_response = ChatCompletionStreamResponse(
-                 id=conversation_id,
-                 created=int(time.time()),
-                 model=request.model,
-                 choices=[
-                     ChatCompletionStreamResponseChoice(
-                         index=0,
-                         delta=DeltaMessage(),
-                         finish_reason="stop"
-                     )
-                 ]
              )
-             yield f"data: {final_response.model_dump_json()}\n\n"
-             yield "data: [DONE]\n\n"
          except Exception as e:
-             logging.error(f"Error during streaming: {str(e)}")
-             yield f"data: {json.dumps({'error': str(e)})}\n\n"

-     if request.stream:
-         return StreamingResponse(generate(), media_type="text/event-stream")
-     else:
-         full_response = ""
-         async for chunk in chat_with_duckduckgo(" ".join([msg.content for msg in request.messages]), request.model, conversation_history):
-             full_response += chunk

-         response = ChatCompletionResponse(
-             id=conversation_id,
-             created=int(time.time()),
-             model=request.model,
-             choices=[
-                 ChatCompletionResponseChoice(
-                     index=0,
-                     message=ChatMessage(role="assistant", content=full_response),
-                     finish_reason="stop"
-                 )
-             ],
-             usage=ChatCompletionResponseUsage(
-                 prompt_tokens=sum(len(msg.content.split()) for msg in conversation_history),
-                 completion_tokens=len(full_response.split()),
-                 total_tokens=sum(len(msg.content.split()) for msg in conversation_history) + len(full_response.split())
-             )
-         )

-         conversation_history.append(ChatMessage(role="assistant", content=full_response))
-         conversations[conversation_id] = conversation_history
-
-         return response

- @app.delete("/v1/conversations/{conversation_id}")
- async def end_conversation(conversation_id: str):
-     if conversation_id in conversations:
-         del conversations[conversation_id]
-         logging.info(f"Conversation {conversation_id} ended and context cleared")
-         return {"message": f"Conversation {conversation_id} ended and context cleared."}
-     else:
-         logging.warning(f"Attempt to end non-existent conversation {conversation_id}")
-         raise HTTPException(status_code=404, detail="Conversation not found")

  if __name__ == "__main__":
      import uvicorn
-     uvicorn.run(app, host="0.0.0.0", port=8000)
+ from fastapi import FastAPI, HTTPException, Depends
  from fastapi.middleware.cors import CORSMiddleware
+ from sse_starlette.sse import EventSourceResponse
+ from pydantic import BaseModel, Field
+ from typing import AsyncGenerator, Optional, List, Dict, Any
+ from enum import Enum
+ from datetime import datetime
  import json
+ import aiohttp
+ from functools import lru_cache
+ import os
+ class Role(str, Enum):
+     SYSTEM = "system"
+     USER = "user"
+     ASSISTANT = "assistant"

  class ChatMessage(BaseModel):
+     role: Optional[Role] = None
+     content: Optional[str] = None
+
+     def to_dict(self) -> Dict[str, Any]:
+         message_dict = {}
+         if self.role is not None:
+             message_dict['role'] = self.role
+         if self.content is not None:
+             message_dict['content'] = self.content
+         return message_dict
+
+ class UsageInfo(BaseModel):
+     prompt_tokens: Optional[int] = None
+     completion_tokens: Optional[int] = None
+     total_tokens: Optional[int] = None
+     estimated_cost: Optional[float] = None
+
+ class ChatCompletionChoice(BaseModel):
      index: int
      message: ChatMessage
      finish_reason: Optional[str] = None

+     def to_dict(self) -> Dict[str, Any]:
+         return {
+             'index': self.index,
+             'message': self.message.to_dict(),
+             'finish_reason': self.finish_reason
+         }

  class ChatCompletionResponse(BaseModel):
      id: str
+     object: str
      created: int
      model: str
+     choices: List[ChatCompletionChoice]
+     usage: Optional[Dict[str, Any]] = None
+
+     def to_dict(self) -> Dict[str, Any]:
+         return {
+             'id': self.id,
+             'object': self.object,
+             'created': self.created,
+             'model': self.model,
+             'choices': [choice.to_dict() for choice in self.choices],
+             'usage': self.usage
+         }
+
+ class ChatCompletionChunkChoice(BaseModel):
      index: int
+     delta: ChatMessage
      finish_reason: Optional[str] = None

+     def to_dict(self) -> Dict[str, Any]:
+         return {
+             'index': self.index,
+             'delta': self.delta.to_dict(),
+             'finish_reason': self.finish_reason
+         }
+
+ class ChatCompletionChunk(BaseModel):
      id: str
+     object: str
      created: int
      model: str
+     choices: List[ChatCompletionChunkChoice]
+
+     def to_dict(self) -> Dict[str, Any]:
+         return {
+             'id': self.id,
+             'object': self.object,
+             'created': self.created,
+             'model': self.model,
+             'choices': [choice.to_dict() for choice in self.choices]
+         }
+
+ class ChatRequest(BaseModel):
+     messages: List[ChatMessage]
+     model: str = Field(default="meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo")
+     temperature: float = Field(default=0.7, ge=0.0, le=2.0)
+     max_tokens: Optional[int] = Field(default=2048)
+     stream: bool = Field(default=False)
+     response_format: Optional[Dict[str, str]] = None
+
+ class DeepInfraClient:
+     def __init__(self, api_key: Optional[str] = None):
+         self.url = "https://api.deepinfra.com/v1/openai/chat/completions"
+         self.headers = {
+             "Accept": "text/event-stream, application/json",
+             "Content-Type": "application/json"
+         }
+         if api_key:
+             self.headers["Authorization"] = f"Bearer {api_key}"
+
+     def _prepare_messages(self, messages: List[ChatMessage]) -> List[Dict[str, Any]]:
+         return [message.to_dict() for message in messages]
+
+     async def generate_stream(self, request: ChatRequest) -> AsyncGenerator:
+         payload = {
+             "model": request.model,
+             "messages": self._prepare_messages(request.messages),
+             "temperature": request.temperature,
+             "max_tokens": request.max_tokens,
+             "stream": True
+         }
+
+         if request.response_format:
+             payload["response_format"] = request.response_format

+         timeout = aiohttp.ClientTimeout(total=300)
+
          try:
+             async with aiohttp.ClientSession(timeout=timeout) as session:
+                 async with session.post(
+                     self.url,
+                     headers=self.headers,
+                     json=payload,
+                     chunked=True
+                 ) as response:
+                     if response.status != 200:
+                         error_msg = await response.text()
+                         raise HTTPException(
+                             status_code=response.status,
+                             detail=f"API request failed: {error_msg}"
+                         )
+
+                     async for line in response.content:
+                         if not line:
+                             continue
+
                          try:
+                             line = line.decode('utf-8').strip()
+                             if not line:
+                                 continue
+
                              if line.startswith("data: "):
+                                 json_str = line[6:]
+                                 if json_str == "[DONE]":
+                                     yield {"data": "[DONE]"}
                                      break
+
+                                 chunk = json.loads(json_str)
+                                 chunk_obj = ChatCompletionChunk(
+                                     id=chunk["id"],
+                                     object="chat.completion.chunk",
+                                     created=int(datetime.now().timestamp()),
+                                     model=request.model,
+                                     choices=[
+                                         ChatCompletionChunkChoice(
+                                             index=choice["index"],
+                                             delta=ChatMessage(**choice.get("delta", {})),
+                                             finish_reason=choice.get("finish_reason")
+                                         )
+                                         for choice in chunk["choices"]
+                                     ]
+                                 )
+                                 yield {"data": json.dumps(chunk_obj.to_dict())}
+                         except json.JSONDecodeError:
+                             continue
+                         except Exception as e:
+                             raise HTTPException(
+                                 status_code=500,
+                                 detail=f"Stream processing error: {str(e)}"
+                             )
+
+         except aiohttp.ClientError as e:
+             raise HTTPException(
+                 status_code=500,
+                 detail=f"Connection error: {str(e)}"
+             )
          except Exception as e:
+             raise HTTPException(
+                 status_code=500,
+                 detail=f"Unexpected error: {str(e)}"
+             )

+     async def generate(self, request: ChatRequest) -> ChatCompletionResponse:
+         payload = {
+             "model": request.model,
+             "messages": self._prepare_messages(request.messages),
+             "temperature": request.temperature,
+             "max_tokens": request.max_tokens,
+             "stream": False
+         }
+
+         if request.response_format:
+             payload["response_format"] = request.response_format

+         timeout = aiohttp.ClientTimeout(total=300)

          try:
+             async with aiohttp.ClientSession(timeout=timeout) as session:
+                 async with session.post(
+                     self.url,
+                     headers=self.headers,
+                     json=payload
+                 ) as response:
+                     if response.status != 200:
+                         error_msg = await response.text()
+                         raise HTTPException(
+                             status_code=response.status,
+                             detail=f"API request failed: {error_msg}"
                          )
+
+                     try:
+                         response_data = await response.json()
+                         if not isinstance(response_data, dict):
+                             raise HTTPException(
+                                 status_code=500,
+                                 detail="Invalid response format from API"
+                             )
+
+                         # Handle usage data
+                         if 'usage' in response_data:
+                             usage_data = response_data['usage']
+                             for key in ['prompt_tokens', 'completion_tokens', 'total_tokens']:
+                                 if key in usage_data and isinstance(usage_data[key], float):
+                                     usage_data[key] = int(usage_data[key])
+
+                         # Ensure required fields are present
+                         response_data.setdefault('id', str(datetime.now().timestamp()))
+                         response_data.setdefault('object', 'chat.completion')
+                         response_data.setdefault('created', int(datetime.now().timestamp()))
+                         response_data.setdefault('model', request.model)
+
+                         return ChatCompletionResponse(**response_data)
+                     except json.JSONDecodeError as e:
+                         raise HTTPException(
+                             status_code=500,
+                             detail=f"Failed to parse API response: {str(e)}"
+                         )
+         except aiohttp.ClientError as e:
+             raise HTTPException(
+                 status_code=500,
+                 detail=f"Connection error: {str(e)}"
              )
          except Exception as e:
+             raise HTTPException(
+                 status_code=500,
+                 detail=f"Unexpected error: {str(e)}"
+             )

+ app = FastAPI(title="DeepInfra OpenAI Compatible API")

+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=["*"],
+     allow_credentials=True,
+     allow_methods=["*"],
+     allow_headers=["*"],
+ )

+ @lru_cache()
+ def get_client():
+     return DeepInfraClient()
+
+ @app.post("/v1/chat/completions")
+ async def create_chat_completion(
+     request: ChatRequest,
+     client: DeepInfraClient = Depends(get_client)
+ ):
+     try:
+         if request.stream:
+             return EventSourceResponse(client.generate_stream(request))
+         return await client.generate(request)
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=str(e))

+ @app.get("/v1/models")
+ async def list_models():
+     models = os.getenv("MODELS", "").split(",")

+     current_timestamp = int(datetime.now().timestamp())
+
+     return {
+         "data": [
+             {
+                 "id": model_id,
+                 "object": "model",
+                 "created": current_timestamp,
+                 "owned_by": "deepinfra"
+             }
+             for model_id in models
+         ]
+     }
  if __name__ == "__main__":
      import uvicorn
+     uvicorn.run(app, host="0.0.0.0", port=8000)
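For reference, a minimal client sketch against the updated server (not part of the commit): it assumes the app is running locally on port 8000, uses the default model from ChatRequest, and that MODELS is set for /v1/models (e.g. MODELS=meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo). The streaming branch parses the "data: {...}" lines emitted by EventSourceResponse and stops at the "[DONE]" sentinel, mirroring how the server itself consumes DeepInfra's stream.

# Hypothetical client for the server above; BASE_URL is an assumption.
import asyncio
import json

import aiohttp

BASE_URL = "http://localhost:8000"

async def main():
    payload = {
        "messages": [{"role": "user", "content": "Say hello in one sentence."}],
        "model": "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
        "stream": False,
    }
    async with aiohttp.ClientSession() as session:
        # Non-streaming: /v1/chat/completions returns a ChatCompletionResponse as JSON.
        async with session.post(f"{BASE_URL}/v1/chat/completions", json=payload) as resp:
            data = await resp.json()
            print(data["choices"][0]["message"]["content"])

        # Streaming: the same route emits server-sent events,
        # one "data: {...}" line per chunk, terminated by "data: [DONE]".
        payload["stream"] = True
        async with session.post(f"{BASE_URL}/v1/chat/completions", json=payload) as resp:
            async for raw_line in resp.content:
                line = raw_line.decode("utf-8").strip()
                if not line.startswith("data: "):
                    continue
                body = line[6:]
                if body == "[DONE]":
                    break
                delta = json.loads(body)["choices"][0]["delta"]
                print(delta.get("content", ""), end="", flush=True)

asyncio.run(main())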