Update app.py
app.py CHANGED
@@ -1,294 +1,304 @@
-from fastapi import FastAPI, HTTPException, ...
 from fastapi.middleware.cors import CORSMiddleware
-from ...
-from ...
-from ...
-import ...
-import ...
-import time
 import json
-import ...
-import ...
-import ...
...
-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
...
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=["*"],
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
-)
-MODEL_MAPPING = {
-    "keyless-gpt-4o-mini": "gpt-4o-mini",
-    "keyless-claude-3-haiku": "claude-3-haiku-20240307",
-    "keyless-mixtral-8x7b": "mistralai/Mixtral-8x7B-Instruct-v0.1",
-    "keyless-meta-Llama-3.1-70B-Instruct-Turbo": "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo"
-}
-
-class ModelInfo(BaseModel):
-    id: str
-    object: str = "model"
-    created: int = int(time.time())
-    owned_by: str = "custom"
 
 class ChatMessage(BaseModel):
-    role: ...
-    content: str
...
-class ...
     index: int
     message: ChatMessage
     finish_reason: Optional[str] = None
 
...
 
 class ChatCompletionResponse(BaseModel):
     id: str
-    object: str
     created: int
     model: str
-    choices: List[...
-    usage: ...
...
     index: int
-    delta: ...
     finish_reason: Optional[str] = None
 
...
     id: str
-    object: str
     created: int
     model: str
-    choices: List[...
...
-        "model": original_model
-    }
-
-    headers = {
-        "x-vqd-4": vqd_token,
-        "Content-Type": "application/json",
-        "User-Agent": user_agent
-    }
-
-    logging.info(f"Sending payload to DuckDuckGo with User-Agent: {user_agent}")
 
     try:
...
         try:
...
-        except json.JSONDecodeError:
-            logging.warning(f"Failed to parse JSON: {data}")
-    elif response.status_code == 429:
-        logging.warning("Rate limit exceeded. Changing User-Agent and retrying.")
-        for attempt in range(5):  # Try up to 5 times
-            user_agent = get_next_user_agent()
-            vqd_token = await update_vqd_token(user_agent)
-            headers["User-Agent"] = user_agent
-            headers["x-vqd-4"] = vqd_token
-            logging.info(f"Retrying with new User-Agent: {user_agent}")
-            response = await client.post("https://duckduckgo.com/duckchat/v1/chat", json=payload, headers=headers)
-            if response.status_code == 200:
-                async for line in response.aiter_lines():
                     if line.startswith("data: "):
...
-                        if ...
                             break
...
     except Exception as e:
...
-async def list_models():
-    logging.info("Listing available models")
-    models = [ModelInfo(id=model_id) for model_id in MODEL_MAPPING.keys()]
-    return {"data": models, "object": "list"}
-
-@app.post("/v1/chat/completions")
-async def chat_completion(request: ChatCompletionRequest):
-    conversation_id = str(uuid.uuid4())
...
-    conversation_history.extend(request.messages)
 
-    async def generate():
         try:
...
-                delta=DeltaMessage(content=chunk),
-                finish_reason=None
             )
...
             )
-            yield f"data: {final_response.model_dump_json()}\n\n"
-            yield "data: [DONE]\n\n"
         except Exception as e:
...
-        return StreamingResponse(generate(), media_type="text/event-stream")
-    else:
-        full_response = ""
-        async for chunk in chat_with_duckduckgo(" ".join([msg.content for msg in request.messages]), request.model, conversation_history):
-            full_response += chunk
 
...
-                message=ChatMessage(role="assistant", content=full_response),
-                finish_reason="stop"
-            )
-        ],
-        usage=ChatCompletionResponseUsage(
-            prompt_tokens=sum(len(msg.content.split()) for msg in conversation_history),
-            completion_tokens=len(full_response.split()),
-            total_tokens=sum(len(msg.content.split()) for msg in conversation_history) + len(full_response.split())
-        )
-    )
...
-@app. ...
-async def ...
...
-        del conversations[conversation_id]
-        logging.info(f"Conversation {conversation_id} ended and context cleared")
-        return {"message": f"Conversation {conversation_id} ended and context cleared."}
-    else:
-        logging.warning(f"Attempt to end non-existent conversation {conversation_id}")
-        raise HTTPException(status_code=404, detail="Conversation not found")
 
 if __name__ == "__main__":
     import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=8000)
+from fastapi import FastAPI, HTTPException, Depends
 from fastapi.middleware.cors import CORSMiddleware
+from sse_starlette.sse import EventSourceResponse
+from pydantic import BaseModel, Field
+from typing import AsyncGenerator, Optional, List, Dict, Any
+from enum import Enum
+from datetime import datetime
 import json
+import aiohttp
+from functools import lru_cache
+import os
+class Role(str, Enum):
+    SYSTEM = "system"
+    USER = "user"
+    ASSISTANT = "assistant"
 
 class ChatMessage(BaseModel):
+    role: Optional[Role] = None
+    content: Optional[str] = None
+
+    def to_dict(self) -> Dict[str, Any]:
+        message_dict = {}
+        if self.role is not None:
+            message_dict['role'] = self.role
+        if self.content is not None:
+            message_dict['content'] = self.content
+        return message_dict
+
+class UsageInfo(BaseModel):
+    prompt_tokens: Optional[int] = None
+    completion_tokens: Optional[int] = None
+    total_tokens: Optional[int] = None
+    estimated_cost: Optional[float] = None
+
+class ChatCompletionChoice(BaseModel):
     index: int
     message: ChatMessage
     finish_reason: Optional[str] = None
 
+    def to_dict(self) -> Dict[str, Any]:
+        return {
+            'index': self.index,
+            'message': self.message.to_dict(),
+            'finish_reason': self.finish_reason
+        }
 
 class ChatCompletionResponse(BaseModel):
     id: str
+    object: str
     created: int
     model: str
+    choices: List[ChatCompletionChoice]
+    usage: Optional[Dict[str, Any]] = None
+
+    def to_dict(self) -> Dict[str, Any]:
+        return {
+            'id': self.id,
+            'object': self.object,
+            'created': self.created,
+            'model': self.model,
+            'choices': [choice.to_dict() for choice in self.choices],
+            'usage': self.usage
+        }
+
+class ChatCompletionChunkChoice(BaseModel):
     index: int
+    delta: ChatMessage
     finish_reason: Optional[str] = None
 
+    def to_dict(self) -> Dict[str, Any]:
+        return {
+            'index': self.index,
+            'delta': self.delta.to_dict(),
+            'finish_reason': self.finish_reason
+        }
+
+class ChatCompletionChunk(BaseModel):
     id: str
+    object: str
     created: int
     model: str
+    choices: List[ChatCompletionChunkChoice]
+
+    def to_dict(self) -> Dict[str, Any]:
+        return {
+            'id': self.id,
+            'object': self.object,
+            'created': self.created,
+            'model': self.model,
+            'choices': [choice.to_dict() for choice in self.choices]
+        }
+
+class ChatRequest(BaseModel):
+    messages: List[ChatMessage]
+    model: str = Field(default="meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo")
+    temperature: float = Field(default=0.7, ge=0.0, le=2.0)
+    max_tokens: Optional[int] = Field(default=2048)
+    stream: bool = Field(default=False)
+    response_format: Optional[Dict[str, str]] = None
+
+class DeepInfraClient:
+    def __init__(self, api_key: Optional[str] = None):
+        self.url = "https://api.deepinfra.com/v1/openai/chat/completions"
+        self.headers = {
+            "Accept": "text/event-stream, application/json",
+            "Content-Type": "application/json"
+        }
+        if api_key:
+            self.headers["Authorization"] = f"Bearer {api_key}"
+
+    def _prepare_messages(self, messages: List[ChatMessage]) -> List[Dict[str, Any]]:
+        return [message.to_dict() for message in messages]
+
+    async def generate_stream(self, request: ChatRequest) -> AsyncGenerator:
+        payload = {
+            "model": request.model,
+            "messages": self._prepare_messages(request.messages),
+            "temperature": request.temperature,
+            "max_tokens": request.max_tokens,
+            "stream": True
+        }
+
+        if request.response_format:
+            payload["response_format"] = request.response_format
 
+        timeout = aiohttp.ClientTimeout(total=300)
+
         try:
+            async with aiohttp.ClientSession(timeout=timeout) as session:
+                async with session.post(
+                    self.url,
+                    headers=self.headers,
+                    json=payload,
+                    chunked=True
+                ) as response:
+                    if response.status != 200:
+                        error_msg = await response.text()
+                        raise HTTPException(
+                            status_code=response.status,
+                            detail=f"API request failed: {error_msg}"
+                        )
+
+                    async for line in response.content:
+                        if not line:
+                            continue
+
                         try:
+                            line = line.decode('utf-8').strip()
+                            if not line:
+                                continue
+
                             if line.startswith("data: "):
+                                json_str = line[6:]
+                                if json_str == "[DONE]":
+                                    yield {"data": "[DONE]"}
                                     break
+
+                                chunk = json.loads(json_str)
+                                chunk_obj = ChatCompletionChunk(
+                                    id=chunk["id"],
+                                    object="chat.completion.chunk",
+                                    created=int(datetime.now().timestamp()),
+                                    model=request.model,
+                                    choices=[
+                                        ChatCompletionChunkChoice(
+                                            index=choice["index"],
+                                            delta=ChatMessage(**choice.get("delta", {})),
+                                            finish_reason=choice.get("finish_reason")
+                                        )
+                                        for choice in chunk["choices"]
+                                    ]
+                                )
+                                yield {"data": json.dumps(chunk_obj.to_dict())}
+                        except json.JSONDecodeError:
+                            continue
+                        except Exception as e:
+                            raise HTTPException(
+                                status_code=500,
+                                detail=f"Stream processing error: {str(e)}"
+                            )
+
+        except aiohttp.ClientError as e:
+            raise HTTPException(
+                status_code=500,
+                detail=f"Connection error: {str(e)}"
+            )
         except Exception as e:
+            raise HTTPException(
+                status_code=500,
+                detail=f"Unexpected error: {str(e)}"
+            )
 
+    async def generate(self, request: ChatRequest) -> ChatCompletionResponse:
+        payload = {
+            "model": request.model,
+            "messages": self._prepare_messages(request.messages),
+            "temperature": request.temperature,
+            "max_tokens": request.max_tokens,
+            "stream": False
+        }
+
+        if request.response_format:
+            payload["response_format"] = request.response_format
 
+        timeout = aiohttp.ClientTimeout(total=300)
 
         try:
+            async with aiohttp.ClientSession(timeout=timeout) as session:
+                async with session.post(
+                    self.url,
+                    headers=self.headers,
+                    json=payload
+                ) as response:
+                    if response.status != 200:
+                        error_msg = await response.text()
+                        raise HTTPException(
+                            status_code=response.status,
+                            detail=f"API request failed: {error_msg}"
                         )
+
+                    try:
+                        response_data = await response.json()
+                        if not isinstance(response_data, dict):
+                            raise HTTPException(
+                                status_code=500,
+                                detail="Invalid response format from API"
+                            )
+
+                        # Handle usage data
+                        if 'usage' in response_data:
+                            usage_data = response_data['usage']
+                            for key in ['prompt_tokens', 'completion_tokens', 'total_tokens']:
+                                if key in usage_data and isinstance(usage_data[key], float):
+                                    usage_data[key] = int(usage_data[key])
+
+                        # Ensure required fields are present
+                        response_data.setdefault('id', str(datetime.now().timestamp()))
+                        response_data.setdefault('object', 'chat.completion')
+                        response_data.setdefault('created', int(datetime.now().timestamp()))
+                        response_data.setdefault('model', request.model)
+
+                        return ChatCompletionResponse(**response_data)
+                    except json.JSONDecodeError as e:
+                        raise HTTPException(
+                            status_code=500,
+                            detail=f"Failed to parse API response: {str(e)}"
+                        )
+        except aiohttp.ClientError as e:
+            raise HTTPException(
+                status_code=500,
+                detail=f"Connection error: {str(e)}"
             )
         except Exception as e:
+            raise HTTPException(
+                status_code=500,
+                detail=f"Unexpected error: {str(e)}"
+            )
 
+app = FastAPI(title="DeepInfra OpenAI Compatible API")
 
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
 
+@lru_cache()
+def get_client():
+    return DeepInfraClient()
+
+@app.post("/v1/chat/completions")
+async def create_chat_completion(
+    request: ChatRequest,
+    client: DeepInfraClient = Depends(get_client)
+):
+    try:
+        if request.stream:
+            return EventSourceResponse(client.generate_stream(request))
+        return await client.generate(request)
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
 
+@app.get("/v1/models")
+async def list_models():
+    models = os.getenv("MODELS", "").split(",")
 
+    current_timestamp = int(datetime.now().timestamp())
+
+    return {
+        "data": [
+            {
+                "id": model_id,
+                "object": "model",
+                "created": current_timestamp,
+                "owned_by": "deepinfra"
+            }
+            for model_id in models
+        ]
+    }
 if __name__ == "__main__":
     import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=8000)
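
For reference, a minimal smoke test for the new non-streaming path (not part of the commit): it assumes the app is running locally on port 8000 and that the default model id is accepted by the upstream DeepInfra endpoint.

# Hypothetical client-side check of POST /v1/chat/completions (stream=False).
import httpx

payload = {
    "model": "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
    "messages": [{"role": "user", "content": "Say hello in one sentence."}],
    "stream": False,
}

resp = httpx.post("http://localhost:8000/v1/chat/completions", json=payload, timeout=60.0)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])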
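
The streaming path wraps generate_stream in an EventSourceResponse, so each SSE event carries either a serialized ChatCompletionChunk or the [DONE] sentinel in its data field. A sketch of a hand-rolled consumer, under the same local-server assumption:

# Hypothetical consumer for POST /v1/chat/completions with stream=True.
import json
import httpx

payload = {
    "model": "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
    "messages": [{"role": "user", "content": "Count to five."}],
    "stream": True,
}

with httpx.stream("POST", "http://localhost:8000/v1/chat/completions", json=payload, timeout=60.0) as resp:
    for line in resp.iter_lines():
        if not line.startswith("data: "):
            continue  # skip blank keep-alive lines between events
        data = line[len("data: "):]
        if data == "[DONE]":
            break
        chunk = json.loads(data)
        delta = chunk["choices"][0]["delta"]
        print(delta.get("content", ""), end="", flush=True)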
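
One caveat on the new /v1/models route: it builds its catalogue from a comma-separated MODELS environment variable, and with MODELS unset "".split(",") evaluates to [""], so the listing would contain a single empty id. A quick check, assuming the server was started with something like MODELS="model-a,model-b" python app.py (the model ids are illustrative):

# Hypothetical query of GET /v1/models against a locally running instance.
import httpx

resp = httpx.get("http://localhost:8000/v1/models")
resp.raise_for_status()
print([m["id"] for m in resp.json()["data"]])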