Create app.py
app.py
ADDED
@@ -0,0 +1,294 @@
from fastapi import FastAPI, HTTPException, Request
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from typing import List, Dict, Optional, Union
from fastapi.responses import StreamingResponse
import logging
import uuid
import time
import json
import asyncio
import random
import httpx
from fake_useragent import UserAgent

app = FastAPI()

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

MODEL_MAPPING = {
    "keyless-gpt-4o-mini": "gpt-4o-mini",
    "keyless-claude-3-haiku": "claude-3-haiku-20240307",
    "keyless-mixtral-8x7b": "mistralai/Mixtral-8x7B-Instruct-v0.1",
    "keyless-meta-Llama-3.1-70B-Instruct-Turbo": "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo"
}

class ModelInfo(BaseModel):
    id: str
    object: str = "model"
    created: int = int(time.time())
    owned_by: str = "custom"

class ChatMessage(BaseModel):
    role: str
    content: str

class ChatCompletionRequest(BaseModel):
    model: str
    messages: List[ChatMessage]
    temperature: Optional[float] = 1.0
    top_p: Optional[float] = 1.0
    n: Optional[int] = 1
    stream: Optional[bool] = False
    stop: Optional[Union[str, List[str]]] = None
    max_tokens: Optional[int] = None
    presence_penalty: Optional[float] = 0.0
    frequency_penalty: Optional[float] = 0.0
    logit_bias: Optional[Dict[str, float]] = None
    user: Optional[str] = None

class ChatCompletionResponseChoice(BaseModel):
    index: int
    message: ChatMessage
    finish_reason: Optional[str] = None

class ChatCompletionResponseUsage(BaseModel):
    prompt_tokens: int
    completion_tokens: int
    total_tokens: int

class ChatCompletionResponse(BaseModel):
    id: str
    object: str = "chat.completion"
    created: int
    model: str
    choices: List[ChatCompletionResponseChoice]
    usage: ChatCompletionResponseUsage

class DeltaMessage(BaseModel):
    role: Optional[str] = None
    content: Optional[str] = None

class ChatCompletionStreamResponseChoice(BaseModel):
    index: int
    delta: DeltaMessage
    finish_reason: Optional[str] = None

class ChatCompletionStreamResponse(BaseModel):
    id: str
    object: str = "chat.completion.chunk"
    created: int
    model: str
    choices: List[ChatCompletionStreamResponseChoice]

# Store active conversations
conversations: Dict[str, List[ChatMessage]] = {}

ua = UserAgent()

def get_next_user_agent():
    return ua.random

async def update_vqd_token(user_agent):
    async with httpx.AsyncClient() as client:
        try:
            await client.get("https://duckduckgo.com/country.json", headers={"User-Agent": user_agent})
            headers = {"x-vqd-accept": "1", "User-Agent": user_agent}
            response = await client.get("https://duckduckgo.com/duckchat/v1/status", headers=headers)
            if response.status_code == 200:
                vqd_token = response.headers.get("x-vqd-4", "")
                logging.info(f"Fetched new x-vqd-4 token: {vqd_token}")
                return vqd_token
            else:
                logging.warning(f"Failed to fetch x-vqd-4 token. Status code: {response.status_code}")
                return ""
        except Exception as e:
            logging.error(f"Error fetching x-vqd-4 token: {str(e)}")
            return ""

async def chat_with_duckduckgo(query: str, model: str, conversation_history: List[ChatMessage]):
    original_model = MODEL_MAPPING.get(model, model)
    user_agent = get_next_user_agent()
    vqd_token = await update_vqd_token(user_agent)
    if not vqd_token:
        raise HTTPException(status_code=500, detail="Failed to obtain VQD token")

    # If there is a system message, prepend it to the first user message.
    # (DuckDuckGo AI doesn't accept system messages, but this workaround is
    # functionally equivalent to setting a system prompt.)
    system_message = next((msg for msg in conversation_history if msg.role == "system"), None)
    user_messages = [{"role": msg.role, "content": msg.content} for msg in conversation_history if msg.role == "user"]

    if system_message and user_messages:
        user_messages[0]["content"] = f"{system_message.content}\n\n{user_messages[0]['content']}"

    payload = {
        "messages": user_messages,
        "model": original_model
    }

    headers = {
        "x-vqd-4": vqd_token,
        "Content-Type": "application/json",
        "User-Agent": user_agent
    }

    logging.info(f"Sending payload to DuckDuckGo with User-Agent: {user_agent}")

    async with httpx.AsyncClient() as client:
        try:
            response = await client.post("https://duckduckgo.com/duckchat/v1/chat", json=payload, headers=headers)
            if response.status_code == 200:
                full_response = ""
                async for line in response.aiter_lines():
                    if line.startswith("data: "):
                        data = line[6:].strip()
                        if data == "[DONE]":
                            break
                        try:
                            json_data = json.loads(data)
                            message = json_data.get("message", "")
                            full_response += message
                            yield message
                        except json.JSONDecodeError:
                            logging.warning(f"Failed to parse JSON: {data}")
            elif response.status_code == 429:
                logging.warning("Rate limit exceeded. Changing User-Agent and retrying.")
                for attempt in range(5):  # Try up to 5 times
                    user_agent = get_next_user_agent()
                    vqd_token = await update_vqd_token(user_agent)
                    headers["User-Agent"] = user_agent
                    headers["x-vqd-4"] = vqd_token
                    logging.info(f"Retrying with new User-Agent: {user_agent}")
                    response = await client.post("https://duckduckgo.com/duckchat/v1/chat", json=payload, headers=headers)
                    if response.status_code == 200:
                        async for line in response.aiter_lines():
                            if line.startswith("data: "):
                                data = line[6:].strip()
                                if data == "[DONE]":
                                    break
                                try:
                                    json_data = json.loads(data)
                                    message = json_data.get("message", "")
                                    yield message
                                except json.JSONDecodeError:
                                    logging.warning(f"Failed to parse JSON: {data}")
                        break
                else:
                    # for/else: every retry failed, so give up
                    raise HTTPException(status_code=429, detail="Rate limit exceeded. Please try again later.")
            else:
                logging.error(f"Error response from DuckDuckGo. Status code: {response.status_code}")
                raise HTTPException(status_code=response.status_code, detail=f"Error communicating with DuckDuckGo: {response.text}")
        except httpx.HTTPStatusError as e:
            logging.error(f"HTTP error occurred: {str(e)}")
            raise HTTPException(status_code=e.response.status_code, detail=str(e))
        except httpx.RequestError as e:
            logging.error(f"Request error occurred: {str(e)}")
            raise HTTPException(status_code=500, detail=str(e))
        except Exception as e:
            logging.error(f"Unexpected error in chat_with_duckduckgo: {str(e)}")
            raise HTTPException(status_code=500, detail=f"Unexpected error: {str(e)}")

@app.get("/v1/models")
async def list_models():
    logging.info("Listing available models")
    models = [ModelInfo(id=model_id) for model_id in MODEL_MAPPING.keys()]
    return {"data": models, "object": "list"}

@app.post("/v1/chat/completions")
async def chat_completion(request: ChatCompletionRequest):
    conversation_id = str(uuid.uuid4())

    logging.info(f"Received chat completion request for conversation {conversation_id}")
    logging.info(f"Request: {request.model_dump()}")

    conversation_history = conversations.get(conversation_id, [])
    conversation_history.extend(request.messages)

    async def generate():
        try:
            full_response = ""
            async for chunk in chat_with_duckduckgo(" ".join([msg.content for msg in request.messages]), request.model, conversation_history):
                full_response += chunk

                response = ChatCompletionStreamResponse(
                    id=conversation_id,
                    created=int(time.time()),
                    model=request.model,
                    choices=[
                        ChatCompletionStreamResponseChoice(
                            index=0,
                            delta=DeltaMessage(content=chunk),
                            finish_reason=None
                        )
                    ]
                )
                yield f"data: {response.model_dump_json()}\n\n"
                await asyncio.sleep(random.uniform(0.05, 0.1))

            final_response = ChatCompletionStreamResponse(
                id=conversation_id,
                created=int(time.time()),
                model=request.model,
                choices=[
                    ChatCompletionStreamResponseChoice(
                        index=0,
                        delta=DeltaMessage(),
                        finish_reason="stop"
                    )
                ]
            )
            yield f"data: {final_response.model_dump_json()}\n\n"
            yield "data: [DONE]\n\n"
        except Exception as e:
            logging.error(f"Error during streaming: {str(e)}")
            yield f"data: {json.dumps({'error': str(e)})}\n\n"

    if request.stream:
        return StreamingResponse(generate(), media_type="text/event-stream")
    else:
        full_response = ""
        async for chunk in chat_with_duckduckgo(" ".join([msg.content for msg in request.messages]), request.model, conversation_history):
            full_response += chunk

        response = ChatCompletionResponse(
            id=conversation_id,
            created=int(time.time()),
            model=request.model,
            choices=[
                ChatCompletionResponseChoice(
                    index=0,
                    message=ChatMessage(role="assistant", content=full_response),
                    finish_reason="stop"
                )
            ],
            usage=ChatCompletionResponseUsage(
                prompt_tokens=sum(len(msg.content.split()) for msg in conversation_history),
                completion_tokens=len(full_response.split()),
                total_tokens=sum(len(msg.content.split()) for msg in conversation_history) + len(full_response.split())
            )
        )

        conversation_history.append(ChatMessage(role="assistant", content=full_response))
        conversations[conversation_id] = conversation_history

        return response

@app.delete("/v1/conversations/{conversation_id}")
async def end_conversation(conversation_id: str):
    if conversation_id in conversations:
        del conversations[conversation_id]
        logging.info(f"Conversation {conversation_id} ended and context cleared")
        return {"message": f"Conversation {conversation_id} ended and context cleared."}
    else:
        logging.warning(f"Attempt to end non-existent conversation {conversation_id}")
        raise HTTPException(status_code=404, detail="Conversation not found")

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=1337)
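
For reference, a minimal sketch of a client call against this server, assuming it is running locally on port 1337 as configured above. The model name comes from MODEL_MAPPING, and httpx is the same client library the server itself uses; the request shape follows the OpenAI-compatible ChatCompletionRequest model defined in app.py.

import httpx

# Non-streaming request against the local OpenAI-compatible endpoint.
# Assumes app.py is running on localhost:1337.
resp = httpx.post(
    "http://localhost:1337/v1/chat/completions",
    json={
        "model": "keyless-gpt-4o-mini",
        "messages": [{"role": "user", "content": "Hello!"}],
        "stream": False,
    },
    timeout=60.0,
)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])

Setting "stream": true instead returns Server-Sent Events (data: chunks terminated by data: [DONE]), matching the streaming branch of chat_completion.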