Abhaykoul commited on
Commit
6c63fd2
1 Parent(s): c13662e

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +294 -0
app.py ADDED
@@ -0,0 +1,294 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import asyncio
import json
import logging
import random
import time
import uuid
from typing import Dict, List, Optional, Union

import httpx
from fake_useragent import UserAgent
from fastapi import FastAPI, HTTPException, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import StreamingResponse
from pydantic import BaseModel, Field
14
+
15
app = FastAPI()

# Timestamped INFO-level logging for request/upstream tracing.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Wide-open CORS so browser clients on any origin can call this API.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
26
# Maps this API's public "keyless-*" model aliases to the identifiers the
# DuckDuckGo chat backend expects. Unknown names pass through unchanged
# (see chat_with_duckduckgo's .get(model, model) fallback).
MODEL_MAPPING = {
    "keyless-gpt-4o-mini": "gpt-4o-mini",
    "keyless-claude-3-haiku": "claude-3-haiku-20240307",
    "keyless-mixtral-8x7b": "mistralai/Mixtral-8x7B-Instruct-v0.1",
    "keyless-meta-Llama-3.1-70B-Instruct-Turbo": "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo"
}
32
+
33
class ModelInfo(BaseModel):
    """One entry in the /v1/models listing (OpenAI-compatible shape)."""
    id: str
    object: str = "model"
    # default_factory so the timestamp reflects when each instance is created;
    # the original `int(time.time())` default was evaluated once at import,
    # freezing every model's `created` at server start-up time.
    created: int = Field(default_factory=lambda: int(time.time()))
    owned_by: str = "custom"
38
+
39
class ChatMessage(BaseModel):
    """A single chat turn: role ("system", "user", or "assistant") plus its text."""
    role: str
    content: str
42
+
43
class ChatCompletionRequest(BaseModel):
    """OpenAI-style /v1/chat/completions request body.

    Only `model`, `messages`, and `stream` are acted on by this server; the
    remaining sampling/penalty fields are accepted for client compatibility
    but are not forwarded upstream (the DuckDuckGo payload carries only
    messages and model).
    """
    model: str
    messages: List[ChatMessage]
    temperature: Optional[float] = 1.0
    top_p: Optional[float] = 1.0
    n: Optional[int] = 1
    stream: Optional[bool] = False
    stop: Optional[Union[str, List[str]]] = None
    max_tokens: Optional[int] = None
    presence_penalty: Optional[float] = 0.0
    frequency_penalty: Optional[float] = 0.0
    logit_bias: Optional[Dict[str, float]] = None
    user: Optional[str] = None
56
+
57
class ChatCompletionResponseChoice(BaseModel):
    """One completion alternative in a non-streaming response."""
    index: int
    message: ChatMessage
    finish_reason: Optional[str] = None
61
+
62
class ChatCompletionResponseUsage(BaseModel):
    """Token accounting. NOTE: counts are whitespace word counts computed by
    chat_completion, not real tokenizer tokens."""
    prompt_tokens: int
    completion_tokens: int
    total_tokens: int
66
+
67
class ChatCompletionResponse(BaseModel):
    """Complete (non-streaming) chat completion reply, OpenAI-compatible."""
    id: str
    object: str = "chat.completion"
    created: int
    model: str
    choices: List[ChatCompletionResponseChoice]
    usage: ChatCompletionResponseUsage
74
+
75
class DeltaMessage(BaseModel):
    """Incremental piece of a streamed message; fields are None when absent."""
    role: Optional[str] = None
    content: Optional[str] = None
78
+
79
class ChatCompletionStreamResponseChoice(BaseModel):
    """One choice inside a streaming chunk; finish_reason is set ("stop")
    only on the final chunk emitted by chat_completion."""
    index: int
    delta: DeltaMessage
    finish_reason: Optional[str] = None
83
+
84
class ChatCompletionStreamResponse(BaseModel):
    """A single SSE chunk of a streamed chat completion."""
    id: str
    object: str = "chat.completion.chunk"
    created: int
    model: str
    choices: List[ChatCompletionStreamResponseChoice]
90
+
91
# Store active conversations
# In-memory only: histories are lost on restart and never expire or get
# evicted except via the DELETE /v1/conversations/{id} endpoint.
conversations: Dict[str, List[ChatMessage]] = {}

# Shared random browser User-Agent generator (fake_useragent).
ua = UserAgent()
95
+
96
def get_next_user_agent():
    """Pick a fresh random browser User-Agent string for the next upstream call."""
    agent = ua.random
    return agent
98
+
99
async def update_vqd_token(user_agent):
    """Fetch a fresh x-vqd-4 session token from DuckDuckGo.

    Best-effort: returns the token string on success, "" on any failure
    (non-200 status or transport error).
    """
    status_headers = {"x-vqd-accept": "1", "User-Agent": user_agent}
    async with httpx.AsyncClient() as client:
        try:
            # Priming request before asking the chat status endpoint for a token.
            await client.get("https://duckduckgo.com/country.json", headers={"User-Agent": user_agent})
            response = await client.get("https://duckduckgo.com/duckchat/v1/status", headers=status_headers)
        except Exception as e:
            logging.error(f"Error fetching x-vqd-4 token: {str(e)}")
            return ""
    if response.status_code != 200:
        logging.warning(f"Failed to fetch x-vqd-4 token. Status code: {response.status_code}")
        return ""
    vqd_token = response.headers.get("x-vqd-4", "")
    logging.info(f"Fetched new x-vqd-4 token: {vqd_token}")
    return vqd_token
115
+
116
async def chat_with_duckduckgo(query: str, model: str, conversation_history: List[ChatMessage]):
    """Async generator yielding response text chunks from DuckDuckGo AI Chat.

    Args:
        query: Kept for interface compatibility; not used directly — the
            upstream payload is built from conversation_history.
        model: Public "keyless-*" alias, mapped through MODEL_MAPPING
            (unknown names pass through unchanged).
        conversation_history: Full message list; system and user messages
            are forwarded, assistant messages are dropped.

    Raises:
        HTTPException: 500 if no VQD token could be obtained or on transport
            errors, 429 if rate-limited after 5 retries, or the upstream
            status code for other upstream error responses.
    """
    original_model = MODEL_MAPPING.get(model, model)
    user_agent = get_next_user_agent()
    vqd_token = await update_vqd_token(user_agent)
    if not vqd_token:
        raise HTTPException(status_code=500, detail="Failed to obtain VQD token")

    # DuckDuckGo doesn't accept "system" role messages, so fold the system
    # prompt into the first user message -- functionally equivalent.
    system_message = next((msg for msg in conversation_history if msg.role == "system"), None)
    user_messages = [{"role": msg.role, "content": msg.content} for msg in conversation_history if msg.role == "user"]
    if system_message and user_messages:
        user_messages[0]["content"] = f"{system_message.content}\n\n{user_messages[0]['content']}"

    payload = {
        "messages": user_messages,
        "model": original_model
    }
    headers = {
        "x-vqd-4": vqd_token,
        "Content-Type": "application/json",
        "User-Agent": user_agent
    }

    async def _yield_sse_messages(resp):
        # Upstream replies as server-sent events: lines of 'data: <json>',
        # terminated by 'data: [DONE]'. Shared by the first-try and retry
        # paths (the original duplicated this loop verbatim).
        async for line in resp.aiter_lines():
            if not line.startswith("data: "):
                continue
            data = line[6:].strip()
            if data == "[DONE]":
                break
            try:
                json_data = json.loads(data)
                yield json_data.get("message", "")
            except json.JSONDecodeError:
                logging.warning(f"Failed to parse JSON: {data}")

    logging.info(f"Sending payload to DuckDuckGo with User-Agent: {user_agent}")

    async with httpx.AsyncClient() as client:
        try:
            response = await client.post("https://duckduckgo.com/duckchat/v1/chat", json=payload, headers=headers)
            if response.status_code == 200:
                async for message in _yield_sse_messages(response):
                    yield message
            elif response.status_code == 429:
                logging.warning("Rate limit exceeded. Changing User-Agent and retrying.")
                for attempt in range(5):  # Try up to 5 times
                    # Rotate identity: new User-Agent plus a matching fresh token.
                    user_agent = get_next_user_agent()
                    vqd_token = await update_vqd_token(user_agent)
                    headers["User-Agent"] = user_agent
                    headers["x-vqd-4"] = vqd_token
                    logging.info(f"Retrying with new User-Agent: {user_agent}")
                    response = await client.post("https://duckduckgo.com/duckchat/v1/chat", json=payload, headers=headers)
                    if response.status_code == 200:
                        async for message in _yield_sse_messages(response):
                            yield message
                        break
                else:
                    # for-else: every retry came back non-200.
                    raise HTTPException(status_code=429, detail="Rate limit exceeded. Please try again later.")
            else:
                logging.error(f"Error response from DuckDuckGo. Status code: {response.status_code}")
                raise HTTPException(status_code=response.status_code, detail=f"Error communicating with DuckDuckGo: {response.text}")
        except HTTPException:
            # Propagate deliberate HTTP errors (429, upstream status) unchanged;
            # the original let the generic handler below rewrap them as 500s.
            raise
        except httpx.HTTPStatusError as e:
            logging.error(f"HTTP error occurred: {str(e)}")
            raise HTTPException(status_code=e.response.status_code, detail=str(e))
        except httpx.RequestError as e:
            logging.error(f"Request error occurred: {str(e)}")
            raise HTTPException(status_code=500, detail=str(e))
        except Exception as e:
            logging.error(f"Unexpected error in chat_with_duckduckgo: {str(e)}")
            raise HTTPException(status_code=500, detail=f"Unexpected error: {str(e)}")
196
+
197
@app.get("/v1/models")
async def list_models():
    """OpenAI-compatible model listing: one entry per keyless alias."""
    logging.info("Listing available models")
    return {
        "data": [ModelInfo(id=name) for name in MODEL_MAPPING],
        "object": "list",
    }
202
+
203
@app.post("/v1/chat/completions")
async def chat_completion(request: ChatCompletionRequest):
    """OpenAI-compatible chat completion endpoint (streaming and non-streaming).

    Every call starts a new server-side conversation under a fresh UUID; the
    id is returned in the response so clients can DELETE it later. Token
    counts in `usage` are whitespace word counts, not tokenizer tokens.
    """
    conversation_id = str(uuid.uuid4())

    logging.info(f"Received chat completion request for conversation {conversation_id}")
    logging.info(f"Request: {request.model_dump()}")

    # conversation_id is freshly generated, so this lookup always misses today;
    # kept so a pre-seeded history would be picked up if ids were ever reused.
    conversation_history = conversations.get(conversation_id, [])
    conversation_history.extend(request.messages)

    # Flattened request text passed as the (currently unused) query argument.
    prompt = " ".join(msg.content for msg in request.messages)

    async def generate():
        try:
            full_response = ""
            async for chunk in chat_with_duckduckgo(prompt, request.model, conversation_history):
                full_response += chunk

                response = ChatCompletionStreamResponse(
                    id=conversation_id,
                    created=int(time.time()),
                    model=request.model,
                    choices=[
                        ChatCompletionStreamResponseChoice(
                            index=0,
                            delta=DeltaMessage(content=chunk),
                            finish_reason=None
                        )
                    ]
                )
                yield f"data: {response.model_dump_json()}\n\n"
                # Small jitter between chunks to smooth client-side rendering.
                await asyncio.sleep(random.uniform(0.05, 0.1))

            final_response = ChatCompletionStreamResponse(
                id=conversation_id,
                created=int(time.time()),
                model=request.model,
                choices=[
                    ChatCompletionStreamResponseChoice(
                        index=0,
                        delta=DeltaMessage(),
                        finish_reason="stop"
                    )
                ]
            )
            yield f"data: {final_response.model_dump_json()}\n\n"
            yield "data: [DONE]\n\n"

            # Persist the assistant reply so streamed conversations can be
            # managed/deleted too (the original only did this in the
            # non-streaming branch, so streamed histories were lost).
            conversation_history.append(ChatMessage(role="assistant", content=full_response))
            conversations[conversation_id] = conversation_history
        except Exception as e:
            logging.error(f"Error during streaming: {str(e)}")
            yield f"data: {json.dumps({'error': str(e)})}\n\n"

    if request.stream:
        return StreamingResponse(generate(), media_type="text/event-stream")

    full_response = ""
    async for chunk in chat_with_duckduckgo(prompt, request.model, conversation_history):
        full_response += chunk

    # Compute the word counts once instead of twice (the original re-summed
    # the prompt words for total_tokens).
    prompt_tokens = sum(len(msg.content.split()) for msg in conversation_history)
    completion_tokens = len(full_response.split())

    response = ChatCompletionResponse(
        id=conversation_id,
        created=int(time.time()),
        model=request.model,
        choices=[
            ChatCompletionResponseChoice(
                index=0,
                message=ChatMessage(role="assistant", content=full_response),
                finish_reason="stop"
            )
        ],
        usage=ChatCompletionResponseUsage(
            prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens,
            total_tokens=prompt_tokens + completion_tokens
        )
    )

    conversation_history.append(ChatMessage(role="assistant", content=full_response))
    conversations[conversation_id] = conversation_history

    return response
281
+
282
@app.delete("/v1/conversations/{conversation_id}")
async def end_conversation(conversation_id: str):
    """Drop the stored history for a conversation; 404 if it is unknown."""
    if conversation_id not in conversations:
        logging.warning(f"Attempt to end non-existent conversation {conversation_id}")
        raise HTTPException(status_code=404, detail="Conversation not found")
    del conversations[conversation_id]
    logging.info(f"Conversation {conversation_id} ended and context cleared")
    return {"message": f"Conversation {conversation_id} ended and context cleared."}
291
+
292
if __name__ == "__main__":
    # Run the API directly with uvicorn when executed as a script.
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=1337)