nanoppa committed on
Commit
4132c6b
·
verified ·
1 Parent(s): 16dd217

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -4
app.py CHANGED
@@ -263,8 +263,9 @@ async def chat_completion(request: ChatRequest, authorization: str = Header(None
263
  return response.model_dump()
264
  else:
265
  gemini_messages = convert_messages_to_gemini_format(request.messages)
 
266
  payload = {}
267
- if request.model == "gemini-2.0-flash-exp-search":
268
  payload = {
269
  "contents": gemini_messages,
270
  "generationConfig": {
@@ -276,6 +277,7 @@ async def chat_completion(request: ChatRequest, authorization: str = Header(None
276
  }
277
  ]
278
  }
 
279
  else:
280
  payload = {
281
  "contents": gemini_messages,
@@ -293,7 +295,7 @@ async def chat_completion(request: ChatRequest, authorization: str = Header(None
293
  while retries < MAX_RETRIES:
294
  try:
295
  async with httpx.AsyncClient() as client:
296
- stream_url = f"https://generativelanguage.googleapis.com/v1beta/models/{request.model}:streamGenerateContent?alt=sse&key={api_key}"
297
  async with client.stream("POST", stream_url, json=payload, timeout=60.0) as response:
298
  if response.status_code == 429:
299
  logger.warning(f"Rate limit reached for key: {api_key}")
@@ -377,7 +379,7 @@ async def chat_completion(request: ChatRequest, authorization: str = Header(None
377
  return StreamingResponse(content=generate(), media_type="text/event-stream")
378
  else:
379
  async with httpx.AsyncClient() as client:
380
- non_stream_url = f"https://generativelanguage.googleapis.com/v1beta/models/{request.model}:generateContent?key={api_key}"
381
  response = await client.post(non_stream_url, json=payload)
382
 
383
  if response.status_code != 200:
@@ -397,7 +399,7 @@ async def chat_completion(request: ChatRequest, authorization: str = Header(None
397
 
398
  gemini_response = response.json()
399
  logger.info("Chat completion successful")
400
- return await convert_gemini_response_to_openai(gemini_response, request.model)
401
 
402
  except Exception as e:
403
  logger.error(f"Error in chat completion: {str(e)}")
 
263
  return response.model_dump()
264
  else:
265
  gemini_messages = convert_messages_to_gemini_format(request.messages)
266
+ model = request.model
267
  payload = {}
268
+ if model == "gemini-2.0-flash-exp-search":
269
  payload = {
270
  "contents": gemini_messages,
271
  "generationConfig": {
 
277
  }
278
  ]
279
  }
280
+ model = "gemini-2.0-flash-exp"
281
  else:
282
  payload = {
283
  "contents": gemini_messages,
 
295
  while retries < MAX_RETRIES:
296
  try:
297
  async with httpx.AsyncClient() as client:
298
+ stream_url = f"https://generativelanguage.googleapis.com/v1beta/models/{model}:streamGenerateContent?alt=sse&key={api_key}"
299
  async with client.stream("POST", stream_url, json=payload, timeout=60.0) as response:
300
  if response.status_code == 429:
301
  logger.warning(f"Rate limit reached for key: {api_key}")
 
379
  return StreamingResponse(content=generate(), media_type="text/event-stream")
380
  else:
381
  async with httpx.AsyncClient() as client:
382
+ non_stream_url = f"https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent?key={api_key}"
383
  response = await client.post(non_stream_url, json=payload)
384
 
385
  if response.status_code != 200:
 
399
 
400
  gemini_response = response.json()
401
  logger.info("Chat completion successful")
402
+ return await convert_gemini_response_to_openai(gemini_response, model)
403
 
404
  except Exception as e:
405
  logger.error(f"Error in chat completion: {str(e)}")