pvanand commited on
Commit
9c95ead
·
verified ·
1 Parent(s): 9a4a249

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +131 -723
main.py CHANGED
@@ -1,3 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
  from fastapi import FastAPI, HTTPException, Depends, Security, BackgroundTasks
2
  from fastapi.security import APIKeyHeader
3
  from fastapi.responses import StreamingResponse
@@ -11,142 +23,31 @@ import tiktoken
11
  import sqlite3
12
  import time
13
  from datetime import datetime, timedelta
14
- import asyncio
15
  import requests
16
- from prompts import *
17
- from fastapi_cache import FastAPICache
18
- from fastapi_cache.backends.inmemory import InMemoryBackend
19
- from fastapi_cache.decorator import cache
20
- import logging
21
-
22
- # Configure logging
23
- logging.basicConfig(
24
- level=logging.INFO,
25
- format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
26
- handlers=[
27
- logging.FileHandler("app.log"),
28
- logging.StreamHandler()
29
- ]
30
- )
31
- logger = logging.getLogger(__name__)
32
-
33
- app = FastAPI()
34
-
35
- API_KEY_NAME = "X-API-Key"
36
- API_KEY = os.environ.get("CHAT_AUTH_KEY", "default_secret_key")
37
- api_key_header = APIKeyHeader(name=API_KEY_NAME, auto_error=False)
38
-
39
- from speech_api import router as speech_api_router
40
- app.include_router(speech_api_router, prefix="/api/v1", tags=["TTS and ASR"])
41
-
42
- ModelID = Literal[
43
- "openai/gpt-4o-mini",
44
- "meta-llama/llama-3-70b-instruct",
45
- "anthropic/claude-3.5-sonnet",
46
- "deepseek/deepseek-coder",
47
- "anthropic/claude-3-haiku",
48
- "openai/gpt-3.5-turbo-instruct",
49
- "qwen/qwen-72b-chat",
50
- "google/gemma-2-27b-it"
51
- ]
52
-
53
- class QueryModel(BaseModel):
54
- user_query: str = Field(..., description="User's coding query")
55
- model_id: ModelID = Field(
56
- default="meta-llama/llama-3-70b-instruct",
57
- description="ID of the model to use for response generation"
58
- )
59
- conversation_id: str = Field(default_factory=lambda: str(uuid4()), description="Unique identifier for the conversation")
60
- user_id: str = Field(..., description="Unique identifier for the user")
61
-
62
- class Config:
63
- schema_extra = {
64
- "example": {
65
- "user_query": "How do I implement a binary search in Python?",
66
- "model_id": "meta-llama/llama-3-70b-instruct",
67
- "conversation_id": "123e4567-e89b-12d3-a456-426614174000",
68
- "user_id": "user123"
69
- }
70
- }
71
 
72
- class NewsQueryModel(BaseModel):
73
- query: str = Field(..., description="News topic to search for")
74
- model_id: ModelID = Field(
75
- default="openai/gpt-4o-mini",
76
- description="ID of the model to use for response generation"
77
- )
78
- class Config:
79
- schema_extra = {
80
- "example": {
81
- "query": "Latest developments in AI",
82
- "model_id": "openai/gpt-4o-mini"
83
- }
84
- }
85
 
86
- @lru_cache()
87
- def get_api_keys():
88
- logger.info("Loading API keys")
89
- return {
90
- "OPENROUTER_API_KEY": f"sk-or-v1-{os.environ['OPENROUTER_API_KEY']}",
91
- "BRAVE_API_KEY": os.environ['BRAVE_API_KEY']
 
 
 
92
  }
93
-
94
- api_keys = get_api_keys()
95
- or_client = OpenAI(api_key=api_keys["OPENROUTER_API_KEY"], base_url="https://openrouter.ai/api/v1")
96
-
97
- # In-memory storage for conversations
98
- conversations: Dict[str, List[Dict[str, str]]] = {}
99
- last_activity: Dict[str, float] = {}
100
-
101
- # Token encoding
102
- encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
103
-
104
- def limit_tokens(input_string, token_limit=6000):
105
- return encoding.decode(encoding.encode(input_string)[:token_limit])
106
-
107
- def calculate_tokens(msgs):
108
- return sum(len(encoding.encode(str(m))) for m in msgs)
109
-
110
- def chat_with_llama_stream(messages, model="openai/gpt-4o-mini", max_llm_history=4, max_output_tokens=2500):
111
- logger.info(f"Starting chat with model: {model}")
112
- while calculate_tokens(messages) > (8000 - max_output_tokens):
113
- if len(messages) > max_llm_history:
114
- messages = [messages[0]] + messages[-max_llm_history:]
115
- else:
116
- max_llm_history -= 1
117
- if max_llm_history < 2:
118
- error_message = "Token limit exceeded. Please shorten your input or start a new conversation."
119
- logger.error(error_message)
120
- raise HTTPException(status_code=400, detail=error_message)
121
-
122
- try:
123
- response = or_client.chat.completions.create(
124
- model=model,
125
- messages=messages,
126
- max_tokens=max_output_tokens,
127
- stream=True
128
- )
129
-
130
- full_response = ""
131
- for chunk in response:
132
- if chunk.choices[0].delta.content is not None:
133
- content = chunk.choices[0].delta.content
134
- full_response += content
135
- yield content
136
-
137
- # After streaming, add the full response to the conversation history
138
- messages.append({"role": "assistant", "content": full_response})
139
- logger.info("Chat completed successfully")
140
- except Exception as e:
141
- logger.error(f"Error in model response: {str(e)}")
142
- raise HTTPException(status_code=500, detail=f"Error in model response: {str(e)}")
143
-
144
- async def verify_api_key(api_key: str = Security(api_key_header)):
145
- if api_key != API_KEY:
146
- logger.warning("Invalid API key used")
147
- raise HTTPException(status_code=403, detail="Could not validate credentials")
148
- return api_key
149
-
150
  # SQLite setup
151
  DB_PATH = '/app/data/conversations.db'
152
 
@@ -166,7 +67,13 @@ def init_db():
166
  conn.close()
167
  logger.info("Database initialized successfully")
168
 
169
- init_db()
 
 
 
 
 
 
170
 
171
  def update_db(user_id, conversation_id, message, response):
172
  logger.info(f"Updating database for conversation: {conversation_id}")
@@ -178,185 +85,17 @@ def update_db(user_id, conversation_id, message, response):
178
  conn.close()
179
  logger.info("Database updated successfully")
180
 
181
- async def clear_inactive_conversations():
182
- while True:
183
-
184
- current_time = time.time()
185
- inactive_convos = [conv_id for conv_id, last_time in last_activity.items()
186
- if current_time - last_time > 1800] # 30 minutes
187
- for conv_id in inactive_convos:
188
- if conv_id in conversations:
189
- del conversations[conv_id]
190
- if conv_id in last_activity:
191
- del last_activity[conv_id]
192
- await asyncio.sleep(60) # Check every minute
193
-
194
- @app.on_event("startup")
195
- async def startup_event():
196
- logger.info("Starting up the application")
197
- FastAPICache.init(InMemoryBackend(), prefix="fastapi-cache")
198
- asyncio.create_task(clear_inactive_conversations())
199
-
200
- @app.post("/coding-assistant")
201
- async def coding_assistant(query: QueryModel, background_tasks: BackgroundTasks, api_key: str = Depends(verify_api_key)):
202
- """
203
- Coding assistant endpoint that provides programming help based on user queries.
204
- Available models:
205
- - meta-llama/llama-3-70b-instruct (default)
206
- - anthropic/claude-3.5-sonnet
207
- - deepseek/deepseek-coder
208
- - anthropic/claude-3-haiku
209
- - openai/gpt-3.5-turbo-instruct
210
- - qwen/qwen-72b-chat
211
- - google/gemma-2-27b-it
212
- - openai/gpt-4o-mini
213
- Requires API Key authentication via X-API-Key header.
214
- """
215
- logger.info(f"Received coding assistant query: {query.user_query}")
216
- if query.conversation_id not in conversations:
217
- conversations[query.conversation_id] = [
218
- {"role": "system", "content": "You are a helpful assistant proficient in coding tasks. Help the user in understanding and writing code."}
219
- ]
220
-
221
- conversations[query.conversation_id].append({"role": "user", "content": query.user_query})
222
- last_activity[query.conversation_id] = time.time()
223
-
224
- # Limit tokens in the conversation history
225
- limited_conversation = conversations[query.conversation_id]
226
-
227
- def process_response():
228
- full_response = ""
229
- for content in chat_with_llama_stream(limited_conversation, model=query.model_id):
230
- full_response += content
231
- yield content
232
- background_tasks.add_task(update_db, query.user_id, query.conversation_id, query.user_query, full_response)
233
- logger.info(f"Completed coding assistant response for query: {query.user_query}")
234
-
235
- return StreamingResponse(process_response(), media_type="text/event-stream")
236
-
237
- # New functions for news assistant
238
-
239
- def internet_search(query, search_type="web", num_results=20):
240
- logger.info(f"Performing internet search for query: {query}, type: {search_type}")
241
- url = f"https://api.search.brave.com/res/v1/{'web' if search_type == 'web' else 'news'}/search"
242
-
243
- headers = {
244
- "Accept": "application/json",
245
- "Accept-Encoding": "gzip",
246
- "X-Subscription-Token": api_keys["BRAVE_API_KEY"]
247
- }
248
- params = {"q": query}
249
-
250
- response = requests.get(url, headers=headers, params=params)
251
-
252
- if response.status_code != 200:
253
- logger.error(f"Failed to fetch search results. Status code: {response.status_code}")
254
- return []
255
-
256
- search_data = response.json()["web"]["results"] if search_type == "web" else response.json()["results"]
257
-
258
- processed_results = [
259
- {
260
- "title": item["title"],
261
- "snippet": item["extra_snippets"][0],
262
- "last_updated": item.get("age", ""),
263
- "url":item.get("url", "")
264
- }
265
- for item in search_data
266
- if item.get("extra_snippets")
267
- ][:num_results]
268
-
269
- logger.info(f"Retrieved {len(processed_results)} search results")
270
- return processed_results
271
-
272
- @lru_cache(maxsize=100)
273
- def cached_internet_search(query: str):
274
- logger.info(f"Performing cached internet search for query: {query}")
275
- return internet_search(query, search_type="news")
276
-
277
- def analyze_data(query, data_type="news"):
278
- logger.info(f"Analyzing {data_type} for query: {query}")
279
-
280
- if data_type == "news":
281
- data = cached_internet_search(query)
282
- prompt_generator = generate_news_prompt
283
- system_prompt = NEWS_ASSISTANT_PROMPT
284
- else:
285
- data = internet_search(query, search_type="web")
286
- prompt_generator = generate_search_prompt
287
- system_prompt = SEARCH_ASSISTANT_PROMPT
288
-
289
- if not data:
290
- logger.error(f"Failed to fetch {data_type} data")
291
- return None
292
-
293
- prompt = prompt_generator(query, data)
294
- messages = [
295
- {"role": "system", "content": system_prompt},
296
- {"role": "user", "content": prompt}
297
- ]
298
 
299
- logger.info(f"{data_type.capitalize()} analysis completed")
300
- return messages,data
301
-
302
- class QueryModel(BaseModel):
303
- query: str = Field(..., description="Search query")
304
- model_id: ModelID = Field(
305
- default="openai/gpt-4o-mini",
306
- description="ID of the model to use for response generation"
307
- )
308
- class Config:
309
- schema_extra = {
310
- "example": {
311
- "query": "What are the latest advancements in quantum computing?",
312
- "model_id": "meta-llama/llama-3-70b-instruct"
313
- }
314
- }
315
-
316
- def search_assistant_api(query, data_type, model="openai/gpt-4o-mini"):
317
- logger.info(f"Received {data_type} assistant query: {query}")
318
- messages, search_data = analyze_data(query, data_type)
319
-
320
- if not messages:
321
- logger.error(f"Failed to fetch {data_type} data")
322
- raise HTTPException(status_code=500, detail=f"Failed to fetch {data_type} data")
323
-
324
- def process_response():
325
- logger.info(f"Generating response using LLM: {messages}")
326
- full_response = ""
327
- for content in chat_with_llama_stream(messages, model=model):
328
- full_response += content
329
- yield content
330
- logger.info(f"Completed {data_type} assistant response for query: {query}")
331
- logger.info(f"LLM Response: {full_response}")
332
- yield "<json><ref>"+ json.dumps(search_data)+"</ref></json>"
333
- return process_response
334
-
335
- def create_streaming_response(generator):
336
- return StreamingResponse(generator(), media_type="text/event-stream")
337
-
338
- @app.post("/news-assistant")
339
- async def news_assistant(query: QueryModel, api_key: str = Depends(verify_api_key)):
340
- """
341
- News assistant endpoint that provides summaries and analysis of recent news based on user queries.
342
- Requires API Key authentication via X-API-Key header.
343
- """
344
- response_generator = search_assistant_api(query.query, "news", model=query.model_id)
345
- return create_streaming_response(response_generator)
346
-
347
- @app.post("/search-assistant")
348
- async def search_assistant(query: QueryModel, api_key: str = Depends(verify_api_key)):
349
- """
350
- Search assistant endpoint that provides summaries and analysis of web search results based on user queries.
351
- Requires API Key authentication via X-API-Key header.
352
- """
353
- response_generator = search_assistant_api(query.query, "web", model=query.model_id)
354
- return create_streaming_response(response_generator)
355
-
356
- from pydantic import BaseModel, Field
357
- import yaml
358
- import json
359
- from yaml.loader import SafeLoader
360
 
361
  class FollowupQueryModel(BaseModel):
362
  query: str = Field(..., description="User's query for the followup agent")
@@ -366,10 +105,6 @@ class FollowupQueryModel(BaseModel):
366
  )
367
  conversation_id: str = Field(default_factory=lambda: str(uuid4()), description="Unique identifier for the conversation")
368
  user_id: str = Field(..., description="Unique identifier for the user")
369
- tool_call: Literal["web", "news", "auto"] = Field(
370
- default="auto",
371
- description="Type of tool to call (web, news, auto)"
372
- )
373
 
374
  class Config:
375
  schema_extra = {
@@ -382,427 +117,100 @@ class FollowupQueryModel(BaseModel):
382
  }
383
  }
384
 
385
- import re
386
-
387
- def parse_followup_and_tools(input_text):
388
- # Remove extra brackets and excess quotes
389
- cleaned_text = re.sub(r'\[|\]|"+', ' ', input_text)
390
-
391
- # Extract response content
392
- response_pattern = re.compile(r'<response>(.*?)</response>', re.DOTALL)
393
- response_parts = response_pattern.findall(cleaned_text)
394
- combined_response = ' '.join(response_parts)
395
-
396
- # Normalize spaces in the combined response
397
- combined_response = ' '.join(combined_response.split())
398
-
399
- parsed_interacts = []
400
- parsed_tools = []
401
-
402
- # Parse interacts and tools
403
- blocks = re.finditer(r'<(interact|tools?)(.*?)>(.*?)</\1>', cleaned_text, re.DOTALL)
404
- for block in blocks:
405
- block_type, _, content = block.groups()
406
- content = content.strip()
407
-
408
- if block_type == 'interact':
409
- question_blocks = re.split(r'\s*-\s*text:', content)[1:]
410
- for qblock in question_blocks:
411
- parts = re.split(r'\s*options:\s*', qblock, maxsplit=1)
412
- if len(parts) == 2:
413
- question = ' '.join(parts[0].split()) # Normalize spaces
414
- options = [' '.join(opt.split()) for opt in re.split(r'\s*-\s*', parts[1]) if opt.strip()]
415
- parsed_interacts.append({'question': question, 'options': options})
416
-
417
- elif block_type.startswith('tool'): # This will match both 'tool' and 'tools'
418
- tool_match = re.search(r'text:\s*(.*?)\s*options:\s*-\s*(.*)', content, re.DOTALL)
419
- if tool_match:
420
- tool_name = ' '.join(tool_match.group(1).split()) # Normalize spaces
421
- option = ' '.join(tool_match.group(2).split()) # Normalize spaces
422
- parsed_tools.append({'name': tool_name, 'input': option})
423
-
424
- return combined_response, parsed_interacts, parsed_tools
425
-
426
- @app.post("/followup-agent")
427
- async def followup_agent(query: FollowupQueryModel, background_tasks: BackgroundTasks, api_key: str = Depends(verify_api_key)):
428
- """
429
- Followup agent endpoint that provides helpful responses or generates clarifying questions based on user queries.
430
- Requires API Key authentication via X-API-Key header.
431
- """
432
- logger.info(f"Received followup agent query: {query.query}")
433
-
434
- if query.conversation_id not in conversations:
435
- conversations[query.conversation_id] = [
436
- {"role": "system", "content": FOLLOWUP_AGENT_PROMPT}
437
- ]
438
-
439
- conversations[query.conversation_id].append({"role": "user", "content": query.query})
440
- last_activity[query.conversation_id] = time.time()
441
-
442
- # Limit tokens in the conversation history
443
- limited_conversation = conversations[query.conversation_id]
444
-
445
- def process_response():
446
- full_response = ""
447
- for content in chat_with_llama_stream(limited_conversation, model=query.model_id):
448
- full_response += content
449
- yield content
450
-
451
- logger.info(f"LLM RAW response for query: {query.query}: {full_response}")
452
- response_content, interact,tools = parse_followup_and_tools(full_response)
453
-
454
- result = {
455
- "response": response_content,
456
- "clarification": interact
457
- }
458
-
459
- yield "\n\n" + json.dumps(result)
460
-
461
- # Add the assistant's response to the conversation history
462
- conversations[query.conversation_id].append({"role": "assistant", "content": full_response})
463
-
464
- background_tasks.add_task(update_db, query.user_id, query.conversation_id, query.query, full_response)
465
- logger.info(f"Completed followup agent response for query: {query.query}, send result: {result}")
466
-
467
- return StreamingResponse(process_response(), media_type="text/event-stream")
468
-
469
- @app.post("/v2/followup-agent")
470
- async def followup_agent(query: FollowupQueryModel, background_tasks: BackgroundTasks, api_key: str = Depends(verify_api_key)):
471
- """
472
- Followup agent endpoint that provides helpful responses or generates clarifying questions based on user queries.
473
- Requires API Key authentication via X-API-Key header.
474
- """
475
- logger.info(f"Received followup agent query: {query.query}")
476
-
477
- if query.conversation_id not in conversations:
478
- conversations[query.conversation_id] = [
479
- {"role": "system", "content": FOLLOWUP_AGENT_PROMPT}
480
- ]
481
-
482
- conversations[query.conversation_id].append({"role": "user", "content": query.query})
483
- last_activity[query.conversation_id] = time.time()
484
-
485
- # Limit tokens in the conversation history
486
- limited_conversation = conversations[query.conversation_id]
487
-
488
- def process_response():
489
- full_response = ""
490
- for content in chat_with_llama_stream(limited_conversation, model=query.model_id):
491
- full_response += content
492
- yield content
493
-
494
- logger.info(f"LLM RAW response for query: {query.query}: {full_response}")
495
- response_content, interact,tools = parse_followup_and_tools(full_response)
496
-
497
- result = {
498
- "clarification": interact
499
- }
500
-
501
- yield "\n<json>"
502
- yield json.dumps(result)
503
-
504
-
505
- # Add the assistant's response to the conversation history
506
- conversations[query.conversation_id].append({"role": "assistant", "content": full_response})
507
-
508
- background_tasks.add_task(update_db, query.user_id, query.conversation_id, query.query, full_response)
509
- logger.info(f"Completed followup agent response for query: {query.query}, send result: {result}")
510
 
511
- return StreamingResponse(process_response(), media_type="text/event-stream")
512
-
513
- @app.post("/v2/followup-tools-agent")
514
- def followup_agent(query: FollowupQueryModel, background_tasks: BackgroundTasks, api_key: str = Depends(verify_api_key)):
515
- """
516
- Followup agent endpoint that provides helpful responses or generates clarifying questions based on user queries.
517
- Requires API Key authentication via X-API-Key header.
518
- """
519
- logger.info(f"Received followup agent query: {query.query}")
520
- if query.conversation_id not in conversations:
521
- conversations[query.conversation_id] = [
522
- {"role": "system", "content": MULTI_AGENT_PROMPT_V2}
523
- ]
524
-
525
- conversations[query.conversation_id].append({"role": "user", "content": query.query})
526
- last_activity[query.conversation_id] = time.time()
527
-
528
- # Limit tokens in the conversation history
529
- limited_conversation = conversations[query.conversation_id]
530
-
531
- def process_response():
532
- full_response = ""
533
- result = dict()
534
-
535
- # Check if tool_call is specified and call the tool directly
536
- if query.tool_call in ["web", "news"]:
537
- search_query = query.query
538
- search_response = search_assistant_api(search_query, query.tool_call, model=query.model_id)
539
-
540
- yield "<report>"
541
- for content in search_response():
542
- yield content
543
- full_response += content
544
- yield "</report>"
545
- else:
546
- for content in chat_with_llama_stream(limited_conversation, model=query.model_id):
547
- yield content
548
- full_response += content
549
-
550
- logger.info(f"LLM RAW response for query: {query.query}: {full_response}")
551
- response_content, interact, tools = parse_followup_and_tools(full_response)
552
-
553
- result = {
554
- "clarification": interact,
555
- "tools": tools
556
- }
557
-
558
- yield "<json>"+ json.dumps(result)+"</json>"
559
-
560
-
561
- # Process tool if present
562
- if tools and len(tools) > 0:
563
- tool = tools[0] # Assume only one tool is present
564
- if tool["name"] in ["news", "web"]:
565
- search_query = tool["input"]
566
- search_response = search_assistant_api(search_query, tool["name"], model=query.model_id)
567
-
568
- yield "<report>"
569
- for content in search_response():
570
- yield content
571
- full_response += content
572
- yield "</report>"
573
-
574
- # Add the assistant's response to the conversation history
575
- conversations[query.conversation_id].append({"role": "assistant", "content": full_response})
576
- background_tasks.add_task(update_db, query.user_id, query.conversation_id, query.query, full_response)
577
- logger.info(f"Completed followup agent response for query: {query.query}, send result:{result}, Full response: {full_response}")
578
-
579
- return StreamingResponse(process_response(), media_type="text/event-stream")
580
-
581
-
582
- @app.post("/v3/followup-agent")
583
- async def followup_agent(query: FollowupQueryModel, background_tasks: BackgroundTasks, api_key: str = Depends(verify_api_key)):
584
- """
585
- Followup agent endpoint that provides helpful responses or generates clarifying questions based on user queries.
586
- Requires API Key authentication via X-API-Key header.
587
- """
588
- logger.info(f"Received followup agent query: {query.query}")
589
-
590
- if query.conversation_id not in conversations:
591
- conversations[query.conversation_id] = [
592
- {"role": "system", "content": FOLLOWUP_AGENT_PROMPT}
593
- ]
594
-
595
- conversations[query.conversation_id].append({"role": "user", "content": query.query})
596
- last_activity[query.conversation_id] = time.time()
597
-
598
- # Limit tokens in the conversation history
599
- limited_conversation = conversations[query.conversation_id]
600
 
601
-
602
- async def process_response():
603
- yield "<followup-response>\n\n"
604
- full_response = ""
605
- for content in chat_with_llama_stream(limited_conversation, model=query.model_id):
606
- full_response += content
607
- yield content
608
- yield "</followup-response>\n\n"
609
-
610
- logger.info(f"LLM RAW response for query: {query.query}: {full_response}")
611
-
612
- # Add a slight delay after sending the full LLM response
613
- await asyncio.sleep(0.01)
614
-
615
- response_content, interact, tools = parse_followup_and_tools(full_response)
616
- result = {
617
- "clarification": interact
618
- }
619
-
620
- yield "<followup-json>\n\n"
621
- yield json.dumps(result) + "\n\n"
622
- yield "</followup-json>\n\n"
623
-
624
- # Add the assistant's response to the conversation history
625
- conversations[query.conversation_id].append({"role": "assistant", "content": full_response})
626
- background_tasks.add_task(update_db, query.user_id, query.conversation_id, query.query, full_response)
627
- logger.info(f"Completed followup agent response for query: {query.query}, send result: {result}")
628
-
629
- return StreamingResponse(process_response(), media_type="text/event-stream")
630
-
631
-
632
- @app.post("/v4/followup-agent")
633
- async def followup_agent_v4(query: FollowupQueryModel, background_tasks: BackgroundTasks, api_key: str = Depends(verify_api_key)):
634
- """
635
- Followup agent endpoint that provides helpful responses or generates clarifying questions based on user queries.
636
- Requires API Key authentication via X-API-Key header.
637
- """
638
- logger.info(f"Received followup agent query: {query.query}")
639
-
640
- if query.conversation_id not in conversations:
641
- conversations[query.conversation_id] = [
642
- {"role": "system", "content": FOLLOWUP_AGENT_PROMPT}
643
- ]
644
-
645
- conversations[query.conversation_id].append({"role": "user", "content": query.query})
646
- last_activity[query.conversation_id] = time.time()
647
-
648
- # Limit tokens in the conversation history
649
- limited_conversation = conversations[query.conversation_id]
650
-
651
-
652
- async def process_response():
653
- yield "<followup-response>"+"\n"
654
- full_response = ""
655
- for content in chat_with_llama_stream(limited_conversation, model=query.model_id):
656
- full_response += content
657
- yield content
658
- yield "</followup-response>"+"\n"
659
- yield "--END_SECTION--\n"
660
-
661
- logger.info(f"LLM RAW response for query: {query.query}: {full_response}")
662
-
663
-
664
- response_content, interact, tools = parse_followup_and_tools(full_response)
665
- result = {
666
- "clarification": interact
667
- }
668
-
669
- yield "<followup-json>" + "\n"
670
- yield json.dumps(result) + "\n"
671
- yield "</followup-json>" +"\n"
672
- yield "--END_SECTION--\n"
673
- # Add the assistant's response to the conversation history
674
- conversations[query.conversation_id].append({"role": "assistant", "content": full_response})
675
- background_tasks.add_task(update_db, query.user_id, query.conversation_id, query.query, full_response)
676
- logger.info(f"Completed followup agent response for query: {query.query}, send result: {result}")
677
-
678
- return StreamingResponse(process_response(), media_type="text/event-stream")
679
-
680
- ## Digiyatra
681
 
682
  @app.post("/digiyatra-followup")
683
- async def followup_agent(query: FollowupQueryModel, background_tasks: BackgroundTasks, api_key: str = Depends(verify_api_key)):
684
  """
685
  Followup agent endpoint that provides helpful responses or generates clarifying questions based on user queries.
686
  Requires API Key authentication via X-API-Key header.
687
  """
688
- logger.info(f"Received followup agent query: {query.query}")
689
-
690
- if query.conversation_id not in conversations:
691
- conversations[query.conversation_id] = [
692
- {"role": "system", "content": FOLLOWUP_DIGIYATRA_PROMPT}
693
- ]
694
-
695
- conversations[query.conversation_id].append({"role": "user", "content": query.query})
696
- last_activity[query.conversation_id] = time.time()
697
-
698
- # Limit tokens in the conversation history
699
- limited_conversation = conversations[query.conversation_id]
700
-
701
- def process_response():
702
- full_response = ""
703
- for content in chat_with_llama_stream(limited_conversation, model=query.model_id):
704
- full_response += content
705
- yield content
706
-
707
- logger.info(f"LLM RAW response for query: {query.query}: {full_response}")
708
- response_content, interact,tools = parse_followup_and_tools(full_response)
709
-
710
- result = {
711
- "response": response_content,
712
- "clarification": interact
713
- }
714
-
715
- yield "\n\n" + json.dumps(result)
716
-
717
- # Add the assistant's response to the conversation history
718
- conversations[query.conversation_id].append({"role": "assistant", "content": full_response})
719
-
720
- background_tasks.add_task(update_db, query.user_id, query.conversation_id, query.query, full_response)
721
- logger.info(f"Completed followup agent response for query: {query.query}, send result: {result}")
722
-
723
- return StreamingResponse(process_response(), media_type="text/event-stream")
724
-
725
-
726
- @app.post("/v2/digiyatra-followup")
727
- async def digi_followup_agent_v2(query: FollowupQueryModel, background_tasks: BackgroundTasks, api_key: str = Depends(verify_api_key)):
728
- """
729
- Followup agent endpoint that provides helpful responses or generates clarifying questions based on user queries.
730
- Requires API Key authentication via X-API-Key header.
731
- """
732
- logger.info(f"Received followup agent query: {query.query}")
733
-
734
- if query.conversation_id not in conversations:
735
- conversations[query.conversation_id] = [
736
- {"role": "system", "content": FOLLOWUP_DIGIYATRA_PROMPT}
737
- ]
738
-
739
- conversations[query.conversation_id].append({"role": "user", "content": query.query})
740
- last_activity[query.conversation_id] = time.time()
741
-
742
- # Limit tokens in the conversation history
743
- limited_conversation = conversations[query.conversation_id]
744
-
745
- def process_response():
746
- full_response = ""
747
- for content in chat_with_llama_stream(limited_conversation, model=query.model_id):
748
- full_response += content
749
- yield json.dumps({"type": "response","content": content}) + "\n"
750
-
751
- logger.info(f"LLM RAW response for query: {query.query}: {full_response}")
752
- response_content, interact,tools = parse_followup_and_tools(full_response)
753
-
754
- result = {
755
- "response": response_content,
756
- "clarification": interact
757
- }
758
- yield json.dumps({"type": "interact","content": result}) +"\n"
759
-
760
- # Add the assistant's response to the conversation history
761
- conversations[query.conversation_id].append({"role": "assistant", "content": full_response})
762
-
763
- background_tasks.add_task(update_db, query.user_id, query.conversation_id, query.query, full_response)
764
- logger.info(f"Completed followup agent response for query: {query.query}, send result: {result}")
765
-
766
- return StreamingResponse(process_response(), media_type="text/event-stream")
767
-
768
-
769
- from document_generator import router as document_generator_router
770
- app.include_router(document_generator_router, prefix="/api/v1")
771
-
772
- from document_generator_v2 import router as document_generator_router_v2
773
- app.include_router(document_generator_router_v2, prefix="/api/v2")
774
-
775
- from document_generator_v3 import router as document_generator_router_v3
776
- app.include_router(document_generator_router_v3, prefix="/api/v3")
777
-
778
- from document_generator_v4 import router as document_generator_router_v4
779
- app.include_router(document_generator_router_v4, prefix="/api/v4")
780
 
781
  from fastapi.middleware.cors import CORSMiddleware
782
 
783
- # CORS middleware setup
784
  app.add_middleware(
785
  CORSMiddleware,
786
- allow_origins=[
787
- "http://127.0.0.1:5501/",
788
- "http://localhost:3000",
789
- "https://www.elevaticsai.com",
790
- "https://www.elevatics.cloud",
791
- "https://www.elevatics.online",
792
- "https://www.elevatics.ai",
793
- "https://elevaticsai.com",
794
- "https://elevatics.cloud",
795
- "https://elevatics.online",
796
- "https://elevatics.ai",
797
- "https://pvanand-specialized-agents.hf.space",
798
- "https://pvanand-general-chat.hf.space"
799
- ],
800
  allow_credentials=True,
801
- allow_methods=["GET", "POST"],
802
  allow_headers=["*"],
803
- expose_headers=["Content-Disposition"]
804
  )
805
- if __name__ == "__main__":
806
- import uvicorn
807
- logger.info("Starting the application")
808
- uvicorn.run(app, host="0.0.0.0", port=7860)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Digiyatra
2
+ from fastapi import FastAPI, Depends, BackgroundTasks, HTTPException, APIRouter, Query, Header
3
+ from pydantic import BaseModel
4
+ from typing import List, Dict, Optional, Union, Annotated, Any
5
+ from openai import AsyncOpenAI
6
+ from observability import LLMObservabilityManager, log_execution, logger
7
+ from aiclient import DatabaseManager, AIClient
8
+ from limit_tokens import trim_messages_openai
9
+ from prompts import FOLLOWUP_DIGIYATRA_PROMPT
10
+ from utils import parse_followup_and_tools
11
+ from sse_starlette.sse import EventSourceResponse
12
+ ##
13
  from fastapi import FastAPI, HTTPException, Depends, Security, BackgroundTasks
14
  from fastapi.security import APIKeyHeader
15
  from fastapi.responses import StreamingResponse
 
23
  import sqlite3
24
  import time
25
  from datetime import datetime, timedelta
26
+ import pandas as pd
27
  import requests
28
+ import json
29
+ import os
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
+ from pydantic import BaseModel, Field
32
+ import yaml
33
+ import json
34
+ from yaml.loader import SafeLoader
 
 
 
 
 
 
 
 
 
35
 
36
+ app = FastAPI(
37
+ title="Digiyatra Chatbot",
38
+ description="Digiyatra Chatbot",
39
+ version="1.0.0",
40
+ tags=["chat"],
41
+ contact={
42
+ "name": "Digiyatra",
43
+ "url": "https://digiyatra.com",
44
+ "email": "[email protected]"
45
  }
46
+ )
47
+ from observability_router import router as observability_router
48
+ from rag_routerv2 import router as rag_router, query_table, QueryTableResponse, get_db_connection
49
+ app.include_router(observability_router)
50
+ app.include_router(rag_router)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  # SQLite setup
52
  DB_PATH = '/app/data/conversations.db'
53
 
 
67
  conn.close()
68
  logger.info("Database initialized successfully")
69
 
70
+
71
+ # In-memory storage for conversations
72
+ conversations: Dict[str, List[Dict[str, str]]] = {}
73
+ last_activity: Dict[str, float] = {}
74
+
75
+ from aiclient import AIClient
76
+ client = AIClient()
77
 
78
  def update_db(user_id, conversation_id, message, response):
79
  logger.info(f"Updating database for conversation: {conversation_id}")
 
85
  conn.close()
86
  logger.info("Database updated successfully")
87
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
 
89
+ ModelID = Literal[
90
+ "openai/gpt-4o-mini",
91
+ "meta-llama/llama-3-70b-instruct",
92
+ "anthropic/claude-3.5-sonnet",
93
+ "deepseek/deepseek-coder",
94
+ "anthropic/claude-3-haiku",
95
+ "openai/gpt-3.5-turbo-instruct",
96
+ "qwen/qwen-72b-chat",
97
+ "google/gemma-2-27b-it"
98
+ ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
 
100
  class FollowupQueryModel(BaseModel):
101
  query: str = Field(..., description="User's query for the followup agent")
 
105
  )
106
  conversation_id: str = Field(default_factory=lambda: str(uuid4()), description="Unique identifier for the conversation")
107
  user_id: str = Field(..., description="Unique identifier for the user")
 
 
 
 
108
 
109
  class Config:
110
  schema_extra = {
 
117
  }
118
  }
119
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
 
122
+ async def digiyatra_query_table(query: str, db: Annotated[Any, Depends(get_db_connection)], limit: Optional[int] = 5):
123
+ """Query the digiyatra table."""
124
+ response = await query_table(
125
+ table_id="llama",
126
+ query=query,
127
+ user_id="digiyatra",
128
+ limit=limit
129
+ )
130
+ return response.results['data'][0]['text']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
 
132
  @app.post("/digiyatra-followup")
133
+ async def followup_agent(query: FollowupQueryModel, background_tasks: BackgroundTasks):
134
  """
135
  Followup agent endpoint that provides helpful responses or generates clarifying questions based on user queries.
136
  Requires API Key authentication via X-API-Key header.
137
  """
138
+ try:
139
+ logger.info(f"Received followup agent query: {query.query}")
140
+
141
+ if query.conversation_id not in conversations:
142
+ conversations[query.conversation_id] = [
143
+ {"role": "system", "content": FOLLOWUP_DIGIYATRA_PROMPT}
144
+ ]
145
+
146
+ digiyatra_response = await digiyatra_query_table(query.query, db=get_db_connection(), limit=5)
147
+ user_query_with_context = f"{query.query} \n\n FAQ Context for ANSWERING: {digiyatra_response}"
148
+ conversations[query.conversation_id].append({"role": "user", "content": user_query_with_context})
149
+ last_activity[query.conversation_id] = time.time()
150
+
151
+ # Limit tokens in the conversation history
152
+ limited_conversation = conversations[query.conversation_id]
153
+
154
+ async def process_response():
155
+ try:
156
+ full_response = ""
157
+ async for content in client.generate_response(limited_conversation, model=query.model_id, conversation_id=query.conversation_id, user=query.user_id):
158
+ full_response += content
159
+ yield f"{json.dumps({'type': 'token', 'content': content})}"
160
+
161
+ logger.info(f"LLM RAW response for query: {query.query}: {full_response}")
162
+ response_content, interact, tools = parse_followup_and_tools(full_response)
163
+
164
+ result = {
165
+ "response": response_content,
166
+ "clarification": interact
167
+ }
168
+
169
+ yield f"{json.dumps({'type': 'metadata', 'response_full': result})}"
170
+
171
+ # Add the assistant's response to the conversation history
172
+ conversations[query.conversation_id].append({"role": "assistant", "content": full_response})
173
+
174
+ background_tasks.add_task(update_db, query.user_id, query.conversation_id, query.query, full_response)
175
+ logger.info(f"Completed followup agent response for query: {query.query}, send result: {result}")
176
+ except Exception as e:
177
+ logger.error(f"Error during response processing: {str(e)}")
178
+ yield f"{json.dumps({'type': 'error', 'message': 'An error occurred while processing the response.'})}"
179
+
180
+ return EventSourceResponse(process_response(), media_type="text/event-stream")
181
+ except Exception as e:
182
+ logger.error(f"Error in followup_agent: {str(e)}")
183
+ raise HTTPException(status_code=500, detail="An error occurred while processing the followup agent request.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
184
 
185
  from fastapi.middleware.cors import CORSMiddleware
186
 
 
187
  app.add_middleware(
188
  CORSMiddleware,
189
+ allow_origins=["*"],
 
 
 
 
 
 
 
 
 
 
 
 
 
190
  allow_credentials=True,
191
+ allow_methods=["*"],
192
  allow_headers=["*"],
 
193
  )
194
+
195
+ @app.on_event("startup")
196
+ def startup():
197
+ logger.info("Starting up the application")
198
+ init_db()
199
+
200
+ @app.on_event("shutdown")
201
+ def shutdown():
202
+ logger.info("Shutting down the application")
203
+
204
+
205
+
206
+ # import uvicorn
207
+
208
+ # if __name__ == "__main__":
209
+ # uvicorn.run(
210
+ # "app:app",
211
+ # host="0.0.0.0",
212
+ # port=8000,
213
+ # workers=4,
214
+ # reload=False,
215
+ # access_log=False
216
+ # )