Spaces:
Sleeping
Sleeping
Asaad Almutareb
committed on
Commit
·
fa99d8f
1
Parent(s):
057d3c8
cleaned branch, added final streaming callback handler
Browse files- {innovation_pathfinder_ai/backend → app/api}/__init__.py +0 -0
- {innovation_pathfinder_ai/backend/app/api → app/api/v1}/__init__.py +0 -0
- {innovation_pathfinder_ai/backend/app/api/v1 → app/api/v1/agents}/__init__.py +0 -0
- {innovation_pathfinder_ai/backend/app → app}/api/v1/agents/hf_mixtral_agent.py +37 -15
- {innovation_pathfinder_ai/backend/app → app}/api/v1/agents/ollama_mixtral_agent.py +0 -0
- {innovation_pathfinder_ai/backend/app → app}/api/v1/agents/requirements.txt +0 -0
- {innovation_pathfinder_ai/backend/app → app}/api/v1/api.py +0 -0
- {innovation_pathfinder_ai/backend/app/api/v1/agents → app/api/v1/endpoints}/__init__.py +0 -0
- {innovation_pathfinder_ai/backend/app → app}/api/v1/endpoints/add_to_kb.py +0 -0
- {innovation_pathfinder_ai/backend/app/api/v1/endpoints → app/core}/__init__.py +0 -0
- {innovation_pathfinder_ai/backend/app → app}/core/config.py +4 -5
- {innovation_pathfinder_ai/backend/app → app}/crud/db_handler.py +5 -3
- {innovation_pathfinder_ai/backend/app → app}/database/db_schema.py +0 -0
- {innovation_pathfinder_ai/backend/app → app}/knowledge_base/placeholder.txt +0 -0
- {innovation_pathfinder_ai/backend/app → app}/main.py +9 -4
- {innovation_pathfinder_ai/backend/app → app}/schemas/adaptive_cards_schema.py +0 -0
- {innovation_pathfinder_ai/backend/app → app}/schemas/message_schema.py +0 -0
- {innovation_pathfinder_ai/backend/app → app}/schemas/response_schema.py +0 -0
- {innovation_pathfinder_ai/backend/app → app}/structured_tools/structured_tools.py +15 -9
- {innovation_pathfinder_ai/backend/app → app}/templates/chat.py +3 -3
- {innovation_pathfinder_ai/backend/app → app}/templates/react_json_with_memory.py +0 -0
- {innovation_pathfinder_ai/backend/app/core → app/utils}/__init__.py +0 -0
- {innovation_pathfinder_ai/backend/app → app}/utils/adaptive_cards/cards.py +0 -0
- {innovation_pathfinder_ai/backend/app → app}/utils/callback.py +0 -0
- {innovation_pathfinder_ai/backend/app → app}/utils/chains.py +2 -1
- {innovation_pathfinder_ai/backend/app → app}/utils/logger.py +0 -0
- {innovation_pathfinder_ai/backend/app → app}/utils/utils.py +0 -0
- {innovation_pathfinder_ai/backend/app → app}/vector_store/chroma_vector_store.py +12 -7
- {innovation_pathfinder_ai/backend/app → app}/vector_store/initialize_chroma_db.py +9 -6
- {innovation_pathfinder_ai/frontend/assets → assets}/avatar.png +0 -0
- {innovation_pathfinder_ai/frontend/assets → assets}/favicon.ico +0 -0
- innovation_pathfinder_ai/backend/app/utils/__init__.py +0 -0
- innovation_pathfinder_ai/frontend/app.py +0 -143
- innovation_pathfinder_ai/source_container/container.py +0 -1
{innovation_pathfinder_ai/backend → app/api}/__init__.py
RENAMED
File without changes
|
{innovation_pathfinder_ai/backend/app/api → app/api/v1}/__init__.py
RENAMED
File without changes
|
{innovation_pathfinder_ai/backend/app/api/v1 → app/api/v1/agents}/__init__.py
RENAMED
File without changes
|
{innovation_pathfinder_ai/backend/app → app}/api/v1/agents/hf_mixtral_agent.py
RENAMED
@@ -5,8 +5,13 @@ from langchain.agents.format_scratchpad import format_log_to_str
|
|
5 |
from langchain.agents.output_parsers import ReActJsonSingleInputOutputParser
|
6 |
# Import things that are needed generically
|
7 |
from langchain.tools.render import render_text_description
|
|
|
|
|
|
|
|
|
8 |
import os
|
9 |
from dotenv import load_dotenv
|
|
|
10 |
from app.structured_tools.structured_tools import (
|
11 |
arxiv_search, get_arxiv_paper, google_search, wikipedia_search, knowledgeBase_search, memory_search
|
12 |
)
|
@@ -17,43 +22,59 @@ from app.utils import logger
|
|
17 |
from app.utils import utils
|
18 |
from langchain.globals import set_llm_cache
|
19 |
from langchain.cache import SQLiteCache
|
20 |
-
from app.utils.callback import
|
|
|
|
|
|
|
|
|
|
|
21 |
|
22 |
-
|
|
|
23 |
logger = logger.get_console_logger("hf_mixtral_agent")
|
24 |
|
25 |
config = load_dotenv(".env")
|
26 |
HUGGINGFACEHUB_API_TOKEN = os.getenv('HUGGINGFACEHUB_API_TOKEN')
|
27 |
GOOGLE_CSE_ID = os.getenv('GOOGLE_CSE_ID')
|
28 |
GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY')
|
29 |
-
LANGCHAIN_TRACING_V2 = "true"
|
30 |
-
LANGCHAIN_ENDPOINT = "https://api.smith.langchain.com"
|
31 |
-
LANGCHAIN_API_KEY = os.getenv('LANGCHAIN_API_KEY')
|
32 |
-
LANGCHAIN_PROJECT = os.getenv('LANGCHAIN_PROJECT')
|
|
|
|
|
|
|
|
|
33 |
|
34 |
router = APIRouter()
|
35 |
|
|
|
|
|
36 |
@router.websocket("/agent")
|
37 |
async def websocket_endpoint(websocket: WebSocket):
|
38 |
await websocket.accept()
|
|
|
|
|
|
|
39 |
|
40 |
while True:
|
41 |
try:
|
42 |
data = await websocket.receive_json()
|
43 |
user_message = data["message"]
|
|
|
44 |
chat_history = []#data["history"]
|
45 |
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
|
54 |
-
|
55 |
message_id: str = utils.generate_uuid()
|
56 |
-
custom_handler =
|
57 |
websocket, message_id=message_id
|
58 |
)
|
59 |
|
@@ -106,6 +127,7 @@ async def websocket_endpoint(websocket: WebSocket):
|
|
106 |
#max_execution_time=60, # timout at 60 sec
|
107 |
return_intermediate_steps=True,
|
108 |
handle_parsing_errors=True,
|
|
|
109 |
)
|
110 |
|
111 |
await agent_executor.arun(input=user_message, chat_history=chat_history, callbacks=[custom_handler])
|
|
|
5 |
from langchain.agents.output_parsers import ReActJsonSingleInputOutputParser
|
6 |
# Import things that are needed generically
|
7 |
from langchain.tools.render import render_text_description
|
8 |
+
from app.schemas.message_schema import (
|
9 |
+
IChatResponse,
|
10 |
+
)
|
11 |
+
from app.utils.utils import generate_uuid
|
12 |
import os
|
13 |
from dotenv import load_dotenv
|
14 |
+
from app.utils.adaptive_cards.cards import create_adaptive_card
|
15 |
from app.structured_tools.structured_tools import (
|
16 |
arxiv_search, get_arxiv_paper, google_search, wikipedia_search, knowledgeBase_search, memory_search
|
17 |
)
|
|
|
22 |
from app.utils import utils
|
23 |
from langchain.globals import set_llm_cache
|
24 |
from langchain.cache import SQLiteCache
|
25 |
+
from app.utils.callback import (
|
26 |
+
CustomAsyncCallbackHandler,
|
27 |
+
CustomFinalStreamingStdOutCallbackHandler,
|
28 |
+
)
|
29 |
+
from langchain.memory import ConversationBufferMemory
|
30 |
+
from app.core.config import settings
|
31 |
|
32 |
+
local_cache=settings.LOCAL_CACHE
|
33 |
+
set_llm_cache(SQLiteCache(database_path=local_cache))
|
34 |
logger = logger.get_console_logger("hf_mixtral_agent")
|
35 |
|
36 |
config = load_dotenv(".env")
|
37 |
HUGGINGFACEHUB_API_TOKEN = os.getenv('HUGGINGFACEHUB_API_TOKEN')
|
38 |
GOOGLE_CSE_ID = os.getenv('GOOGLE_CSE_ID')
|
39 |
GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY')
|
40 |
+
# LANGCHAIN_TRACING_V2 = "true"
|
41 |
+
# LANGCHAIN_ENDPOINT = "https://api.smith.langchain.com"
|
42 |
+
# LANGCHAIN_API_KEY = os.getenv('LANGCHAIN_API_KEY')
|
43 |
+
# LANGCHAIN_PROJECT = os.getenv('LANGCHAIN_PROJECT')
|
44 |
+
# GOOGLE_CSE_ID=settings.GOOGLE_CSE_ID
|
45 |
+
# GOOGLE_API_KEY=settings.GOOGLE_API_KEY
|
46 |
+
# HUGGINGFACEHUB_API_TOKEN=settings.HUGGINGFACEHUB_API_TOKEN
|
47 |
+
# print(HUGGINGFACEHUB_API_TOKEN)
|
48 |
|
49 |
router = APIRouter()
|
50 |
|
51 |
+
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
|
52 |
+
|
53 |
@router.websocket("/agent")
|
54 |
async def websocket_endpoint(websocket: WebSocket):
|
55 |
await websocket.accept()
|
56 |
+
if not settings.HUGGINGFACEHUB_API_TOKEN.startswith("hf_"):
|
57 |
+
await websocket.send_json({"error": "HUGGINGFACEHUB_API_TOKEN is not set"})
|
58 |
+
return
|
59 |
|
60 |
while True:
|
61 |
try:
|
62 |
data = await websocket.receive_json()
|
63 |
user_message = data["message"]
|
64 |
+
user_message_card = create_adaptive_card(user_message)
|
65 |
chat_history = []#data["history"]
|
66 |
|
67 |
+
resp = IChatResponse(
|
68 |
+
sender="you",
|
69 |
+
message=user_message_card.to_dict(),
|
70 |
+
type="start",
|
71 |
+
message_id=generate_uuid(),
|
72 |
+
id=generate_uuid(),
|
73 |
+
)
|
74 |
|
75 |
+
await websocket.send_json(resp.model_dump())
|
76 |
message_id: str = utils.generate_uuid()
|
77 |
+
custom_handler = CustomFinalStreamingStdOutCallbackHandler(
|
78 |
websocket, message_id=message_id
|
79 |
)
|
80 |
|
|
|
127 |
#max_execution_time=60, # timout at 60 sec
|
128 |
return_intermediate_steps=True,
|
129 |
handle_parsing_errors=True,
|
130 |
+
#memory=memory
|
131 |
)
|
132 |
|
133 |
await agent_executor.arun(input=user_message, chat_history=chat_history, callbacks=[custom_handler])
|
{innovation_pathfinder_ai/backend/app → app}/api/v1/agents/ollama_mixtral_agent.py
RENAMED
File without changes
|
{innovation_pathfinder_ai/backend/app → app}/api/v1/agents/requirements.txt
RENAMED
File without changes
|
{innovation_pathfinder_ai/backend/app → app}/api/v1/api.py
RENAMED
File without changes
|
{innovation_pathfinder_ai/backend/app/api/v1/agents → app/api/v1/endpoints}/__init__.py
RENAMED
File without changes
|
{innovation_pathfinder_ai/backend/app → app}/api/v1/endpoints/add_to_kb.py
RENAMED
File without changes
|
{innovation_pathfinder_ai/backend/app/api/v1/endpoints → app/core}/__init__.py
RENAMED
File without changes
|
{innovation_pathfinder_ai/backend/app → app}/core/config.py
RENAMED
@@ -1,5 +1,6 @@
|
|
1 |
import os
|
2 |
-
from pydantic import AnyHttpUrl,
|
|
|
3 |
from enum import Enum
|
4 |
|
5 |
|
@@ -15,9 +16,6 @@ class Settings(BaseSettings):
|
|
15 |
MODE: ModeEnum = ModeEnum.development
|
16 |
API_VERSION: str = "v1"
|
17 |
API_V1_STR: str = f"/api/{API_VERSION}"
|
18 |
-
OPENAI_API_KEY: str
|
19 |
-
UNSPLASH_API_KEY: str
|
20 |
-
SERP_API_KEY: str
|
21 |
HUGGINGFACEHUB_API_TOKEN: str
|
22 |
GOOGLE_CSE_ID: str
|
23 |
GOOGLE_API_KEY: str
|
@@ -25,10 +23,11 @@ class Settings(BaseSettings):
|
|
25 |
CONVERSATION_COLLECTION_NAME: str
|
26 |
EMBEDDING_MODEL: str
|
27 |
SOURCES_CACHE: str
|
|
|
28 |
|
29 |
class Config:
|
30 |
case_sensitive = True
|
31 |
-
env_file = os.path.expanduser("
|
32 |
|
33 |
|
34 |
settings = Settings()
|
|
|
1 |
import os
|
2 |
+
from pydantic import AnyHttpUrl, ConfigDict
|
3 |
+
from pydantic_settings import BaseSettings
|
4 |
from enum import Enum
|
5 |
|
6 |
|
|
|
16 |
MODE: ModeEnum = ModeEnum.development
|
17 |
API_VERSION: str = "v1"
|
18 |
API_V1_STR: str = f"/api/{API_VERSION}"
|
|
|
|
|
|
|
19 |
HUGGINGFACEHUB_API_TOKEN: str
|
20 |
GOOGLE_CSE_ID: str
|
21 |
GOOGLE_API_KEY: str
|
|
|
23 |
CONVERSATION_COLLECTION_NAME: str
|
24 |
EMBEDDING_MODEL: str
|
25 |
SOURCES_CACHE: str
|
26 |
+
LOCAL_CACHE: str
|
27 |
|
28 |
class Config:
|
29 |
case_sensitive = True
|
30 |
+
env_file = os.path.expanduser(".env")
|
31 |
|
32 |
|
33 |
settings = Settings()
|
{innovation_pathfinder_ai/backend/app → app}/crud/db_handler.py
RENAMED
@@ -2,11 +2,13 @@ from sqlmodel import SQLModel, create_engine, Session, select
|
|
2 |
from app.database.db_schema import Sources
|
3 |
from app.utils.logger import get_console_logger
|
4 |
import os
|
5 |
-
from
|
|
|
6 |
|
7 |
-
load_dotenv()
|
8 |
|
9 |
-
sqlite_file_name = os.getenv('SOURCES_CACHE')
|
|
|
10 |
|
11 |
sqlite_url = f"sqlite:///{sqlite_file_name}"
|
12 |
engine = create_engine(sqlite_url, echo=False)
|
|
|
2 |
from app.database.db_schema import Sources
|
3 |
from app.utils.logger import get_console_logger
|
4 |
import os
|
5 |
+
from app.core.config import settings
|
6 |
+
#from dotenv import load_dotenv
|
7 |
|
8 |
+
#load_dotenv()
|
9 |
|
10 |
+
#sqlite_file_name = os.getenv('SOURCES_CACHE')
|
11 |
+
sqlite_file_name = settings.SOURCES_CACHE
|
12 |
|
13 |
sqlite_url = f"sqlite:///{sqlite_file_name}"
|
14 |
engine = create_engine(sqlite_url, echo=False)
|
{innovation_pathfinder_ai/backend/app → app}/database/db_schema.py
RENAMED
File without changes
|
{innovation_pathfinder_ai/backend/app → app}/knowledge_base/placeholder.txt
RENAMED
File without changes
|
{innovation_pathfinder_ai/backend/app → app}/main.py
RENAMED
@@ -1,18 +1,23 @@
|
|
1 |
from fastapi import FastAPI
|
2 |
from app.api.v1.api import api_router as api_router_v1
|
3 |
from fastapi.responses import HTMLResponse
|
|
|
4 |
from app.templates.chat import chat_html
|
5 |
#from app.core.config import settings
|
6 |
from fastapi.middleware.cors import CORSMiddleware
|
7 |
|
8 |
-
app = FastAPI(
|
9 |
-
|
|
|
|
|
|
|
|
|
10 |
# CORS Middleware setup for allowing frontend requests
|
11 |
# ToDO: replace with settings.BACKEND_CORS_ORIGINS once core/config.py is implemented
|
12 |
-
if BACKEND_CORS_ORIGINS:
|
13 |
app.add_middleware(
|
14 |
CORSMiddleware,
|
15 |
-
allow_origins=[str(origin) for origin in BACKEND_CORS_ORIGINS],
|
16 |
allow_credentials=True,
|
17 |
allow_methods=["*"],
|
18 |
allow_headers=["*"],
|
|
|
1 |
from fastapi import FastAPI
|
2 |
from app.api.v1.api import api_router as api_router_v1
|
3 |
from fastapi.responses import HTMLResponse
|
4 |
+
from app.core.config import settings
|
5 |
from app.templates.chat import chat_html
|
6 |
#from app.core.config import settings
|
7 |
from fastapi.middleware.cors import CORSMiddleware
|
8 |
|
9 |
+
app = FastAPI(
|
10 |
+
title=settings.PROJECT_NAME,
|
11 |
+
version=settings.API_VERSION,
|
12 |
+
openapi_url=f"{settings.API_V1_STR}/openapi.json",
|
13 |
+
)
|
14 |
+
#BACKEND_CORS_ORIGINS = ["*"]
|
15 |
# CORS Middleware setup for allowing frontend requests
|
16 |
# ToDO: replace with settings.BACKEND_CORS_ORIGINS once core/config.py is implemented
|
17 |
+
if settings.BACKEND_CORS_ORIGINS:
|
18 |
app.add_middleware(
|
19 |
CORSMiddleware,
|
20 |
+
allow_origins=[str(origin) for origin in settings.BACKEND_CORS_ORIGINS],
|
21 |
allow_credentials=True,
|
22 |
allow_methods=["*"],
|
23 |
allow_headers=["*"],
|
{innovation_pathfinder_ai/backend/app → app}/schemas/adaptive_cards_schema.py
RENAMED
File without changes
|
{innovation_pathfinder_ai/backend/app → app}/schemas/message_schema.py
RENAMED
File without changes
|
{innovation_pathfinder_ai/backend/app → app}/schemas/response_schema.py
RENAMED
File without changes
|
{innovation_pathfinder_ai/backend/app → app}/structured_tools/structured_tools.py
RENAMED
@@ -8,9 +8,10 @@ from langchain_community.utilities import GoogleSearchAPIWrapper
|
|
8 |
from langchain_community.embeddings.sentence_transformer import (
|
9 |
SentenceTransformerEmbeddings,
|
10 |
)
|
|
|
11 |
from langchain_community.vectorstores import Chroma
|
12 |
import arxiv
|
13 |
-
import ast
|
14 |
|
15 |
import chromadb
|
16 |
|
@@ -34,7 +35,8 @@ from app.utils.utils import (
|
|
34 |
import os
|
35 |
# from app.utils import create_wikipedia_urls_from_text
|
36 |
|
37 |
-
persist_directory = os.getenv('VECTOR_DATABASE_LOCATION')
|
|
|
38 |
|
39 |
@tool
|
40 |
def memory_search(query:str) -> str:
|
@@ -45,11 +47,13 @@ def memory_search(query:str) -> str:
|
|
45 |
path=persist_directory,
|
46 |
)
|
47 |
|
48 |
-
collection_name = os.getenv('CONVERSATION_COLLECTION_NAME')
|
|
|
49 |
#store using envar
|
50 |
|
51 |
embedding_function = SentenceTransformerEmbeddings(
|
52 |
-
model_name=
|
|
|
53 |
)
|
54 |
|
55 |
vector_db = Chroma(
|
@@ -75,7 +79,8 @@ def knowledgeBase_search(query:str) -> str:
|
|
75 |
#store using envar
|
76 |
|
77 |
embedding_function = SentenceTransformerEmbeddings(
|
78 |
-
model_name=os.getenv("EMBEDDING_MODEL"),
|
|
|
79 |
)
|
80 |
|
81 |
vector_db = Chroma(
|
@@ -100,7 +105,7 @@ def arxiv_search(query: str) -> str:
|
|
100 |
formatted_sources = format_arxiv_documents(data)
|
101 |
#all_sources += formatted_sources
|
102 |
parsed_sources = parse_list_to_dicts(formatted_sources)
|
103 |
-
|
104 |
|
105 |
return data.__str__()
|
106 |
|
@@ -149,7 +154,8 @@ def embed_arvix_paper(paper_id:str) -> None:
|
|
149 |
#store using envar
|
150 |
|
151 |
embedding_function = SentenceTransformerEmbeddings(
|
152 |
-
model_name=os.getenv("EMBEDDING_MODEL"),
|
|
|
153 |
)
|
154 |
|
155 |
full_path = os.path.join(pdf_directory, pdf_file_name)
|
@@ -170,7 +176,7 @@ def wikipedia_search(query: str) -> str:
|
|
170 |
formatted_summaries = format_wiki_summaries(wikipedia_results)
|
171 |
#all_sources += formatted_summaries
|
172 |
parsed_summaries = parse_list_to_dicts(formatted_summaries)
|
173 |
-
|
174 |
#all_sources += create_wikipedia_urls_from_text(wikipedia_results)
|
175 |
return wikipedia_results
|
176 |
|
@@ -183,7 +189,7 @@ def google_search(query: str) -> str:
|
|
183 |
search_results:dict = websearch.results(query, 3)
|
184 |
cleaner_sources =format_search_results(search_results)
|
185 |
parsed_csources = parse_list_to_dicts(cleaner_sources)
|
186 |
-
|
187 |
#all_sources += cleaner_sources
|
188 |
|
189 |
return cleaner_sources.__str__()
|
|
|
8 |
from langchain_community.embeddings.sentence_transformer import (
|
9 |
SentenceTransformerEmbeddings,
|
10 |
)
|
11 |
+
from app.core.config import settings
|
12 |
from langchain_community.vectorstores import Chroma
|
13 |
import arxiv
|
14 |
+
#import ast
|
15 |
|
16 |
import chromadb
|
17 |
|
|
|
35 |
import os
|
36 |
# from app.utils import create_wikipedia_urls_from_text
|
37 |
|
38 |
+
#persist_directory = os.getenv('VECTOR_DATABASE_LOCATION')
|
39 |
+
persist_directory = settings.VECTOR_DATABASE_LOCATION
|
40 |
|
41 |
@tool
|
42 |
def memory_search(query:str) -> str:
|
|
|
47 |
path=persist_directory,
|
48 |
)
|
49 |
|
50 |
+
#collection_name = os.getenv('CONVERSATION_COLLECTION_NAME')
|
51 |
+
collection_name = settings.CONVERSATION_COLLECTION_NAME
|
52 |
#store using envar
|
53 |
|
54 |
embedding_function = SentenceTransformerEmbeddings(
|
55 |
+
model_name=settings.EMBEDDING_MODEL
|
56 |
+
#model_name=os.getenv("EMBEDDING_MODEL"),
|
57 |
)
|
58 |
|
59 |
vector_db = Chroma(
|
|
|
79 |
#store using envar
|
80 |
|
81 |
embedding_function = SentenceTransformerEmbeddings(
|
82 |
+
#model_name=os.getenv("EMBEDDING_MODEL"),
|
83 |
+
model_name=settings.EMBEDDING_MODEL
|
84 |
)
|
85 |
|
86 |
vector_db = Chroma(
|
|
|
105 |
formatted_sources = format_arxiv_documents(data)
|
106 |
#all_sources += formatted_sources
|
107 |
parsed_sources = parse_list_to_dicts(formatted_sources)
|
108 |
+
add_many(parsed_sources)
|
109 |
|
110 |
return data.__str__()
|
111 |
|
|
|
154 |
#store using envar
|
155 |
|
156 |
embedding_function = SentenceTransformerEmbeddings(
|
157 |
+
#model_name=os.getenv("EMBEDDING_MODEL"),
|
158 |
+
model_name=settings.EMBEDDING_MODEL
|
159 |
)
|
160 |
|
161 |
full_path = os.path.join(pdf_directory, pdf_file_name)
|
|
|
176 |
formatted_summaries = format_wiki_summaries(wikipedia_results)
|
177 |
#all_sources += formatted_summaries
|
178 |
parsed_summaries = parse_list_to_dicts(formatted_summaries)
|
179 |
+
add_many(parsed_summaries)
|
180 |
#all_sources += create_wikipedia_urls_from_text(wikipedia_results)
|
181 |
return wikipedia_results
|
182 |
|
|
|
189 |
search_results:dict = websearch.results(query, 3)
|
190 |
cleaner_sources =format_search_results(search_results)
|
191 |
parsed_csources = parse_list_to_dicts(cleaner_sources)
|
192 |
+
add_many(parsed_csources)
|
193 |
#all_sources += cleaner_sources
|
194 |
|
195 |
return cleaner_sources.__str__()
|
{innovation_pathfinder_ai/backend/app → app}/templates/chat.py
RENAMED
@@ -6,10 +6,10 @@ chat_html = """
|
|
6 |
</head>
|
7 |
<body>
|
8 |
<webchat-widget
|
9 |
-
widget-websocket="ws://localhost:
|
10 |
widget-color="#47A7F6"
|
11 |
-
widget-chat-avatar="https://icon-library.com/images/
|
12 |
-
widget-user-avatar="https://
|
13 |
widget-header="Bot"
|
14 |
widget-subheader="Online"
|
15 |
widget-placeholder="Send a message"
|
|
|
6 |
</head>
|
7 |
<body>
|
8 |
<webchat-widget
|
9 |
+
widget-websocket="ws://localhost:8000/chat/agent"
|
10 |
widget-color="#47A7F6"
|
11 |
+
widget-chat-avatar="https://icon-library.com/images/bot-icon/bot-icon-1.jpg"
|
12 |
+
widget-user-avatar="https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQWR4hHJTiikyzCv6nc5OAkHPIHMD-ESsP-LFEaY2vVIjV6wqCt&s"
|
13 |
widget-header="Bot"
|
14 |
widget-subheader="Online"
|
15 |
widget-placeholder="Send a message"
|
{innovation_pathfinder_ai/backend/app → app}/templates/react_json_with_memory.py
RENAMED
File without changes
|
{innovation_pathfinder_ai/backend/app/core → app/utils}/__init__.py
RENAMED
File without changes
|
{innovation_pathfinder_ai/backend/app → app}/utils/adaptive_cards/cards.py
RENAMED
File without changes
|
{innovation_pathfinder_ai/backend/app → app}/utils/callback.py
RENAMED
File without changes
|
{innovation_pathfinder_ai/backend/app → app}/utils/chains.py
RENAMED
@@ -1,4 +1,5 @@
|
|
1 |
-
from langchain import LLMChain
|
|
|
2 |
from langchain_community.llms import HuggingFaceEndpoint
|
3 |
|
4 |
import re
|
|
|
1 |
+
from langchain.chains import LLMChain
|
2 |
+
from langchain.prompts import PromptTemplate
|
3 |
from langchain_community.llms import HuggingFaceEndpoint
|
4 |
|
5 |
import re
|
{innovation_pathfinder_ai/backend/app → app}/utils/logger.py
RENAMED
File without changes
|
{innovation_pathfinder_ai/backend/app → app}/utils/utils.py
RENAMED
File without changes
|
{innovation_pathfinder_ai/backend/app → app}/vector_store/chroma_vector_store.py
RENAMED
@@ -23,11 +23,13 @@ from langchain_community.embeddings.sentence_transformer import (
|
|
23 |
from app.utils.utils import (
|
24 |
generate_uuid
|
25 |
)
|
26 |
-
import
|
27 |
-
import
|
|
|
28 |
|
29 |
-
dotenv.load_dotenv()
|
30 |
-
persist_directory = os.getenv('VECTOR_DATABASE_LOCATION')
|
|
|
31 |
|
32 |
|
33 |
def read_markdown_file(file_path: str) -> str:
|
@@ -96,7 +98,8 @@ def add_markdown_to_collection(
|
|
96 |
)
|
97 |
|
98 |
embedding_function = SentenceTransformerEmbeddings(
|
99 |
-
model_name=os.getenv("EMBEDDING_MODEL"),
|
|
|
100 |
)
|
101 |
|
102 |
documents_page_content:list = [i.page_content for i in splits]
|
@@ -178,7 +181,8 @@ def add_pdf_to_vector_store(
|
|
178 |
)
|
179 |
|
180 |
embedding_function = SentenceTransformerEmbeddings(
|
181 |
-
model_name=os.getenv("EMBEDDING_MODEL"),
|
|
|
182 |
)
|
183 |
|
184 |
documents_page_content:list = [i.page_content for i in split_docs]
|
@@ -236,7 +240,8 @@ if __name__ == "__main__":
|
|
236 |
|
237 |
# create the open-source embedding function
|
238 |
embedding_function = SentenceTransformerEmbeddings(
|
239 |
-
model_name=os.getenv("EMBEDDING_MODEL"),
|
|
|
240 |
)
|
241 |
|
242 |
#method of integrating Chroma and Langchain
|
|
|
23 |
from app.utils.utils import (
|
24 |
generate_uuid
|
25 |
)
|
26 |
+
from app.core.config import settings
|
27 |
+
# import dotenv
|
28 |
+
# import os
|
29 |
|
30 |
+
# dotenv.load_dotenv()
|
31 |
+
# persist_directory = os.getenv('VECTOR_DATABASE_LOCATION')
|
32 |
+
persist_directory = settings.VECTOR_DATABASE_LOCATION
|
33 |
|
34 |
|
35 |
def read_markdown_file(file_path: str) -> str:
|
|
|
98 |
)
|
99 |
|
100 |
embedding_function = SentenceTransformerEmbeddings(
|
101 |
+
#model_name=os.getenv("EMBEDDING_MODEL"),
|
102 |
+
model_name=settings.EMBEDDING_MODEL
|
103 |
)
|
104 |
|
105 |
documents_page_content:list = [i.page_content for i in splits]
|
|
|
181 |
)
|
182 |
|
183 |
embedding_function = SentenceTransformerEmbeddings(
|
184 |
+
#model_name=os.getenv("EMBEDDING_MODEL"),
|
185 |
+
model_name=settings.EMBEDDING_MODEL
|
186 |
)
|
187 |
|
188 |
documents_page_content:list = [i.page_content for i in split_docs]
|
|
|
240 |
|
241 |
# create the open-source embedding function
|
242 |
embedding_function = SentenceTransformerEmbeddings(
|
243 |
+
#model_name=os.getenv("EMBEDDING_MODEL"),
|
244 |
+
model_name=settings.EMBEDDING_MODEL
|
245 |
)
|
246 |
|
247 |
#method of integrating Chroma and Langchain
|
{innovation_pathfinder_ai/backend/app → app}/vector_store/initialize_chroma_db.py
RENAMED
@@ -1,14 +1,17 @@
|
|
1 |
from langchain_community.vectorstores import Chroma
|
|
|
2 |
import chromadb
|
3 |
-
import dotenv
|
4 |
-
import os
|
5 |
|
6 |
-
dotenv.load_dotenv()
|
7 |
-
persist_directory = os.getenv('VECTOR_DATABASE_LOCATION')
|
|
|
8 |
|
9 |
def initialize_chroma_db() -> Chroma:
|
10 |
-
collection_name = os.getenv('CONVERSATION_COLLECTION_NAME')
|
11 |
-
|
|
|
12 |
client = chromadb.PersistentClient(
|
13 |
path=persist_directory
|
14 |
)
|
|
|
1 |
from langchain_community.vectorstores import Chroma
|
2 |
+
from app.core.config import settings
|
3 |
import chromadb
|
4 |
+
#import dotenv
|
5 |
+
#import os
|
6 |
|
7 |
+
#dotenv.load_dotenv()
|
8 |
+
#persist_directory = os.getenv('VECTOR_DATABASE_LOCATION')
|
9 |
+
persist_directory = settings.VECTOR_DATABASE_LOCATION
|
10 |
|
11 |
def initialize_chroma_db() -> Chroma:
|
12 |
+
#collection_name = os.getenv('CONVERSATION_COLLECTION_NAME')
|
13 |
+
collection_name = settings.CONVERSATION_COLLECTION_NAME
|
14 |
+
|
15 |
client = chromadb.PersistentClient(
|
16 |
path=persist_directory
|
17 |
)
|
{innovation_pathfinder_ai/frontend/assets → assets}/avatar.png
RENAMED
File without changes
|
{innovation_pathfinder_ai/frontend/assets → assets}/favicon.ico
RENAMED
File without changes
|
innovation_pathfinder_ai/backend/app/utils/__init__.py
DELETED
File without changes
|
innovation_pathfinder_ai/frontend/app.py
DELETED
@@ -1,143 +0,0 @@
|
|
1 |
-
from fastapi import FastAPI
|
2 |
-
import gradio as gr
|
3 |
-
from gradio.themes.base import Base
|
4 |
-
#from innovation_pathfinder_ai.backend.app.api.v1.agents.hf_mixtral_agent import agent_executor
|
5 |
-
#from innovation_pathfinder_ai.source_container.container import (
|
6 |
-
# all_sources
|
7 |
-
#)
|
8 |
-
#from innovation_pathfinder_ai.backend.app.utils.utils import extract_urls
|
9 |
-
#from innovation_pathfinder_ai.backend.app.utils import logger
|
10 |
-
#from innovation_pathfinder_ai.backend.app.vector_store.chroma_vector_store import initialize_chroma_db
|
11 |
-
#from innovation_pathfinder_ai.backend.app.utils.utils import (
|
12 |
-
# generate_uuid
|
13 |
-
#)
|
14 |
-
from langchain_community.vectorstores import Chroma
|
15 |
-
|
16 |
-
import asyncio
|
17 |
-
import websockets
|
18 |
-
import json
|
19 |
-
import dotenv
|
20 |
-
import os
|
21 |
-
|
22 |
-
dotenv.load_dotenv()
|
23 |
-
persist_directory = os.getenv('VECTOR_DATABASE_LOCATION')
|
24 |
-
|
25 |
-
#logger = logger.get_console_logger("app")
|
26 |
-
|
27 |
-
app = FastAPI()
|
28 |
-
|
29 |
-
if __name__ == "__main__":
|
30 |
-
|
31 |
-
#db = initialize_chroma_db()
|
32 |
-
|
33 |
-
def add_text(history, text):
|
34 |
-
history = history + [(text, None)]
|
35 |
-
return history, ""
|
36 |
-
|
37 |
-
def bot(history):
|
38 |
-
response = infer(history[-1][0], history)
|
39 |
-
# Existing logic remains the same up to the point where you need to call backend operations
|
40 |
-
# Example for calling generate_uuid from the backend
|
41 |
-
# response = requests.post("http://localhost:8000/add-document")
|
42 |
-
#current_id = response.text
|
43 |
-
# sources = extract_urls(all_sources)
|
44 |
-
# src_list = '\n'.join(sources)
|
45 |
-
# current_id = generate_uuid()
|
46 |
-
# db.add(
|
47 |
-
# ids=[current_id],
|
48 |
-
# documents=[response['output']],
|
49 |
-
# metadatas=[
|
50 |
-
# {
|
51 |
-
# "human_message":history[-1][0],
|
52 |
-
# "sources": 'Internal Knowledge Base From: \n\n' + src_list
|
53 |
-
# }
|
54 |
-
# ]
|
55 |
-
# )
|
56 |
-
# if not sources:
|
57 |
-
# response_w_sources = response['output']+"\n\n\n Sources: \n\n\n Internal knowledge base"
|
58 |
-
# else:
|
59 |
-
# response_w_sources = response['output']+"\n\n\n Sources: \n\n\n"+src_list
|
60 |
-
print(response)
|
61 |
-
history[-1][1] = response['output']
|
62 |
-
# all_sources.clear()
|
63 |
-
return history
|
64 |
-
|
65 |
-
async def ask_question_async(question, history):
|
66 |
-
uri = "ws://localhost:8000/chat/agent" # Update this URI to your actual WebSocket endpoint
|
67 |
-
async with websockets.connect(uri) as websocket:
|
68 |
-
# Prepare the message to send (adjust the structure as needed for your backend)
|
69 |
-
message_data = {
|
70 |
-
"message": question,
|
71 |
-
"history": history
|
72 |
-
}
|
73 |
-
json_data = json.dumps(message_data)
|
74 |
-
await websocket.send(json_data)
|
75 |
-
|
76 |
-
# Wait for the response
|
77 |
-
response_data = await websocket.recv()
|
78 |
-
return json.loads(response_data)
|
79 |
-
|
80 |
-
def infer(question, history):
|
81 |
-
# result = agent_executor.invoke(
|
82 |
-
# {
|
83 |
-
# "input": question,
|
84 |
-
# "chat_history": history
|
85 |
-
# }
|
86 |
-
# )
|
87 |
-
# return result
|
88 |
-
try:
|
89 |
-
# Ensure there's an event loop to run async code
|
90 |
-
loop = asyncio.get_event_loop()
|
91 |
-
except RuntimeError as ex:
|
92 |
-
if "There is no current event loop" in str(ex):
|
93 |
-
loop = asyncio.new_event_loop()
|
94 |
-
asyncio.set_event_loop(loop)
|
95 |
-
|
96 |
-
result = loop.run_until_complete(ask_question_async(question, history))
|
97 |
-
return result
|
98 |
-
|
99 |
-
# Run the asynchronous function in the synchronous context
|
100 |
-
result = asyncio.get_event_loop().run_until_complete(ask_question_async(question, history))
|
101 |
-
return result
|
102 |
-
|
103 |
-
def vote(data: gr.LikeData):
|
104 |
-
if data.liked:
|
105 |
-
print("You upvoted this response: " + data.value)
|
106 |
-
else:
|
107 |
-
print("You downvoted this response: " + data.value)
|
108 |
-
|
109 |
-
css="""
|
110 |
-
#col-container {max-width: 700px; margin-left: auto; margin-right: auto;}
|
111 |
-
"""
|
112 |
-
|
113 |
-
title = """
|
114 |
-
<div style="text-align:left;">
|
115 |
-
<p>Hello Human, I am your AI knowledge research assistant. I can explore topics across ArXiv, Wikipedia and use Google search.<br />
|
116 |
-
</div>
|
117 |
-
"""
|
118 |
-
|
119 |
-
with gr.Blocks(theme=gr.themes.Soft(), title="AlfredAI - AI Knowledge Research Assistant") as demo:
|
120 |
-
# with gr.Tab("Google|Wikipedia|Arxiv"):
|
121 |
-
with gr.Column(elem_id="col-container"):
|
122 |
-
gr.HTML(title)
|
123 |
-
with gr.Row():
|
124 |
-
question = gr.Textbox(label="Question",
|
125 |
-
placeholder="Type your question and hit Enter",)
|
126 |
-
chatbot = gr.Chatbot([],
|
127 |
-
elem_id="AI Assistant",
|
128 |
-
bubble_full_width=False,
|
129 |
-
avatar_images=(None, "./assets/avatar.png"),
|
130 |
-
height=480,)
|
131 |
-
chatbot.like(vote, None, None)
|
132 |
-
clear = gr.Button("Clear")
|
133 |
-
question.submit(add_text, [chatbot, question], [chatbot, question], queue=False).then(
|
134 |
-
bot, chatbot, chatbot
|
135 |
-
)
|
136 |
-
clear.click(lambda: None, None, chatbot, queue=False)
|
137 |
-
with gr.Accordion("Open for More!", open=False):
|
138 |
-
gr.Markdown("Nothing yet...")
|
139 |
-
|
140 |
-
demo.queue().launch(debug=True, favicon_path="assets/favicon.ico", share=True)
|
141 |
-
|
142 |
-
x = 0 # for debugging purposes
|
143 |
-
app = gr.mount_gradio_app(app, demo, path="/")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
innovation_pathfinder_ai/source_container/container.py
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
all_sources = []
|
|
|
|