Rauhan committed on
Commit
fc01c1b
1 Parent(s): f0d6550

UPDATE: Web Crawler

Browse files
Files changed (3) hide show
  1. app.py +14 -0
  2. functions.py +1 -1
  3. requirements.txt +1 -0
app.py CHANGED
@@ -1,8 +1,11 @@
1
  import io
 
2
  from functions import *
3
  from PyPDF2 import PdfReader
 
4
  from fastapi import FastAPI, File, UploadFile
5
  from fastapi.middleware.cors import CORSMiddleware
 
6
 
7
 
8
  app = FastAPI(title = "ConversAI", root_path = "/api/v1")
@@ -47,6 +50,17 @@ async def addText(vectorstore: str, text: str):
47
  return addDocuments(text = text, vectorstore = vectorstore)
48
 
49
 
 
 
 
 
 
 
 
 
 
 
 
50
  @app.post("/answerQuery")
51
  async def answerQuestion(query: str, vectorstore: str, llmModel: str = "llama3-70b-8192"):
52
  return answerQuery(query=query, vectorstore=vectorstore, llmModel=llmModel)
 
1
  import io
2
+ import re
3
  from functions import *
4
  from PyPDF2 import PdfReader
5
+ from bs4 import BeautifulSoup
6
  from fastapi import FastAPI, File, UploadFile
7
  from fastapi.middleware.cors import CORSMiddleware
8
+ from langchain_community.document_loaders import RecursiveUrlLoader
9
 
10
 
11
  app = FastAPI(title = "ConversAI", root_path = "/api/v1")
 
50
  return addDocuments(text = text, vectorstore = vectorstore)
51
 
52
 
53
@app.post("/addWebsite")
async def addWebsite(vectorstore: str, websiteUrl: str):
    """Crawl a website and add its extracted text to a vector store.

    Args:
        vectorstore: Identifier passed through to ``addDocuments``.
        websiteUrl: Root URL handed to ``RecursiveUrlLoader``; links are
            followed up to ``max_depth=2``.

    Returns:
        Whatever ``addDocuments`` returns for the combined page text.
    """
    def bs4_extractor(html: str) -> str:
        # Reduce the HTML to its text content and collapse any run of two or
        # more newlines into exactly one blank line.
        soup = BeautifulSoup(html, "lxml")
        return re.sub(r"\n\n+", "\n\n", soup.text).strip()

    loader = RecursiveUrlLoader(websiteUrl, max_depth=2, timeout=60, extractor=bs4_extractor)
    # NOTE(review): load() is called synchronously inside an async handler, so
    # the crawl presumably blocks the event loop while it runs -- consider
    # offloading it to a worker thread.
    docs = loader.load()
    # Iterating ``docs`` yields the loaded documents themselves, so read
    # ``page_content`` from each one directly. The previous ``docs[doc]`` used
    # a document as a list index, which raises TypeError.
    text = "\n\n".join(doc.page_content for doc in docs)
    return addDocuments(text=text, vectorstore=vectorstore)
62
+
63
+
64
  @app.post("/answerQuery")
65
  async def answerQuestion(query: str, vectorstore: str, llmModel: str = "llama3-70b-8192"):
66
  return answerQuery(query=query, vectorstore=vectorstore, llmModel=llmModel)
functions.py CHANGED
@@ -32,7 +32,7 @@ vectorEmbeddings = HuggingFaceEmbeddings(
32
  model_kwargs = model_kwargs,
33
  encode_kwargs = encode_kwargs
34
  )
35
- sparseEmbeddings = FastEmbedSparse(model = "Qdrant/BM25", threads = 100, parallel = 0)
36
  prompt = """
37
  ### Role
38
  - **Primary Function**: You are an AI chatbot dedicated to assisting users with their inquiries, issues, and requests. Your goal is to deliver excellent, friendly, and efficient responses at all times. Listen attentively, understand user needs, and provide the best assistance possible or direct them to appropriate resources. If a question is unclear, ask for clarification. Always conclude your replies on a positive note.
 
32
  model_kwargs = model_kwargs,
33
  encode_kwargs = encode_kwargs
34
  )
35
+ sparseEmbeddings = FastEmbedSparse(model = "Qdrant/BM25", parallel = 2)
36
  prompt = """
37
  ### Role
38
  - **Primary Function**: You are an AI chatbot dedicated to assisting users with their inquiries, issues, and requests. Your goal is to deliver excellent, friendly, and efficient responses at all times. Listen attentively, understand user needs, and provide the best assistance possible or direct them to appropriate resources. If a question is unclear, ask for clarification. Always conclude your replies on a positive note.
requirements.txt CHANGED
@@ -1,3 +1,4 @@
 
1
  huggingface-hub
2
  fastapi
3
  fastembed-gpu
 
1
+ bs4
2
  huggingface-hub
3
  fastapi
4
  fastembed-gpu