Add websockets

- Dockerfile +3 -20
- app/main.py +39 -6
- app/rag.py +5 -6
- requirements.txt +1 -0
- start_service.sh +0 -16
Dockerfile
CHANGED
@@ -1,34 +1,17 @@
-#
 FROM python:3.11
 
-#
 WORKDIR /code
 
-#
 COPY ./requirements.txt /code/requirements.txt
 
-#
-RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
-
-#
-COPY ./start_service.sh /code/start_service.sh
-
-#
 COPY ./app /code/app
 
-RUN
+RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
 
-
+RUN useradd -m docker && echo "docker:docker" | chpasswd && adduser docker sudo
 
 USER docker
 
-# RUN nohup ollama serve & sleep 5
-
-#
-# RUN chmod +x /code/start_service.sh
-
-#
 EXPOSE 7860
 
-
-ENTRYPOINT ["sh", "/code/start_service.sh"]
+CMD ["fastapi", "run", "/code/app/main.py", "--port", "7860"]
app/main.py
CHANGED
@@ -1,10 +1,7 @@
 import os
 import shutil
-import
-from
-from pathlib import Path
-
-from fastapi import FastAPI, UploadFile
+from typing import List
+from fastapi import FastAPI, UploadFile, WebSocket, WebSocketDisconnect
 from fastapi.middleware import Middleware
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import StreamingResponse

@@ -24,11 +21,47 @@ app = FastAPI(middleware=middleware)
 files_dir = os.path.expanduser("~/wtp_be_files/")
 session_assistant = ChatPDF()
 
+class ConnectionManager:
+    def __init__(self):
+        self.active_connections: List[WebSocket] = []
+
+    async def connect(self, websocket: WebSocket):
+        await websocket.accept()
+        self.active_connections.append(websocket)
+
+    def disconnect(self, websocket: WebSocket):
+        self.active_connections.remove(websocket)
+
+    async def send_personal_message(self, message: str, websocket: WebSocket):
+        await websocket.send_text(message)
+
+    async def broadcast(self, message: str):
+        for connection in self.active_connections:
+            await connection.send_text(message)
+
+manager = ConnectionManager()
+
+@app.websocket("/ws/{client_id}")
+async def websocket_endpoint(websocket: WebSocket, client_id: int):
+    await manager.connect(websocket)
+    now = datetime.now()
+    current_time = now.strftime("%H:%M")
+    try:
+        while True:
+            data = await websocket.receive_text()
+            # await manager.send_personal_message(f"You wrote: {data}", websocket)
+            message = {"time":current_time,"clientId":client_id,"message":data}
+            await manager.broadcast(json.dumps(message))
+
+    except WebSocketDisconnect:
+        manager.disconnect(websocket)
+        message = {"time":current_time,"clientId":client_id,"message":"Offline"}
+        await manager.broadcast(json.dumps(message))
+
 @app.get("/query")
 async def process_input(text: str):
     if text and len(text.strip()) > 0:
         text = text.strip()
-        print("PRINTING STREAM")
         return StreamingResponse(session_assistant.ask(text), media_type='text/event-stream')
 
 
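For reference, a minimal client sketch for the new /ws/{client_id} endpoint, written against the websockets package added in requirements.txt. The host, port, and client id are assumptions (the Dockerfile CMD serves on 7860); note that the handler also calls datetime.now() and json.dumps, so the corresponding imports must exist outside the hunks shown above.

# Minimal client sketch for the new /ws/{client_id} endpoint.
# Assumptions: the app is reachable on localhost:7860 (per the Dockerfile CMD)
# and the client id is an arbitrary integer.
import asyncio
import json

import websockets  # the dependency added in requirements.txt


async def chat(client_id: int) -> None:
    uri = f"ws://localhost:7860/ws/{client_id}"
    async with websockets.connect(uri) as ws:
        await ws.send("hello")
        # Each received frame is a broadcast JSON payload of the form
        # {"time": "HH:MM", "clientId": <int>, "message": <str>}.
        reply = json.loads(await ws.recv())
        print(reply["time"], reply["clientId"], reply["message"])


asyncio.run(chat(client_id=1))

Because every inbound text frame is re-broadcast to all active connections, two instances of this script running with different client ids would see each other's messages.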
app/rag.py
CHANGED
@@ -22,8 +22,6 @@ logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
 class ChatPDF:
-    query_engine = None
-
     def __init__(self):
         self.text_parser = SentenceSplitter(chunk_size=512, chunk_overlap=24)
 

@@ -45,7 +43,7 @@ class ChatPDF:
             model_url="https://huggingface.co/Qwen/Qwen1.5-1.8B-Chat-GGUF/resolve/main/qwen1_5-1_8b-chat-q4_k_m.gguf",
             temperature=0.1,
             max_new_tokens=256,
-            context_window=3900,
+            context_window=3900, #32k
             # generate_kwargs={},
             # model_kwargs={"n_gpu_layers": -1},
             # messages_to_prompt=self.messages_to_prompt,

@@ -108,10 +106,11 @@ class ChatPDF:
         logger.info("retrieving the response to the query")
         streaming_response = self.query_engine.query(query)
         print(streaming_response)
-
-
+        print("PRINTING STREAM")
+        generator = streaming_response.response_gen
+        for text in generator:
             print(text)
             yield text
 
     def clear(self):
-
+        pass
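Since ask() now yields tokens from streaming_response.response_gen, the /query endpoint emits its answer incrementally rather than all at once. A minimal consumer sketch, assuming httpx as the client library (it is not among the listed dependencies):

# Sketch: read the /query stream token by token as it arrives.
# Assumption: httpx is installed on the client side.
import httpx

params = {"text": "What is this document about?"}
with httpx.stream("GET", "http://localhost:7860/query",
                  params=params, timeout=None) as response:
    for chunk in response.iter_text():
        print(chunk, end="", flush=True)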
requirements.txt
CHANGED
@@ -5,4 +5,5 @@ qdrant-client
 python-dotenv
 llama-index-llms-llama-cpp
 llama-index-embeddings-fastembed
+websockets
 fastembed==0.2.7
start_service.sh
DELETED
@@ -1,16 +0,0 @@
-#!/bin/sh
-
-# # Start Ollama in the background
-# ollama serve &
-
-# # Wait for Ollama to start
-# sleep 5
-
-# #
-# ollama pull mxbai-embed-large
-
-# # Pull and run <YOUR_MODEL_NAME>
-# ollama pull qwen:1.8b
-
-#
-fastapi run /code/app/main.py --port 7860