Spaces:
Sleeping
Sleeping
Moshe Ofer
committed on
Commit
·
07e5e01
1
Parent(s):
da278a5
Initial commit for Hugging Face Space
Browse files- Dockerfile +17 -9
- README.md +36 -11
- app.py +9 -6
Dockerfile
CHANGED
@@ -17,7 +17,13 @@ ENV PYTHONUNBUFFERED=1
|
|
17 |
ENV EVENTLET_NO_GREENDNS=yes
|
18 |
ENV EVENTLET_THREADPOOL_SIZE=32
|
19 |
ENV EVENTLET_WEBSOCKET_MONITOR_TIMEOUT=60
|
20 |
-
ENV
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
|
22 |
# Copy application files
|
23 |
COPY . /app
|
@@ -26,18 +32,20 @@ COPY . /app
|
|
26 |
RUN pip install --no-cache-dir --upgrade pip
|
27 |
RUN pip install --no-cache-dir -r requirements.txt
|
28 |
|
29 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
EXPOSE 7860
|
31 |
|
32 |
-
# Modified command
|
33 |
CMD ["gunicorn", \
|
34 |
-
"--
|
35 |
-
"--workers", "1", \
|
36 |
-
"--worker-connections", "1000", \
|
37 |
-
"--timeout", "300", \
|
38 |
-
"--keep-alive", "65", \
|
39 |
"--bind", "0.0.0.0:7860", \
|
40 |
-
"--log-level", "debug", \
|
41 |
"--access-logfile", "-", \
|
42 |
"--error-logfile", "-", \
|
43 |
"app:app"]
|
|
|
17 |
ENV EVENTLET_NO_GREENDNS=yes
|
18 |
ENV EVENTLET_THREADPOOL_SIZE=32
|
19 |
ENV EVENTLET_WEBSOCKET_MONITOR_TIMEOUT=60
|
20 |
+
ENV EVENTLET_NONBLOCKING=1 # Enable non-blocking mode
|
21 |
+
ENV GUNICORN_TIMEOUT=300
|
22 |
+
ENV GUNICORN_WORKER_CLASS=eventlet
|
23 |
+
ENV GUNICORN_WORKERS=1
|
24 |
+
ENV GUNICORN_WORKER_CONNECTIONS=1000
|
25 |
+
ENV GUNICORN_KEEP_ALIVE=65
|
26 |
+
ENV GUNICORN_LOG_LEVEL=debug
|
27 |
|
28 |
# Copy application files
|
29 |
COPY . /app
|
|
|
32 |
RUN pip install --no-cache-dir --upgrade pip
|
33 |
RUN pip install --no-cache-dir -r requirements.txt
|
34 |
|
35 |
+
# Create gunicorn config file
|
36 |
+
RUN echo 'worker_class = "eventlet"' > gunicorn.conf.py && \
|
37 |
+
echo 'workers = 1' >> gunicorn.conf.py && \
|
38 |
+
echo 'worker_connections = 1000' >> gunicorn.conf.py && \
|
39 |
+
echo 'timeout = 300' >> gunicorn.conf.py && \
|
40 |
+
echo 'keepalive = 65' >> gunicorn.conf.py && \
|
41 |
+
echo 'loglevel = "debug"' >> gunicorn.conf.py
|
42 |
+
|
43 |
EXPOSE 7860
|
44 |
|
45 |
+
# Modified command with explicit configuration
|
46 |
CMD ["gunicorn", \
|
47 |
+
"--config", "gunicorn.conf.py", \
|
|
|
|
|
|
|
|
|
48 |
"--bind", "0.0.0.0:7860", \
|
|
|
49 |
"--access-logfile", "-", \
|
50 |
"--error-logfile", "-", \
|
51 |
"app:app"]
|
README.md
CHANGED
@@ -1,11 +1,36 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Beam Search Generator with MultiBeamTextStreamer
|
2 |
+
|
3 |
+
This Hugging Face Space hosts a **Beam Search Generator** web application, powered by a Flask backend and integrated with Hugging Face Transformers. The application provides real-time visualization of beam search generation, offering insights into how language models explore multiple text completion possibilities simultaneously.
|
4 |
+
|
5 |
+
## Features
|
6 |
+
|
7 |
+
- Real-time visualization of active and completed beams.
|
8 |
+
- Adjustable generation parameters:
|
9 |
+
- Number of beams.
|
10 |
+
- Maximum tokens.
|
11 |
+
- Generation speed (using delay sliders).
|
12 |
+
- Seamless interaction with Hugging Face Transformers.
|
13 |
+
- Powered by the custom `MultiBeamTextStreamer` from the Transformers library.
|
14 |
+
|
15 |
+
## How It Works
|
16 |
+
|
17 |
+
1. Input a **prompt** in the text box.
|
18 |
+
2. Configure generation settings:
|
19 |
+
- Number of beams.
|
20 |
+
- Maximum token count.
|
21 |
+
- Delay speed.
|
22 |
+
3. Click **Generate** to visualize the beam search process in real-time.
|
23 |
+
|
24 |
+
## 🛠️ Setup Instructions
|
25 |
+
|
26 |
+
### Prerequisites
|
27 |
+
|
28 |
+
- Python 3.9+
|
29 |
+
- A Hugging Face account (if running the app locally, ensure access to the required models)
|
30 |
+
|
31 |
+
### Running Locally
|
32 |
+
|
33 |
+
1. **Clone the repository**:
|
34 |
+
```bash
|
35 |
+
git clone https://huggingface.co/spaces/<your-username>/<your-space-name>
|
36 |
+
cd <your-space-name>
|
app.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
import eventlet
|
2 |
-
eventlet.monkey_patch(socket=True, select=True)
|
3 |
|
4 |
import eventlet.wsgi
|
5 |
|
@@ -7,16 +7,18 @@ from flask import Flask, render_template
|
|
7 |
from flask_socketio import SocketIO
|
8 |
from transformers import MultiBeamTextStreamer, AutoTokenizer, AutoModelForCausalLM
|
9 |
import torch
|
10 |
-
import time
|
11 |
|
12 |
app = Flask(__name__)
|
13 |
socketio = SocketIO(
|
14 |
app,
|
15 |
-
ping_timeout=60,
|
16 |
async_mode='eventlet',
|
|
|
|
|
|
|
17 |
cors_allowed_origins="*",
|
18 |
logger=True,
|
19 |
-
engineio_logger=True
|
|
|
20 |
)
|
21 |
# Initialize model and tokenizer
|
22 |
MODEL_NAME = "Qwen/Qwen2.5-0.5B-Instruct"
|
@@ -47,11 +49,12 @@ class WebSocketBeamStreamer(MultiBeamTextStreamer):
|
|
47 |
self.beam_texts[beam_idx] = new_text
|
48 |
if self.sleep_time > 0:
|
49 |
eventlet.sleep(self.sleep_time / 1000) # Convert milliseconds to seconds
|
50 |
-
# Force immediate emit and
|
51 |
socketio.emit('beam_update', {
|
52 |
'beam_idx': beam_idx,
|
53 |
'text': new_text
|
54 |
-
}, callback=lambda: eventlet.sleep(0))
|
|
|
55 |
|
56 |
def on_beam_finished(self, final_text: str):
|
57 |
"""Send completion notification through websocket"""
|
|
|
1 |
import eventlet
|
2 |
+
eventlet.monkey_patch(socket=True, select=True, thread=True)
|
3 |
|
4 |
import eventlet.wsgi
|
5 |
|
|
|
7 |
from flask_socketio import SocketIO
|
8 |
from transformers import MultiBeamTextStreamer, AutoTokenizer, AutoModelForCausalLM
|
9 |
import torch
|
|
|
10 |
|
11 |
app = Flask(__name__)
|
12 |
socketio = SocketIO(
|
13 |
app,
|
|
|
14 |
async_mode='eventlet',
|
15 |
+
message_queue=None, # Explicitly set to None for single-worker setup
|
16 |
+
ping_timeout=60,
|
17 |
+
ping_interval=25,
|
18 |
cors_allowed_origins="*",
|
19 |
logger=True,
|
20 |
+
engineio_logger=True,
|
21 |
+
async_handlers=True # Enable async handlers
|
22 |
)
|
23 |
# Initialize model and tokenizer
|
24 |
MODEL_NAME = "Qwen/Qwen2.5-0.5B-Instruct"
|
|
|
49 |
self.beam_texts[beam_idx] = new_text
|
50 |
if self.sleep_time > 0:
|
51 |
eventlet.sleep(self.sleep_time / 1000) # Convert milliseconds to seconds
|
52 |
+
# Force immediate emit and flush
|
53 |
socketio.emit('beam_update', {
|
54 |
'beam_idx': beam_idx,
|
55 |
'text': new_text
|
56 |
+
}, namespace='/', callback=lambda: eventlet.sleep(0))
|
57 |
+
socketio.sleep(0) # Force context switch
|
58 |
|
59 |
def on_beam_finished(self, final_text: str):
|
60 |
"""Send completion notification through websocket"""
|