Moshe Ofer committed on
Commit
da278a5
·
1 Parent(s): 8c41f13

Initial commit for Hugging Face Space

Browse files
Files changed (5) hide show
  1. Dockerfile +15 -2
  2. __pycache__/app.cpython-312.pyc +0 -0
  3. app.py +14 -4
  4. temp.py +0 -175
  5. templates/index.html +5 -3
Dockerfile CHANGED
@@ -2,29 +2,42 @@ FROM python:3.9-slim
2
 
3
  WORKDIR /app
4
 
 
5
  RUN apt-get update && apt-get install -y --no-install-recommends \
6
  build-essential \
7
- git && \
8
- rm -rf /var/lib/apt/lists/*
9
 
 
10
  RUN mkdir -p /app/cache && chmod -R 777 /app/cache
11
  ENV HF_HOME=/app/cache
12
 
 
13
  ENV PYTHONUNBUFFERED=1
14
  ENV EVENTLET_NO_GREENDNS=yes
15
  ENV EVENTLET_THREADPOOL_SIZE=32
16
  ENV EVENTLET_WEBSOCKET_MONITOR_TIMEOUT=60
 
17
 
 
18
  COPY . /app
19
 
 
20
  RUN pip install --no-cache-dir --upgrade pip
21
  RUN pip install --no-cache-dir -r requirements.txt
22
 
 
23
  EXPOSE 7860
24
 
 
25
  CMD ["gunicorn", \
26
  "--worker-class", "eventlet", \
27
  "--workers", "1", \
 
28
  "--timeout", "300", \
 
29
  "--bind", "0.0.0.0:7860", \
 
 
 
30
  "app:app"]
 
2
 
3
  WORKDIR /app
4
 
5
+ # Install system dependencies
6
  RUN apt-get update && apt-get install -y --no-install-recommends \
7
  build-essential \
8
+ git \
9
+ && rm -rf /var/lib/apt/lists/*
10
 
11
+ # Set up cache directory
12
  RUN mkdir -p /app/cache && chmod -R 777 /app/cache
13
  ENV HF_HOME=/app/cache
14
 
15
+ # Set environment variables for proper eventlet operation
16
  ENV PYTHONUNBUFFERED=1
17
  ENV EVENTLET_NO_GREENDNS=yes
18
  ENV EVENTLET_THREADPOOL_SIZE=32
19
  ENV EVENTLET_WEBSOCKET_MONITOR_TIMEOUT=60
20
+ ENV GUNICORN_CMD_ARGS="--worker-class eventlet --workers 1 --timeout 300 --keep-alive 65 --log-level debug --access-logfile - --error-logfile -"
21
 
22
+ # Copy application files
23
  COPY . /app
24
 
25
+ # Install Python dependencies
26
  RUN pip install --no-cache-dir --upgrade pip
27
  RUN pip install --no-cache-dir -r requirements.txt
28
 
29
+ # Expose port
30
  EXPOSE 7860
31
 
32
+ # Modified command to use explicit configuration
33
  CMD ["gunicorn", \
34
  "--worker-class", "eventlet", \
35
  "--workers", "1", \
36
+ "--worker-connections", "1000", \
37
  "--timeout", "300", \
38
+ "--keep-alive", "65", \
39
  "--bind", "0.0.0.0:7860", \
40
+ "--log-level", "debug", \
41
+ "--access-logfile", "-", \
42
+ "--error-logfile", "-", \
43
  "app:app"]
__pycache__/app.cpython-312.pyc ADDED
Binary file (5 kB). View file
 
app.py CHANGED
@@ -1,5 +1,8 @@
1
  import eventlet
2
- eventlet.monkey_patch()
 
 
 
3
  from flask import Flask, render_template
4
  from flask_socketio import SocketIO
5
  from transformers import MultiBeamTextStreamer, AutoTokenizer, AutoModelForCausalLM
@@ -7,8 +10,14 @@ import torch
7
  import time
8
 
9
  app = Flask(__name__)
10
- socketio = SocketIO(app, ping_timeout=60)
11
-
 
 
 
 
 
 
12
  # Initialize model and tokenizer
13
  MODEL_NAME = "Qwen/Qwen2.5-0.5B-Instruct"
14
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
@@ -38,10 +47,11 @@ class WebSocketBeamStreamer(MultiBeamTextStreamer):
38
  self.beam_texts[beam_idx] = new_text
39
  if self.sleep_time > 0:
40
  eventlet.sleep(self.sleep_time / 1000) # Convert milliseconds to seconds
 
41
  socketio.emit('beam_update', {
42
  'beam_idx': beam_idx,
43
  'text': new_text
44
- })
45
 
46
  def on_beam_finished(self, final_text: str):
47
  """Send completion notification through websocket"""
 
1
  import eventlet
2
+ eventlet.monkey_patch(socket=True, select=True)
3
+
4
+ import eventlet.wsgi
5
+
6
  from flask import Flask, render_template
7
  from flask_socketio import SocketIO
8
  from transformers import MultiBeamTextStreamer, AutoTokenizer, AutoModelForCausalLM
 
10
  import time
11
 
12
  app = Flask(__name__)
13
+ socketio = SocketIO(
14
+ app,
15
+ ping_timeout=60,
16
+ async_mode='eventlet',
17
+ cors_allowed_origins="*",
18
+ logger=True,
19
+ engineio_logger=True
20
+ )
21
  # Initialize model and tokenizer
22
  MODEL_NAME = "Qwen/Qwen2.5-0.5B-Instruct"
23
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
 
47
  self.beam_texts[beam_idx] = new_text
48
  if self.sleep_time > 0:
49
  eventlet.sleep(self.sleep_time / 1000) # Convert milliseconds to seconds
50
+ # Force immediate emit and wait for confirmation
51
  socketio.emit('beam_update', {
52
  'beam_idx': beam_idx,
53
  'text': new_text
54
+ }, callback=lambda: eventlet.sleep(0))
55
 
56
  def on_beam_finished(self, final_text: str):
57
  """Send completion notification through websocket"""
temp.py DELETED
@@ -1,175 +0,0 @@
1
- import argparse
2
- import os
3
-
4
- from transformers import MultiBeamTextStreamer, AutoTokenizer, AutoModelForCausalLM
5
-
6
-
7
- class BeamOutputManager:
8
- """Manages file handlers for beam outputs"""
9
-
10
- def __init__(self, output_dir: str, num_beams: int):
11
- self.output_dir = output_dir
12
- self.num_beams = num_beams
13
- self.counter = 0
14
-
15
- # Create main output directory and closed beams directory
16
- os.makedirs(output_dir, exist_ok=True)
17
- self.closed_beams_dir = os.path.join(output_dir, "closed_beams")
18
- os.makedirs(self.closed_beams_dir, exist_ok=True)
19
-
20
- # Store complete text for each beam
21
- self.beam_texts = {i: "" for i in range(num_beams)}
22
- self.active_beams = set(range(num_beams))
23
-
24
- # Initialize empty files
25
- for beam_idx in range(num_beams):
26
- filename = os.path.join(output_dir, f'beam_{beam_idx}.txt')
27
- with open(filename, 'w', encoding='utf-8') as f:
28
- f.write('')
29
-
30
- def write_to_beam(self, beam_idx: int, text: str):
31
- """Write text to the specified beam's file"""
32
- if 0 <= beam_idx < self.num_beams and beam_idx in self.active_beams:
33
- # Update stored text
34
- self.beam_texts[beam_idx] = text
35
-
36
- # Write complete text to file
37
- filename = os.path.join(self.output_dir, f'beam_{beam_idx}.txt')
38
- with open(filename, 'w', encoding='utf-8') as f:
39
- f.write(self.beam_texts[beam_idx])
40
-
41
- def finalize_beam(self, final_text: str):
42
- """
43
- Handle a completed beam by creating a new file in the closed_beams directory.
44
-
45
- Args:
46
- final_text (str): The complete text generated by the finished beam
47
- """
48
- # Create a counter-based filename to ensure uniqueness
49
- self.counter += 1
50
- filename = os.path.join(self.closed_beams_dir, f'completed_beam_{self.counter}.txt')
51
-
52
- # Write the final text to the completed beam file
53
- with open(filename, 'w', encoding='utf-8') as f:
54
- f.write(final_text)
55
-
56
- return filename
57
-
58
-
59
- def setup_model_and_tokenizer(model_name):
60
- """
61
- Initialize the model and tokenizer.
62
-
63
- Args:
64
- model_name (str): Name of the model to use
65
-
66
- Returns:
67
- tuple: (model, tokenizer)
68
- """
69
- tokenizer = AutoTokenizer.from_pretrained(model_name)
70
- model = AutoModelForCausalLM.from_pretrained(
71
- model_name,
72
- torch_dtype="auto",
73
- device_map="auto"
74
- )
75
- return model, tokenizer
76
-
77
-
78
- def generate_with_beam_search(model, tokenizer, user_prompt, output_dir, num_beams=5, max_new_tokens=512):
79
- """
80
- Generate responses using beam search and write results to files.
81
-
82
- Args:
83
- model: The language model
84
- tokenizer: The tokenizer
85
- user_prompt (str): Input prompt
86
- output_dir (str): Directory to save beam outputs
87
- num_beams (int): Number of beams to use
88
- max_new_tokens (int): Maximum number of new tokens to generate
89
- """
90
- # Initialize the output manager
91
- output_manager = BeamOutputManager(output_dir, num_beams)
92
-
93
- def on_beam_update(beam_idx: int, new_text: str):
94
- """Handler for beam updates - write new text to file"""
95
- output_manager.write_to_beam(beam_idx, new_text)
96
-
97
- def on_beam_finished(final_text: str):
98
- """Handler for completed beams - create final output file"""
99
- final_path = output_manager.finalize_beam(final_text)
100
- print(f"\nCompleted beam saved to: {final_path}")
101
-
102
- # Create messages format
103
- messages = [
104
- {"role": "system", "content": "You are a helpful assistant."},
105
- {"role": "user", "content": user_prompt}
106
- ]
107
-
108
- # Apply chat template
109
- text = tokenizer.apply_chat_template(
110
- messages,
111
- tokenize=False,
112
- add_generation_prompt=True
113
- )
114
-
115
- # Prepare inputs
116
- model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
117
-
118
- # Initialize streamer with handlers
119
- streamer = MultiBeamTextStreamer(
120
- tokenizer=tokenizer,
121
- num_beams=num_beams,
122
- on_beam_update=on_beam_update,
123
- on_beam_finished=on_beam_finished,
124
- skip_prompt=True
125
- )
126
-
127
- # Generate with beam search
128
- model.generate(
129
- **model_inputs,
130
- num_beams=num_beams,
131
- num_return_sequences=num_beams,
132
- max_new_tokens=max_new_tokens,
133
- output_scores=True,
134
- return_dict_in_generate=True,
135
- early_stopping=True,
136
- streamer=streamer
137
- )
138
-
139
-
140
- def main():
141
- # Setup command line arguments
142
- parser = argparse.ArgumentParser(description='Language Model Text Generation with Beam Search')
143
- parser.add_argument('--model', type=str, default='Qwen/Qwen2.5-0.5B-Instruct',
144
- help='Name of the model to use')
145
- parser.add_argument('--num_beams', type=int, default=5,
146
- help='Number of beams for beam search')
147
- parser.add_argument('--max_tokens', type=int, default=512,
148
- help='Maximum number of new tokens to generate')
149
- parser.add_argument('--output_dir', type=str, default='beam_outputs',
150
- help='Directory to save beam outputs')
151
-
152
- args = parser.parse_args()
153
-
154
- # Initialize model and tokenizer
155
- model, tokenizer = setup_model_and_tokenizer(args.model)
156
-
157
- # Interactive loop
158
- while True:
159
- prompt = input("\nEnter your prompt (or 'quit' to exit): ")
160
- if prompt.lower() == 'quit':
161
- break
162
-
163
- generate_with_beam_search(
164
- model,
165
- tokenizer,
166
- prompt,
167
- args.output_dir,
168
- num_beams=args.num_beams,
169
- max_new_tokens=args.max_tokens
170
- )
171
- print(f"\nOutputs written to: {args.output_dir}/beam_*.txt")
172
-
173
-
174
- if __name__ == "__main__":
175
- main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
templates/index.html CHANGED
@@ -396,14 +396,16 @@
396
  </div>
397
 
398
  <script>
399
- // Replace the socket initialization with:
400
  let socket = io({
401
  transports: ['websocket'],
402
  reconnection: true,
403
  reconnectionAttempts: 5,
404
  reconnectionDelay: 1000,
405
- path: '/socket.io/', // Explicitly set the path
406
- upgrade: false // Disable transport upgrades
 
 
 
407
  });
408
  let beams = {};
409
  let completedBeams = [];
 
396
  </div>
397
 
398
  <script>
 
399
  let socket = io({
400
  transports: ['websocket'],
401
  reconnection: true,
402
  reconnectionAttempts: 5,
403
  reconnectionDelay: 1000,
404
+ path: '/socket.io/',
405
+ upgrade: false,
406
+ forceNew: true,
407
+ pingTimeout: 60000,
408
+ pingInterval: 25000
409
  });
410
  let beams = {};
411
  let completedBeams = [];