bys0318 committed on
Commit
a0a9e18
1 Parent(s): 5412729

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -5
app.py CHANGED
@@ -1,9 +1,5 @@
1
  import subprocess
2
 
3
- # Installing flash_attn
4
- subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"},
5
- shell=True)
6
-
7
  from threading import Thread
8
  import spaces
9
  import gradio as gr
@@ -65,7 +61,9 @@ def predict(history, prompt, max_length, top_p, temperature):
65
  t = Thread(target=model.generate, kwargs=generate_kwargs)
66
  t.start()
67
  for new_token in streamer:
68
- if new_token and '<|user|>' not in new_token:
 
 
69
  history[-1][1] += new_token
70
  yield history
71
 
 
1
  import subprocess
2
 
 
 
 
 
3
  from threading import Thread
4
  import spaces
5
  import gradio as gr
 
61
  t = Thread(target=model.generate, kwargs=generate_kwargs)
62
  t.start()
63
  for new_token in streamer:
64
+ if new_token and '<|user|>' in new_token:
65
+ new_token = new_token.split('<|user|>')[0]
66
+ if new_token:
67
  history[-1][1] += new_token
68
  yield history
69