mikeee committed on
Commit
1a360d6
1 Parent(s): adc107a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -2
app.py CHANGED
@@ -7,6 +7,7 @@
7
 
8
  from textwrap import dedent
9
 
 
10
  from transformers import AutoModel, AutoTokenizer
11
  import gradio as gr
12
  import mdtex2html
@@ -107,10 +108,10 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
107
  gr.HTML("""<h1 align="center">ChatGLM2-6B-int4</h1>""")
108
  with gr.Accordion("Info", open=False):
109
  _ = """
110
- A query takes from 30 seconds to a few hundred seconds, dependent on the number of words/characters
111
  the question and answer contain.
112
 
113
- * Low temperature: responses will be more deterministic and focused; High temperature more creative.
114
 
115
  * Suggested temperatures -- translation: up to 0.3; chatting: > 0.4
116
 
@@ -119,6 +120,8 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
119
  For a table of example values for different scenarios, refer to [this](https://community.openai.com/t/cheat-sheet-mastering-temperature-and-top-p-in-chatgpt-api-a-few-tips-and-tricks-on-controlling-the-creativity-deterministic-output-of-prompt-responses/172683)
120
 
121
  If the instance is not on a GPU (T4), it will be very slow. You can try to run the colab notebook [chatglm2-6b-4bit colab notebook](https://colab.research.google.com/drive/1WkF7kOjVCcBBatDHjaGkuJHnPdMWNtbW?usp=sharing) for a spin.
 
 
122
  """
123
  gr.Markdown(dedent(_))
124
  chatbot = gr.Chatbot()
 
7
 
8
  from textwrap import dedent
9
 
10
+ # credit to https://github.com/THUDM/ChatGLM2-6B/blob/main/web_demo.py
11
  from transformers import AutoModel, AutoTokenizer
12
  import gradio as gr
13
  import mdtex2html
 
108
  gr.HTML("""<h1 align="center">ChatGLM2-6B-int4</h1>""")
109
  with gr.Accordion("Info", open=False):
110
  _ = """
111
+ A query takes from 30 seconds to a few tens of seconds, dependent on the number of words/characters
112
  the question and answer contain.
113
 
114
+ * Low temperature: responses will be more deterministic and focused; High temperature: responses more creative.
115
 
116
  * Suggested temperatures -- translation: up to 0.3; chatting: > 0.4
117
 
 
120
  For a table of example values for different scenarios, refer to [this](https://community.openai.com/t/cheat-sheet-mastering-temperature-and-top-p-in-chatgpt-api-a-few-tips-and-tricks-on-controlling-the-creativity-deterministic-output-of-prompt-responses/172683)
121
 
122
  If the instance is not on a GPU (T4), it will be very slow. You can try to run the colab notebook [chatglm2-6b-4bit colab notebook](https://colab.research.google.com/drive/1WkF7kOjVCcBBatDHjaGkuJHnPdMWNtbW?usp=sharing) for a spin.
123
+
124
+ The T4 GPU is sponsored by a community GPU grant from Huggingface. Thanks a lot!
125
  """
126
  gr.Markdown(dedent(_))
127
  chatbot = gr.Chatbot()