Update app.py
app.py
CHANGED
@@ -7,6 +7,7 @@
 
 from textwrap import dedent
 
+# credit to https://github.com/THUDM/ChatGLM2-6B/blob/main/web_demo.py
 from transformers import AutoModel, AutoTokenizer
 import gradio as gr
 import mdtex2html
@@ -107,10 +108,10 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.HTML("""<h1 align="center">ChatGLM2-6B-int4</h1>""")
     with gr.Accordion("Info", open=False):
         _ = """
-            A query takes from 30 seconds to a few
+            A query takes from 30 seconds to a few tens of seconds, dependent on the number of words/characters
             the question and answer contain.
 
-            * Low temperature: responses will be more deterministic and focused; High temperature more creative.
+            * Low temperature: responses will be more deterministic and focused; High temperature: responses more creative.
 
             * Suggested temperatures -- translation: up to 0.3; chatting: > 0.4
 
@@ -119,6 +120,8 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
             For a table of example values for different scenarios, refer to [this](https://community.openai.com/t/cheat-sheet-mastering-temperature-and-top-p-in-chatgpt-api-a-few-tips-and-tricks-on-controlling-the-creativity-deterministic-output-of-prompt-responses/172683)
 
             If the instance is not on a GPU (T4), it will be very slow. You can try to run the colab notebook [chatglm2-6b-4bit colab notebook](https://colab.research.google.com/drive/1WkF7kOjVCcBBatDHjaGkuJHnPdMWNtbW?usp=sharing) for a spin.
+
+            The T4 GPU is sponsored by a community GPU grant from Huggingface. Thanks a lot!
         """
         gr.Markdown(dedent(_))
     chatbot = gr.Chatbot()
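For context, here is a minimal sketch of how the pieces touched by this diff typically fit together in a ChatGLM2 Gradio demo like the upstream web_demo.py that the new comment credits. It is not this Space's actual app.py: the checkpoint id, the respond helper, and the slider defaults are illustrative assumptions, and the chat method used below comes from the model's trust_remote_code implementation rather than from transformers itself. It shows why dedent is imported (the Info markdown is an indented triple-quoted string) and how the temperature value described in the accordion reaches the model call.

# Minimal sketch (assumptions, not this Space's exact app.py).
from textwrap import dedent

import gradio as gr
from transformers import AutoModel, AutoTokenizer

MODEL_ID = "THUDM/chatglm2-6b-int4"  # assumed checkpoint for the int4 demo

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
# Assumes a GPU (e.g. the T4 mentioned in the Info text); chat()/stream_chat()
# are provided by the model's remote code, not by transformers itself.
model = AutoModel.from_pretrained(MODEL_ID, trust_remote_code=True).cuda().eval()


def respond(message, chat_history, temperature, top_p):
    # Low temperature -> more deterministic, focused answers;
    # higher temperature -> more creative answers.
    response, chat_history = model.chat(
        tokenizer,
        message,
        history=chat_history,
        temperature=temperature,
        top_p=top_p,
    )
    return "", chat_history


with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.HTML("""<h1 align="center">ChatGLM2-6B-int4</h1>""")
    with gr.Accordion("Info", open=False):
        _ = """
            A query takes from 30 seconds to a few tens of seconds, dependent on
            the number of words/characters the question and answer contain.

            * Suggested temperatures -- translation: up to 0.3; chatting: > 0.4
        """
        # dedent strips the common leading indentation so gr.Markdown renders
        # the indented triple-quoted string as normal markdown.
        gr.Markdown(dedent(_))
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Your question")
    temperature = gr.Slider(0.01, 1.0, value=0.45, label="temperature")  # assumed default
    top_p = gr.Slider(0.01, 1.0, value=0.7, label="top_p")  # assumed default
    msg.submit(respond, [msg, chatbot, temperature, top_p], [msg, chatbot])

demo.queue().launch()

The slider values are forwarded unchanged to model.chat, which is why the Info text's guidance (low temperature for translation, higher for chatting) maps directly onto the controls.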