UW-SBEL-ChronoGemma-27b-it

Sleeping

jwang2373 committed on Sep 13, 2024

Commit

d636d83

verified ·

1 Parent(s): b747480

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -2,7 +2,7 @@ import os
 import time
 import spaces
 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer, BitsAndBytesConfig
 import gradio as gr
 from threading import Thread
@@ -15,10 +15,7 @@ PLACEHOLDER = """
 <p>Hi! I'm a PyChrono Digital Twin expert. How can I assist you today?</p>
 </center>
 """
-quantization_config = BitsAndBytesConfig(
-    load_in_4bit=True,
-    bnb_4bit_compute_dtype=torch.bfloat16
-)
 CSS = """
 .duplicate-button {
     margin: auto !important;
@@ -34,7 +31,7 @@ h3 {
 device = "cuda" if torch.cuda.is_available() else "cpu"
 tokenizer = AutoTokenizer.from_pretrained(MODEL, trust_remote_code=True)
-model = AutoModelForCausalLM.from_pretrained(MODEL, trust_remote_code=True, device_map="auto")
 model = model.eval()
 @spaces.GPU()
@@ -138,4 +135,4 @@ with gr.Blocks(css=CSS, theme="soft") as demo:
     )
 if __name__ == "__main__":
-    demo.launch()

 import time
 import spaces
 import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 import gradio as gr
 from threading import Thread
 <p>Hi! I'm a PyChrono Digital Twin expert. How can I assist you today?</p>
 </center>
 """
 CSS = """
 .duplicate-button {
     margin: auto !important;
 device = "cuda" if torch.cuda.is_available() else "cpu"
 tokenizer = AutoTokenizer.from_pretrained(MODEL, trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained(MODEL, torch_dtype=torch.bfloat16, trust_remote_code=True, device_map="auto")
 model = model.eval()
 @spaces.GPU()
     )
 if __name__ == "__main__":
+    demo.launch()