Commit 8020e39 · Parent: d8f007f
Model upgrade and GPU support
README.md CHANGED
````diff
@@ -23,4 +23,5 @@ An interactive [Streamlit](https://streamlit.io) app to test [DeepSeek](https://
 
 ```bash
 pip install -r requirements.txt
-streamlit run app.py
+streamlit run app.py
+```
````
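The README points at a requirements.txt that is not itself part of this commit. As an assumption, the minimal set of packages the diffed code needs would look something like this; accelerate is listed because `device_map="auto"` in `from_pretrained` raises an error when it is missing:

```text
# Hypothetical requirements.txt (the actual file is not shown in this commit)
streamlit
torch
transformers
accelerate  # required once app.py passes device_map="auto"
```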
app.py CHANGED
```diff
@@ -40,14 +40,17 @@ def load_model():
     # As Gemma is gated, we will show functionality of the demo using DeepSeek-R1-Distill-Qwen-1.5B model
     # model_id = "google/gemma-2b-it"
     # tokenizer = AutoTokenizer.from_pretrained(model_id, token=True)
-    model_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
+    # model_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
+    model_id = "deepseek-ai/deepseek-llm-7b-chat"
     tokenizer = AutoTokenizer.from_pretrained(model_id)
     model = AutoModelForCausalLM.from_pretrained(
         model_id,
-        device_map=None,
-        torch_dtype=torch.float32
+        # device_map=None,
+        # torch_dtype=torch.float32
+        device_map="auto",
+        torch_dtype=torch.float16
     )
-    model.to("cpu")
+    # model.to("cpu")
     return tokenizer, model
 
 tokenizer, model = load_model()
```
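Pieced together from the hunk above, load_model after this commit reads roughly as follows. The imports and any caching decorator sit outside the hunk, so `@st.cache_resource` is an assumption (the usual Streamlit pattern for loading a model once per process); the model id and arguments mirror the committed lines.

```python
import streamlit as st
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

@st.cache_resource  # assumed: cache the loaded model across Streamlit reruns
def load_model():
    # Gemma is gated, so the demo runs on an open DeepSeek checkpoint instead.
    # model_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
    model_id = "deepseek-ai/deepseek-llm-7b-chat"
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        device_map="auto",          # needs `accelerate`; places weights on GPU(s) when available
        torch_dtype=torch.float16,  # half precision: ~14 GB of weights for 7B vs ~28 GB in float32
    )
    return tokenizer, model

tokenizer, model = load_model()
```

An explicit `model.to("cpu")` after `device_map="auto"` would fight the placement accelerate has already chosen, which is presumably why the commit comments that line out rather than keeping it.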
```diff
@@ -95,7 +98,11 @@ if st.button("Generate"):
 
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
     with torch.no_grad():
-        outputs = model.generate(**inputs, max_new_tokens=100, temperature=1.0, top_p=0.95)
+        outputs = model.generate(**inputs,
+                                 # max_new_tokens=100,
+                                 max_new_tokens=200,
+                                 temperature=1.0,
+                                 top_p=0.95)
 
     # Back to still
     # gif_html.markdown(
```
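One caveat about the new generate call: transformers defaults to greedy decoding, and temperature/top_p only take effect when `do_sample=True`, so as committed those two arguments are ignored (recent transformers versions emit a warning saying exactly that). Below is a sketch of the call with sampling actually enabled, plus the decoding step that has to happen somewhere after the hunk.

```python
with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_new_tokens=200,  # raised from 100 in this commit
        do_sample=True,      # assumption: without this, temperature and top_p do nothing
        temperature=1.0,
        top_p=0.95,
    )

# generate() returns prompt + completion for decoder-only models, so slice off
# the prompt before decoding (a sketch; the app's actual post-processing is
# outside the hunks shown above).
new_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
generated_text = tokenizer.decode(new_tokens, skip_special_tokens=True)
```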