Spaces:

DuckyBlender
/

phi3-youtube-summarizer

Paused

DuckyBlender committed on Aug 14, 2024

Commit

1bf6bb5

1 Parent(s): 61cc55a

added cpu compatibility (i hope)

Files changed (1) hide show

app.py CHANGED Viewed

@@ -8,12 +8,18 @@ import torch
 # import dotenv
 # dotenv.load_dotenv()
-import subprocess
-subprocess.run(
-    "pip install flash_attn --no-build-isolation --break-system-packages",
-    env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
-    shell=True,
-)
 # Uncomment and set your Hugging Face token if needed
 token = os.environ["HF_TOKEN"]
@@ -32,11 +38,10 @@ print("Loading model and tokenizer...")
 model_id = "microsoft/Phi-3-mini-128k-instruct"
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
-    device_map="cuda",
     quantization_config=bnb_config,
     trust_remote_code=True,
-).eval()
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 # Define the system prompt and generation pipeline

 # import dotenv
 # dotenv.load_dotenv()
+if torch.cuda.is_available():
+    device = torch.device("cuda")
+    print(f"Using GPU: {torch.cuda.get_device_name(device)}")
+    import subprocess
+    subprocess.run(
+        "pip install flash_attn --no-build-isolation --break-system-packages",
+        env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
+        shell=True,
+    )
+else:
+    device = torch.device("cpu")
+    print("Using CPU")
 # Uncomment and set your Hugging Face token if needed
 token = os.environ["HF_TOKEN"]
 model_id = "microsoft/Phi-3-mini-128k-instruct"
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
     quantization_config=bnb_config,
     trust_remote_code=True,
+).to(device)
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 # Define the system prompt and generation pipeline