DuckyBlender committed on
Commit
1bf6bb5
·
1 Parent(s): 61cc55a

Added CPU compatibility (I hope)

Browse files
Files changed (1) hide show
  1. app.py +13 -8
app.py CHANGED
@@ -8,12 +8,18 @@ import torch
8
  # import dotenv
9
  # dotenv.load_dotenv()
10
 
11
- import subprocess
12
- subprocess.run(
13
- "pip install flash_attn --no-build-isolation --break-system-packages",
14
- env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
15
- shell=True,
16
- )
 
 
 
 
 
 
17
 
18
  # Uncomment and set your Hugging Face token if needed
19
  token = os.environ["HF_TOKEN"]
@@ -32,11 +38,10 @@ print("Loading model and tokenizer...")
32
  model_id = "microsoft/Phi-3-mini-128k-instruct"
33
  model = AutoModelForCausalLM.from_pretrained(
34
  model_id,
35
- device_map="cuda",
36
  quantization_config=bnb_config,
37
  trust_remote_code=True,
38
 
39
- ).eval()
40
  tokenizer = AutoTokenizer.from_pretrained(model_id)
41
 
42
  # Define the system prompt and generation pipeline
 
8
  # import dotenv
9
  # dotenv.load_dotenv()
10
 
11
+ if torch.cuda.is_available():
12
+ device = torch.device("cuda")
13
+ print(f"Using GPU: {torch.cuda.get_device_name(device)}")
14
+ import subprocess
15
+ subprocess.run(
16
+ "pip install flash_attn --no-build-isolation --break-system-packages",
17
+ env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
18
+ shell=True,
19
+ )
20
+ else:
21
+ device = torch.device("cpu")
22
+ print("Using CPU")
23
 
24
  # Uncomment and set your Hugging Face token if needed
25
  token = os.environ["HF_TOKEN"]
 
38
  model_id = "microsoft/Phi-3-mini-128k-instruct"
39
  model = AutoModelForCausalLM.from_pretrained(
40
  model_id,
 
41
  quantization_config=bnb_config,
42
  trust_remote_code=True,
43
 
44
+ ).to(device)
45
  tokenizer = AutoTokenizer.from_pretrained(model_id)
46
 
47
  # Define the system prompt and generation pipeline