Vitrous committed on
Commit
637b32d
1 Parent(s): 120b1ab

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -4
app.py CHANGED
@@ -7,7 +7,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
7
  # Set environment variables for GPU usage and memory allocation
8
  os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
9
  torch.cuda.empty_cache()
10
- torch.cuda.set_per_process_memory_fraction(0.8) # Adjust the fraction as needed
11
 
12
  # Define device
13
  device = "cuda" # The device to load the model onto
@@ -18,9 +18,7 @@ system_message = ""
18
  # Load the model and tokenizer
19
  def hermes_model():
20
  tokenizer = AutoTokenizer.from_pretrained("TheBloke/CapybaraHermes-2.5-Mistral-7B-AWQ")
21
- model = AutoModelForCausalLM.from_pretrained(
22
- "TheBloke/CapybaraHermes-2.5-Mistral-7B-AWQ", low_cpu_mem_usage=True, device_map="auto"
23
- )
24
  return model, tokenizer
25
 
26
  model, tokenizer = hermes_model()
 
7
  # Set environment variables for GPU usage and memory allocation
8
  os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
9
  torch.cuda.empty_cache()
10
+ torch.cuda.set_per_process_memory_fraction(0.5) # Adjust the fraction as needed
11
 
12
  # Define device
13
  device = "cuda" # The device to load the model onto
 
18
  # Load the model and tokenizer
19
  def hermes_model():
20
  tokenizer = AutoTokenizer.from_pretrained("TheBloke/CapybaraHermes-2.5-Mistral-7B-AWQ")
21
+ model = AutoModelForCausalLM.from_pretrained("TheBloke/CapybaraHermes-2.5-Mistral-7B-AWQ", low_cpu_mem_usage=True, device_map="auto")
 
 
22
  return model, tokenizer
23
 
24
  model, tokenizer = hermes_model()