Vitrous committed on
Commit
637b32d
1 Parent(s): 120b1ab

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -4
app.py CHANGED
@@ -7,7 +7,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
7
  # Set environment variables for GPU usage and memory allocation
8
  os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
9
  torch.cuda.empty_cache()
10
- torch.cuda.set_per_process_memory_fraction(0.8) # Adjust the fraction as needed
11
 
12
  # Define device
13
  device = "cuda" # The device to load the model onto
@@ -18,9 +18,7 @@ system_message = ""
18
  # Load the model and tokenizer
19
  def hermes_model():
20
  tokenizer = AutoTokenizer.from_pretrained("TheBloke/CapybaraHermes-2.5-Mistral-7B-AWQ")
21
- model = AutoModelForCausalLM.from_pretrained(
22
- "TheBloke/CapybaraHermes-2.5-Mistral-7B-AWQ", low_cpu_mem_usage=True, device_map="auto"
23
- )
24
  return model, tokenizer
25
 
26
  model, tokenizer = hermes_model()
 
7
  # Set environment variables for GPU usage and memory allocation
8
  os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
9
  torch.cuda.empty_cache()
10
+ torch.cuda.set_per_process_memory_fraction(0.5) # Adjust the fraction as needed
11
 
12
  # Define device
13
  device = "cuda" # The device to load the model onto
 
18
  # Load the model and tokenizer
19
  def hermes_model():
20
  tokenizer = AutoTokenizer.from_pretrained("TheBloke/CapybaraHermes-2.5-Mistral-7B-AWQ")
21
+ model = AutoModelForCausalLM.from_pretrained("TheBloke/CapybaraHermes-2.5-Mistral-7B-AWQ", low_cpu_mem_usage=True, device_map="auto")
 
 
22
  return model, tokenizer
23
 
24
  model, tokenizer = hermes_model()