# NOTE(review): the three lines that were here ("Spaces:" / "Runtime error" /
# "Runtime error") were page-header residue copied from a Hugging Face Spaces
# error page, not code; preserved as this comment so the file stays valid Python.
"""Hugging Face Spaces script.

Logs in to the Hugging Face Hub, loads a 4-bit quantized Llama-2 chat model
fine-tuned for tourism Q&A, and prints one generated sample answer.

Requires the ``HF_API_TOKEN`` environment variable (e.g. via a ``.env`` file).
"""
import os
import sys
import tempfile  # NOTE(review): unused here — kept in case another entry point relies on it

import torch
from dotenv import load_dotenv
from huggingface_hub import login
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load environment variables from a local .env file, if present.
load_dotenv()

# Point the Hugging Face cache at a writable directory (Spaces containers
# often have a read-only home directory, so default under /tmp).
cache_dir = os.getenv("HF_HOME", "/tmp/huggingface_cache")
os.makedirs(cache_dir, exist_ok=True)
os.environ["HF_HOME"] = cache_dir

# BUG FIX: the token was previously read from the placeholder name "ttt",
# while the error message told the user to set HF_API_TOKEN. Read the
# documented variable so the instructions and the code agree.
api_token = os.getenv("HF_API_TOKEN")
if not api_token:
    print("API token is not set. Please set the 'HF_API_TOKEN' environment variable.")
    sys.exit(1)

# Authenticate with the Hub so the (possibly gated) model can be downloaded.
try:
    login(api_token)
    print("Successfully logged in to Hugging Face.")
except Exception as e:
    print(f"Failed to log in to Hugging Face: {e}")
    sys.exit(1)

# Model repository to load (tokenizer and weights share the same repo id).
model_name = "Ouiam123/Llama-2-7b-chat-finetune-tourism"

# Informational only: with device_map="auto" below, accelerate decides the
# actual weight placement; inputs are moved to the model's device afterwards.
device = "cuda" if torch.cuda.is_available() else "cpu"

try:
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Load with 4-bit quantization (requires bitsandbytes); device_map="auto"
    # spreads the weights across whatever devices are available.
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        load_in_4bit=True,
        device_map="auto",
    )

    input_text = "What should I do if I get lost in Morocco?"
    # BUG FIX: move inputs to the model's actual device rather than the
    # module-level `device` guess, which can disagree with device_map="auto".
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)

    # Beam search with early stopping; max_length bounds prompt + generation.
    # Pass the attention mask explicitly so padding is handled correctly.
    outputs = model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_length=100,
        num_beams=5,
        early_stopping=True,
    )
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print("Response:", response)
except Exception as e:
    print(f"An error occurred: {e}")