# my_chatbot_app/app.py
import os
import sys

from dotenv import load_dotenv

# Load environment variables from a local .env file, if present
load_dotenv()

# Point the Hugging Face cache at a writable directory. This must happen
# before importing transformers/huggingface_hub, which read HF_HOME at import time.
cache_dir = os.getenv("HF_HOME", "/tmp/huggingface_cache")
os.makedirs(cache_dir, exist_ok=True)
os.environ["HF_HOME"] = cache_dir

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from huggingface_hub import login
# Retrieve the Hugging Face API token from environment variables
api_token = os.getenv("HF_API_TOKEN")
if not api_token:
    print("API token is not set. Please set the 'HF_API_TOKEN' environment variable.")
    sys.exit(1)
# Log in to Hugging Face with the token
try:
    login(api_token)
    print("Successfully logged in to Hugging Face.")
except Exception as e:
    print(f"Failed to log in to Hugging Face: {e}")
    sys.exit(1)
# Model repository on the Hugging Face Hub
model_name = "Ouiam123/Llama-2-7b-chat-finetune-tourism"
# 4-bit quantization (below) needs a CUDA GPU; warn early if none is found
device = "cuda" if torch.cuda.is_available() else "cpu"
if device != "cuda":
    print("Warning: no CUDA GPU detected; 4-bit quantization may fail on CPU.")
try:
    # Load the tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    # Load the model with 4-bit quantization via bitsandbytes
    # (passing load_in_4bit directly is deprecated in recent transformers)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=BitsAndBytesConfig(load_in_4bit=True),
        device_map="auto",
    )
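    # A CPU-only fallback (an illustrative assumption, not part of the original
    # app) could drop the quantization config entirely:
    #
    #   model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")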
    # Prompt the model
    input_text = "What should I do if I get lost in Morocco?"
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
    # Generate a response; max_new_tokens bounds the reply length rather than
    # the whole sequence, and **inputs also passes the attention mask
    outputs = model.generate(
        **inputs,
        max_new_tokens=100,
        num_beams=5,
        early_stopping=True,
    )
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print("Response:", response)
except Exception as e:
    print(f"An error occurred: {e}")