# my_chatbot_app/app.py
import os
import sys

from dotenv import load_dotenv

# Load environment variables from a local .env file, if present
load_dotenv()

# Point the Hugging Face cache at a writable directory. This must happen
# before importing transformers/huggingface_hub, which read HF_HOME at import time.
cache_dir = os.getenv("HF_HOME", "/tmp/huggingface_cache")
os.makedirs(cache_dir, exist_ok=True)
os.environ["HF_HOME"] = cache_dir

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from huggingface_hub import login
# Retrieve the Hugging Face API token from environment variables
api_token = os.getenv("HF_API_TOKEN")
if not api_token:
    print("API token is not set. Please set the 'HF_API_TOKEN' environment variable.")
    sys.exit(1)
# Log in to Hugging Face with the token
try:
    login(api_token)
    print("Successfully logged in to Hugging Face.")
except Exception as e:
    print(f"Failed to log in to Hugging Face: {e}")
    sys.exit(1)
# Model repository on the Hugging Face Hub
model_name = "Ouiam123/Llama-2-7b-chat-finetune-tourism"
# 4-bit quantization (below) needs a CUDA GPU; warn early if none is found
device = "cuda" if torch.cuda.is_available() else "cpu"
if device != "cuda":
    print("Warning: no CUDA GPU detected; 4-bit quantization may fail on CPU.")
try:
    # Load the tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    # Load the model with 4-bit quantization via bitsandbytes
    # (passing load_in_4bit directly is deprecated in recent transformers)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=BitsAndBytesConfig(load_in_4bit=True),
        device_map="auto",
    )
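    # A CPU-only fallback (an illustrative assumption, not part of the original
    # app) could drop the quantization config entirely:
    #
    #   model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")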
    # Prompt the model
    input_text = "What should I do if I get lost in Morocco?"
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
    # Generate a response; max_new_tokens bounds the reply length rather than
    # the whole sequence, and **inputs also passes the attention mask
    outputs = model.generate(
        **inputs,
        max_new_tokens=100,
        num_beams=5,
        early_stopping=True,
    )
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print("Response:", response)
except Exception as e:
    print(f"An error occurred: {e}")