# llama-app / app.py
import streamlit as st
import torch
import os
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel, PeftConfig
from huggingface_hub import login
# Set page config for better display
st.set_page_config(page_title="LLaMA Chatbot", page_icon="🦙")
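# Placeholder for transient status messages (reserved; not currently written to).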
status_placeholder = st.empty()
# Check GPU
if torch.cuda.is_available():
    st.sidebar.success("✅ CUDA is available")
    st.sidebar.info(f"Using GPU: {torch.cuda.get_device_name(0)}")
else:
    st.sidebar.warning("⚠️ CUDA is not available. Using CPU.")
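# Note: this sidebar check is informational only; actual model placement is
# handled by device_map="auto" in load_model() below.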
# Authentication with minimal token handling
try:
    hf_token = os.environ.get("HF_TOKEN")
    if not hf_token:
        hf_token = st.secrets["HF_TOKEN"]
    login(token=hf_token)
    st.success("🔑 Successfully logged in to Hugging Face!")
except Exception as e:
    st.error(f"🚫 Error with HF token: {str(e)}")
    st.stop()
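# The token is read from the HF_TOKEN environment variable, falling back to
# Streamlit secrets, e.g. an entry in .streamlit/secrets.toml (placeholder value):
#
#     HF_TOKEN = "hf_..."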
st.title("🦙 LLaMA Chatbot")
# Model loading with detailed status updates
@st.cache_resource
def load_model():
    try:
        model_path = "Alaaeldin/Llama-demo"
        with st.spinner("🔄 Loading tokenizer..."):
            tokenizer = AutoTokenizer.from_pretrained(
                model_path,
                token=hf_token,
                trust_remote_code=True
            )
        st.success("✅ Tokenizer loaded!")

        with st.spinner("🔄 Loading model... This might take a few minutes..."):
            # device_map="auto" (requires the accelerate package) places the
            # weights on GPU when one is available, otherwise on CPU.
            model = AutoModelForCausalLM.from_pretrained(
                model_path,
                torch_dtype=torch.float16,
                device_map="auto",
                token=hf_token,
                trust_remote_code=True
            )
        st.success("✅ Model loaded!")

        return model, tokenizer
    except Exception as e:
        st.error(f"❌ Error loading model: {str(e)}")
        return None, None
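# PeftModel and PeftConfig are imported above but not used in load_model().
# If "Alaaeldin/Llama-demo" were a LoRA adapter rather than a full model, it
# could be attached to a base model roughly like this (the base model name and
# repo layout here are assumptions, not taken from this app):
#
#     base = AutoModelForCausalLM.from_pretrained(
#         "meta-llama/Llama-2-7b-hf",
#         torch_dtype=torch.float16,
#         device_map="auto",
#         token=hf_token,
#     )
#     model = PeftModel.from_pretrained(base, model_path, token=hf_token)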
# Initialize chat history
if "messages" not in st.session_state:
st.session_state.messages = []
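# Each history entry is a dict of the form {"role": "user" | "assistant", "content": "..."}.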
# Load model
model, tokenizer = load_model()
# Chat interface
if model and tokenizer:
    st.success("✨ Ready to chat! Enter your message below.")

    # Display chat history
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])
    # Chat input
    if prompt := st.chat_input("Speak thy mind..."):
        # Add user message to chat history
        st.session_state.messages.append({"role": "user", "content": prompt})

        # Display user message
        with st.chat_message("user"):
            st.markdown(prompt)
        # Generate response
        with st.chat_message("assistant"):
            with st.spinner("🤔 Composing a verse..."):
                try:
                    # Prepare input (includes both input_ids and attention_mask)
                    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

                    # Generate response
                    with torch.no_grad():
                        outputs = model.generate(
                            **inputs,
                            max_new_tokens=200,
                            num_return_sequences=1,
                            temperature=0.7,
                            do_sample=True,
                            pad_token_id=tokenizer.eos_token_id
                        )

                    # Decode only the newly generated tokens so the prompt is
                    # not echoed back in the reply
                    new_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
                    response = tokenizer.decode(new_tokens, skip_special_tokens=True)

                    # Display response
                    st.markdown(response)

                    # Add assistant response to chat history
                    st.session_state.messages.append({"role": "assistant", "content": response})
                except Exception as e:
                    st.error(f"Error generating response: {str(e)}")
else:
    st.error("⚠️ Model loading failed. Please check the error messages above.")
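
# To run locally (assuming streamlit, torch, transformers, peft, and accelerate
# are installed): streamlit run app.py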