import streamlit as st
import torch
import os
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel, PeftConfig
from huggingface_hub import login
# Set page config for better display
st.set_page_config(page_title="LLaMA Chatbot", page_icon="🦙")
status_placeholder = st.empty()
# Check GPU
if torch.cuda.is_available():
    st.sidebar.success("✅ CUDA is available")
    st.sidebar.info(f"Using GPU: {torch.cuda.get_device_name(0)}")
else:
    st.sidebar.warning("⚠️ CUDA is not available. Using CPU.")
# Authentication with minimal token handling
try:
    hf_token = os.environ.get("HF_TOKEN")
    if not hf_token:
        hf_token = st.secrets["HF_TOKEN"]
    login(token=hf_token)
    st.success("🔑 Successfully logged in to Hugging Face!")
except Exception as e:
    st.error(f"🚫 Error with HF token: {str(e)}")
    st.stop()
st.title("🦙 LLaMA Chatbot")
# Model loading with detailed status updates
@st.cache_resource
def load_model():
    try:
        model_path = "Alaaeldin/Llama-demo"

        with st.spinner("🔄 Loading tokenizer..."):
            tokenizer = AutoTokenizer.from_pretrained(
                model_path,
                token=hf_token,
                trust_remote_code=True
            )
        st.success("✅ Tokenizer loaded!")

        with st.spinner("🔄 Loading model... This might take a few minutes..."):
            model = AutoModelForCausalLM.from_pretrained(
                model_path,
                torch_dtype=torch.float16,
                device_map="auto",
                token=hf_token,
                trust_remote_code=True
            )
        st.success("✅ Model loaded!")

        return model, tokenizer
    except Exception as e:
        st.error(f"❌ Error loading model: {str(e)}")
        return None, None
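# Note (assumption): PeftModel and PeftConfig are imported above but never used.
# If "Alaaeldin/Llama-demo" were a PEFT/LoRA adapter repo rather than a full model,
# the adapter would have to be attached to its base model, roughly:
#   config = PeftConfig.from_pretrained(model_path, token=hf_token)
#   base = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path, token=hf_token)
#   model = PeftModel.from_pretrained(base, model_path, token=hf_token)
# The direct AutoModelForCausalLM load in load_model() above is kept as-is.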
# Initialize chat history
if "messages" not in st.session_state:
    st.session_state.messages = []
# Load model
model, tokenizer = load_model()
# Chat interface
if model and tokenizer:
    st.success("✨ Ready to chat! Enter your message below.")

    # Display chat history
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    # Chat input
    if prompt := st.chat_input("Speak thy mind..."):
        # Add user message to chat history
        st.session_state.messages.append({"role": "user", "content": prompt})
        # Display user message
        with st.chat_message("user"):
            st.markdown(prompt)

        # Generate response
        with st.chat_message("assistant"):
            with st.spinner("🤔 Composing a verse..."):
                try:
                    # Prepare input
                    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

                    # Generate response
                    with torch.no_grad():
                        outputs = model.generate(
                            inputs["input_ids"],
                            max_length=200,
                            num_return_sequences=1,
                            temperature=0.7,
                            do_sample=True,
                            pad_token_id=tokenizer.eos_token_id
                        )
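                    # Note (assumption): max_length caps prompt + generated tokens together;
                    # transformers' max_new_tokens limits only the reply, and passing
                    # attention_mask=inputs["attention_mask"] to generate() avoids padding warnings.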
                    # Decode response
                    response = tokenizer.decode(outputs[0], skip_special_tokens=True)

                    # Display response
                    st.markdown(response)

                    # Add assistant response to chat history
                    st.session_state.messages.append({"role": "assistant", "content": response})
                except Exception as e:
                    st.error(f"Error generating response: {str(e)}")
else:
    st.error("⚠️ Model loading failed. Please check the error messages above.")
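
# To run this app locally (assuming streamlit, torch, transformers, and peft are
# installed, and HF_TOKEN is set in the environment or .streamlit/secrets.toml):
#   streamlit run app.py   # file name assumed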