import streamlit as st
import torch
import os
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel, PeftConfig
from huggingface_hub import login

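# Page configuration plus a reusable placeholder for status messages.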
st.set_page_config(page_title="LLaMA Chatbot", page_icon="🦙")
status_placeholder = st.empty()

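# Report whether a CUDA GPU is available; otherwise the app falls back to CPU.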
if torch.cuda.is_available():
    st.sidebar.success("✅ CUDA is available")
    st.sidebar.info(f"Using GPU: {torch.cuda.get_device_name(0)}")
else:
    st.sidebar.warning("⚠️ CUDA is not available. Using CPU.")

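# Authenticate with the Hugging Face Hub: prefer the HF_TOKEN environment
# variable, then fall back to Streamlit secrets.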
try:
    hf_token = os.environ.get("HF_TOKEN")
    if not hf_token:
        hf_token = st.secrets["HF_TOKEN"]

    login(token=hf_token)
    st.success("🔑 Successfully logged in to Hugging Face!")
except Exception as e:
    st.error(f"🚫 Error with HF token: {str(e)}")
    st.stop()

st.title("🦙 LLaMA Chatbot")

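# Load the tokenizer and model once and cache them across Streamlit reruns.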
@st.cache_resource
def load_model():
    try:
        model_path = "Alaaeldin/Llama-demo"

        with st.spinner("🔄 Loading tokenizer..."):
            tokenizer = AutoTokenizer.from_pretrained(
                model_path,
                token=hf_token,
                trust_remote_code=True
            )
            st.success("✅ Tokenizer loaded!")

        with st.spinner("🔄 Loading model... This might take a few minutes..."):
            model = AutoModelForCausalLM.from_pretrained(
                model_path,
                torch_dtype=torch.float16,
                device_map="auto",
                token=hf_token,
                trust_remote_code=True
            )
            st.success("✅ Model loaded!")

        return model, tokenizer
    except Exception as e:
        st.error(f"❌ Error loading model: {str(e)}")
        return None, None

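# Chat history is kept in session state so it survives reruns.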
if "messages" not in st.session_state:
    st.session_state.messages = []

model, tokenizer = load_model()

if model and tokenizer:
    st.success("✨ Ready to chat! Enter your message below.")

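    # Replay the conversation so far.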
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

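    # Read a new message from the chat input box and record it.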
    if prompt := st.chat_input("Speak thy mind..."):
        st.session_state.messages.append({"role": "user", "content": prompt})

        with st.chat_message("user"):
            st.markdown(prompt)

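        # Generate and display the assistant's reply.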
        with st.chat_message("assistant"):
            with st.spinner("🤔 Composing a verse..."):
                try:
                    # Tokenize the prompt and move it to the model's device.
                    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

                    with torch.no_grad():
                        outputs = model.generate(
                            **inputs,
                            max_length=200,
                            num_return_sequences=1,
                            temperature=0.7,
                            do_sample=True,
                            pad_token_id=tokenizer.eos_token_id
                        )

                    # Decode only the newly generated tokens so the reply
                    # does not repeat the prompt.
                    response = tokenizer.decode(
                        outputs[0][inputs["input_ids"].shape[-1]:],
                        skip_special_tokens=True
                    )

                    st.markdown(response)

                    st.session_state.messages.append({"role": "assistant", "content": response})
                except Exception as e:
                    st.error(f"Error generating response: {str(e)}")

else:
    st.error("⚠️ Model loading failed. Please check the error messages above.")