import os
import time
from threading import Thread

import pandas as pd
import PyPDF2
import streamlit as st
import torch
from huggingface_hub import login
from transformers import (
    AutoModelForCausalLM,
    AutoModelForMaskedLM,
    AutoTokenizer,
    TextIteratorStreamer,
)

# Check if 'peft' is installed
try:
    from peft import PeftModel, PeftConfig
except ImportError:
    raise ImportError(
        "The 'peft' library is required but not installed. "
        "Please install it using: `pip install peft`"
    )

# 🔐 Hugging Face token, read from the environment (e.g. `export HF_TOKEN=...`)
HF_TOKEN = os.environ.get("HF_TOKEN")

# Set page configuration
st.set_page_config(
    page_title="Assistente LGT | Angola",
    page_icon="🚀",
    layout="centered"
)

# Base model and fine-tuned model options
BASE_MODEL_NAME = "neuralmind/bert-base-portuguese-cased"
# Alternatives: "pierreguillou/gpt2-small-portuguese",
# "unicamp-dl/ptt5-base-portuguese-vocab", "mistralai/Mistral-7B-Instruct-v0.2"

MODEL_OPTIONS = {
    "Full Fine-Tuned": "amiguel/mistral-angolan-laborlaw-bert-base-pt",
    # Alternatives: "amiguel/mistral-angolan-laborlaw-gpt2",
    # "amiguel/mistral-angolan-laborlaw-ptt5", "amiguel/mistral-angolan-laborlaw"
    "LoRA Adapter": "amiguel/SmolLM2-360M-concise-reasoning-lora",
    "QLoRA Adapter": "amiguel/SmolLM2-360M-concise-reasoning-qlora",
}

st.title("🚀 Assistente LGT | Angola 🚀")

USER_AVATAR = "https://raw.githubusercontent.com/achilela/vila_fofoka_analysis/9904d9a0d445ab0488cf7395cb863cce7621d897/USER_AVATAR.png"
BOT_AVATAR = "https://raw.githubusercontent.com/achilela/vila_fofoka_analysis/991f4c6e4e1dc7a8e24876ca5aae5228bcdb4dba/Ataliba_Avatar.jpg"

# Sidebar
with st.sidebar:
    st.header("Model Selection 🤖")
    model_type = st.selectbox("Choose Model Type", list(MODEL_OPTIONS.keys()), index=0)
    selected_model = MODEL_OPTIONS[model_type]

    st.header("Upload Documents 📂")
    uploaded_file = st.file_uploader(
        "Choose a PDF or XLSX file",
        type=["pdf", "xlsx"],
        label_visibility="collapsed"
    )

# Session state
if "messages" not in st.session_state:
    st.session_state.messages = []

# File processor
@st.cache_data
def process_file(uploaded_file):
    if uploaded_file is None:
        return ""
    try:
        if uploaded_file.type == "application/pdf":
            pdf_reader = PyPDF2.PdfReader(uploaded_file)
            return "\n".join([page.extract_text() or "" for page in pdf_reader.pages])
        elif uploaded_file.type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
            df = pd.read_excel(uploaded_file)
            return df.to_markdown()
        return ""
    except Exception as e:
        st.error(f"📄 Error processing file: {str(e)}")
        return ""

# Model loader
@st.cache_resource
def load_model(model_type, selected_model):
    try:
        login(token=HF_TOKEN)
        tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_NAME, token=HF_TOKEN)

        if model_type == "Full Fine-Tuned":
            # NOTE: masked-LM checkpoints (e.g. BERT) do not support autoregressive
            # generate(); a causal LM head is needed for streamed text generation.
            model = AutoModelForMaskedLM.from_pretrained(
                selected_model,
                torch_dtype=torch.bfloat16,  # or torch.float32 for wider compatibility
                token=HF_TOKEN
            ).to("cuda" if torch.cuda.is_available() else "cpu")
            # model = AutoModelForCausalLM.from_pretrained(
            #     selected_model,
            #     torch_dtype=torch.bfloat16,
            #     device_map="auto",
            #     token=HF_TOKEN
            # )
        else:
            base_model = AutoModelForCausalLM.from_pretrained(
                BASE_MODEL_NAME,
                torch_dtype=torch.bfloat16,
                device_map="auto",
                token=HF_TOKEN
            )
            model = PeftModel.from_pretrained(
                base_model,
                selected_model,
                torch_dtype=torch.bfloat16,
                is_trainable=False,
                token=HF_TOKEN
            )

        return model, tokenizer
    except Exception as e:
        st.error(f"🤖 Model loading failed: {str(e)}")
        return None

# Generation function
def generate_with_kv_cache(prompt, file_context, model, tokenizer, use_cache=True):
    full_prompt = f"Analyze this context:\n{file_context}\n\nQuestion: {prompt}\nAnswer:"

    streamer = TextIteratorStreamer(
        tokenizer,
        skip_prompt=True,
        skip_special_tokens=True
    )

    inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)

    generation_kwargs = {
        "input_ids": inputs["input_ids"],
        "attention_mask": inputs["attention_mask"],
        "max_new_tokens": 1024,
        "temperature": 0.7,
        "top_p": 0.9,
        "repetition_penalty": 1.1,
        "do_sample": True,
        "use_cache": use_cache,
        "streamer": streamer
    }

    # Run generation in a background thread so the streamer can be consumed live
    Thread(target=model.generate, kwargs=generation_kwargs).start()
    return streamer

# Display chat history
for message in st.session_state.messages:
    avatar = USER_AVATAR if message["role"] == "user" else BOT_AVATAR
    with st.chat_message(message["role"], avatar=avatar):
        st.markdown(message["content"])

# Prompt interaction
if prompt := st.chat_input("Ask your inspection question..."):
    # Load model if necessary
    if "model" not in st.session_state or st.session_state.get("model_type") != model_type:
        model_data = load_model(model_type, selected_model)
        if model_data is None:
            st.error("Failed to load model.")
            st.stop()
        st.session_state.model, st.session_state.tokenizer = model_data
        st.session_state.model_type = model_type

    model = st.session_state.model
    tokenizer = st.session_state.tokenizer

    with st.chat_message("user", avatar=USER_AVATAR):
        st.markdown(prompt)
    st.session_state.messages.append({"role": "user", "content": prompt})

    file_context = process_file(uploaded_file)

    if model and tokenizer:
        try:
            with st.chat_message("assistant", avatar=BOT_AVATAR):
                start_time = time.time()
                streamer = generate_with_kv_cache(prompt, file_context, model, tokenizer, use_cache=True)

                response_container = st.empty()
                full_response = ""

                for chunk in streamer:
                    # Special tokens are already dropped by skip_special_tokens=True
                    full_response += chunk
                    response_container.markdown(full_response + "▌", unsafe_allow_html=True)

                end_time = time.time()
                input_tokens = len(tokenizer(prompt)["input_ids"])
                output_tokens = len(tokenizer(full_response)["input_ids"])
                speed = output_tokens / (end_time - start_time)

                input_cost = (input_tokens / 1_000_000) * 5
                output_cost = (output_tokens / 1_000_000) * 15
                total_cost_usd = input_cost + output_cost
                total_cost_aoa = total_cost_usd * 1160

                st.caption(
                    f"🔑 Input Tokens: {input_tokens} | Output Tokens: {output_tokens} | "
                    f"🕒 Speed: {speed:.1f}t/s | 💰 Cost (USD): ${total_cost_usd:.4f} | "
                    f"💵 Cost (AOA): {total_cost_aoa:.4f}"
                )

                response_container.markdown(full_response)
                st.session_state.messages.append({"role": "assistant", "content": full_response})

        except Exception as e:
            st.error(f"⚡ Generation error: {str(e)}")
    else:
        st.error("🤖 Model not loaded!")
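
# A minimal sketch of how to run this app locally, assuming the script is saved
# as app.py (the filename and token value below are placeholders, not part of
# the original source):
#
#   export HF_TOKEN=hf_your_token_here
#   pip install streamlit transformers peft torch pandas PyPDF2 tabulate openpyxl
#   streamlit run app.py
#
# `tabulate` and `openpyxl` are assumptions: pandas requires them for
# DataFrame.to_markdown() and read_excel() on .xlsx files, respectively.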