"""Streamlit document chatbot with a package-installation self-test.

Embeds an uploaded text document with DistilBERT and compares it against
user questions via cosine similarity.
"""

import subprocess
import sys

import streamlit as st

# set_page_config() must be the first Streamlit command in the script,
# so it runs before any st.title()/st.write() calls below.
st.set_page_config(
    page_title="Document Chatbot",
    layout="centered",  # centered layout reads better on small/mobile screens
    initial_sidebar_state="collapsed",
)

st.title("Package Installation Test")

# Display Python version
st.write(f"Python version: {sys.version}")

# Try to install transformers. This runs before the transformers import
# below so the app can still start in a fresh environment; for a real
# deployment, prefer declaring the dependency in requirements.txt.
try:
    st.write("Attempting to install transformers...")
    subprocess.check_call([sys.executable, "-m", "pip", "install", "transformers"])
    st.success("Transformers package installed successfully!")
except Exception as e:
    st.error(f"Error installing transformers: {e}")

# List installed packages
st.write("Installed packages:")
try:
    installed_packages = subprocess.check_output(
        [sys.executable, "-m", "pip", "list"]
    ).decode()
    st.code(installed_packages)
except Exception as e:
    st.error(f"Error listing packages: {e}")

# Imported after the install attempt above, hence not at the top of the file.
import torch
from transformers import AutoModel, AutoTokenizer


@st.cache_resource  # cache across reruns; preferred over session state on HF Spaces
def load_model():
    model_name = "distilbert-base-uncased"
    # device_map="auto" requires the `accelerate` package to be installed.
    return (
        AutoModel.from_pretrained(model_name, device_map="auto"),
        AutoTokenizer.from_pretrained(model_name),
    )


def embed_document(document: str, model, tokenizer) -> torch.Tensor:
    """Embed a document as the [CLS] vector of its first 512 tokens."""
    inputs = tokenizer(
        document,
        return_tensors="pt",
        truncation=True,
        max_length=512,
        padding=True,
    )
    with torch.no_grad():
        outputs = model(**inputs)
    # [CLS] token embedding, shape (1, hidden_size)
    return outputs.last_hidden_state[:, 0, :]


def answer_question(question: str, document_embeddings: torch.Tensor, model, tokenizer) -> str:
    """Score the question against the document by cosine similarity.

    Note: DistilBERT used this way only measures relevance; it reports a
    similarity score rather than extracting an answer span.
    """
    inputs = tokenizer(
        question,
        return_tensors="pt",
        truncation=True,
        max_length=512,
        padding=True,
    )
    with torch.no_grad():
        outputs = model(**inputs)
    question_embeddings = outputs.last_hidden_state[:, 0, :]
    similarity = torch.cosine_similarity(document_embeddings, question_embeddings)
    if similarity.item() > 0.5:
        return f"Similarity score: {similarity.item():.2f}"
    return "Sorry, I couldn't find a relevant answer in the document."


def main():
    st.title("Document Chatbot")

    # Load model and tokenizer
    try:
        model, tokenizer = load_model()
    except Exception as e:
        st.error(f"Error loading model: {e}")
        return

    # File upload - restricted to txt files for simplicity
    document_file = st.file_uploader(
        "Upload a text document (txt)",
        type=["txt"],
        help="Please upload a text file to analyze",
    )

    if document_file is not None:
        try:
            document = document_file.read().decode("utf-8")
            st.success("Document uploaded successfully!")

            # Create document embeddings once per upload
            document_embeddings = embed_document(document, model, tokenizer)

            # Question input
            st.subheader("Ask a question")
            question = st.text_input("Enter your question about the document:")

            if question:
                with st.spinner("Finding answer..."):
                    answer = answer_question(question, document_embeddings, model, tokenizer)
                    st.write(answer)
        except Exception as e:
            st.error(f"Error processing document: {e}")


if __name__ == "__main__":
    main()
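
# --- Usage note (a sketch, not part of the app; "app.py" is an assumed filename) ---
# Run locally with:
#   streamlit run app.py
# On Hugging Face Spaces, the usual alternative to the runtime pip install
# above is to declare dependencies in requirements.txt, e.g.:
#   streamlit
#   transformers
#   torch
#   accelerate   # needed for device_map="auto" in load_model()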