# mohammedriza-rahman's picture
# Update app.py
# 20ef73e verified
# import streamlit as st
from transformers import AutoModel, AutoTokenizer
import torch
import streamlit as st
import subprocess
import sys
# Page config.
# Streamlit documents set_page_config as the first Streamlit command of a page;
# releases that enforce this raise StreamlitAPIException when any other st.*
# call (such as the diagnostics below) runs before it, so it is issued up front.
st.set_page_config(
    page_title="Document Chatbot",
    layout="centered",  # Centered layout for a better mobile view
    initial_sidebar_state="collapsed",
)

st.title("Package Installation Test")

# Display Python version
st.write(f"Python version: {sys.version}")

# Try to install transformers.
# NOTE(review): transformers is already imported at the top of this file, so a
# missing package fails at that import before this install can help, and
# Streamlit re-executes the script on every interaction, repeating the pip
# call each time -- consider dropping this diagnostic once deployment works.
try:
    st.write("Attempting to install transformers...")
    subprocess.check_call([sys.executable, "-m", "pip", "install", "transformers"])
    st.success("Transformers package installed successfully!")
except Exception as e:
    st.error(f"Error installing transformers: {str(e)}")

# List installed packages
st.write("Installed packages:")
try:
    installed_packages = subprocess.check_output([sys.executable, "-m", "pip", "list"]).decode()
    st.code(installed_packages)
except Exception as e:
    st.error(f"Error listing packages: {str(e)}")
@st.cache_resource  # Use cache_resource instead of session state for HF Spaces
def load_model():
    """Load the pretrained encoder and its matching tokenizer.

    Returns:
        A ``(model, tokenizer)`` tuple, both created from the
        ``distilbert-base-uncased`` checkpoint.
    """
    checkpoint = "distilbert-base-uncased"
    # NOTE(review): device_map="auto" presumably relies on the accelerate
    # package being installed -- confirm it is listed in the requirements.
    encoder = AutoModel.from_pretrained(checkpoint, device_map="auto")
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    return encoder, tokenizer
def embed_document(document: str, model, tokenizer) -> torch.Tensor:
    """Encode *document* and return the hidden state at the first token position.

    Args:
        document: Text to embed. It is tokenized with ``truncation=True`` and
            ``max_length=512``, so tokens past that limit are dropped.
        model: Callable accepting the tokenizer output as keyword arguments and
            returning an object with a ``last_hidden_state`` tensor.
        tokenizer: Callable producing a mapping of named tensors for ``model``.

    Returns:
        ``outputs.last_hidden_state[:, 0, :]`` for the encoded document.
    """
    inputs = tokenizer(
        document,
        return_tensors="pt",
        truncation=True,
        max_length=512,
        padding=True,
    )

    # The model may have been placed on an accelerator (it is loaded with
    # device_map="auto"), while the tokenizer creates CPU tensors. Align the
    # inputs with the model's device so the forward pass does not fail on a
    # device mismatch. Models without a `device` attribute are left untouched.
    device = getattr(model, "device", None)
    if device is not None:
        inputs = {name: tensor.to(device) for name, tensor in inputs.items()}

    # Inference only: no gradients are needed.
    with torch.no_grad():
        outputs = model(**inputs)
    return outputs.last_hidden_state[:, 0, :]
def answer_question(
    question: str,
    document_embeddings: torch.Tensor,
    model,
    tokenizer,
    threshold: float = 0.5,
) -> str:
    """Compare *question* with a document embedding and report the similarity.

    The question is encoded the same way as the document (hidden state at the
    first token position) and compared with ``document_embeddings`` using
    cosine similarity.

    Args:
        question: Question text; truncated to 512 tokens by the tokenizer call.
        document_embeddings: Embedding of the document to compare against. The
            comparison result is read with ``.item()``, so it must reduce to a
            single similarity value.
        model: Callable accepting the tokenizer output as keyword arguments and
            returning an object with a ``last_hidden_state`` tensor.
        tokenizer: Callable producing a mapping of named tensors for ``model``.
        threshold: Similarity that must be exceeded for the score to be
            reported. Defaults to ``0.5``.

    Returns:
        ``"Similarity score: <score>"`` (two decimals) when the similarity is
        above ``threshold``, otherwise a fixed "not found" message.
    """
    inputs = tokenizer(
        question,
        return_tensors="pt",
        truncation=True,
        max_length=512,
        padding=True,
    )

    # Align the tokenizer output with the model's device (the model may live
    # on an accelerator when loaded with device_map="auto").
    device = getattr(model, "device", None)
    if device is not None:
        inputs = {name: tensor.to(device) for name, tensor in inputs.items()}

    with torch.no_grad():
        outputs = model(**inputs)
    question_embeddings = outputs.last_hidden_state[:, 0, :]

    # Both operands of cosine_similarity must share a device.
    document_embeddings = document_embeddings.to(question_embeddings.device)
    score = torch.cosine_similarity(document_embeddings, question_embeddings).item()

    if score > threshold:
        return f"Similarity score: {score:.2f}"
    return "Sorry, I couldn't find a relevant answer in the document."
def main():
    """Render the chatbot page: load the model, accept a text file, answer questions.

    Errors while loading the model or processing the uploaded document are
    shown in the page with ``st.error`` rather than raised.
    """
    st.title("Document Chatbot")

    # Load model and tokenizer
    try:
        model, tokenizer = load_model()
    except Exception as e:
        st.error(f"Error loading model: {str(e)}")
        return

    # File upload - restrict to txt files for simplicity
    document_file = st.file_uploader(
        "Upload a text document (txt)",
        type=["txt"],
        help="Please upload a text file to analyze",
    )
    if document_file is None:
        # Nothing uploaded yet; the rest of the page depends on a document.
        return

    try:
        # getvalue() returns the whole upload regardless of the stream
        # position, so the content is still available if the same file object
        # has already been read (e.g. on a later rerun of the script).
        document = document_file.getvalue().decode("utf-8")
        st.success("Document uploaded successfully!")

        # Create document embeddings
        document_embeddings = embed_document(document, model, tokenizer)

        # Question input
        st.subheader("Ask a question")
        question = st.text_input("Enter your question about the document:")
        if question:
            with st.spinner("Finding answer..."):
                answer = answer_question(question, document_embeddings, model, tokenizer)
            st.write(answer)
    except Exception as e:
        st.error(f"Error processing document: {str(e)}")
# Run the app only when this file is executed as the main script.
if __name__ == "__main__":
    main()