|
|
|
from transformers import AutoModel, AutoTokenizer |
|
import torch |
|
import streamlit as st |
|
import subprocess |
|
import sys |
|
|
|
# Page configuration comes first: Streamlit documents set_page_config() as
# the first Streamlit command on a page, and releases that enforce this raise
# StreamlitAPIException if st.title()/st.write() run before it.
st.set_page_config(
    page_title="Document Chatbot",
    layout="centered",
    initial_sidebar_state="collapsed",
)

# --- Environment diagnostics rendered at the top of the page ---
st.title("Package Installation Test")

st.write(f"Python version: {sys.version}")

# Install transformers with the same interpreter that runs this script so the
# package lands in the active environment. Failures are reported in the UI
# instead of aborting the page.
# NOTE(review): this runs on every execution of the script — confirm that is
# intended, since the module already imports transformers above.
try:
    st.write("Attempting to install transformers...")
    subprocess.check_call([sys.executable, "-m", "pip", "install", "transformers"])
    st.success("Transformers package installed successfully!")
except Exception as e:
    st.error(f"Error installing transformers: {str(e)}")

# Show the output of `pip list` for the active interpreter.
st.write("Installed packages:")
try:
    installed_packages = subprocess.check_output([sys.executable, "-m", "pip", "list"]).decode()
    st.code(installed_packages)
except Exception as e:
    st.error(f"Error listing packages: {str(e)}")
|
|
|
@st.cache_resource
def load_model():
    """Load the pretrained encoder and its tokenizer.

    Both are created from the ``distilbert-base-uncased`` checkpoint. The
    function is wrapped in ``st.cache_resource`` so Streamlit can reuse the
    loaded objects instead of building them again.

    Returns:
        A ``(model, tokenizer)`` tuple, in that order.
    """
    checkpoint = "distilbert-base-uncased"
    # device_map="auto" leaves device placement of the weights to the library.
    encoder = AutoModel.from_pretrained(checkpoint, device_map="auto")
    text_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    return encoder, text_tokenizer
|
|
|
def embed_document(document: str, model, tokenizer) -> torch.Tensor:
    """Embed a text as the model's hidden state at the first token position.

    Args:
        document: Text to embed. Tokenization truncates at 512 tokens, so
            text beyond that limit does not influence the result.
        model: Callable that accepts the tokenizer's outputs as keyword
            arguments and returns an object exposing ``last_hidden_state``.
            If it has a ``device`` attribute, the inputs are moved there
            before the forward pass.
        tokenizer: Callable that turns text into a mapping of tensors when
            called with ``return_tensors="pt"``.

    Returns:
        ``last_hidden_state[:, 0, :]`` — the hidden state at index 0 of the
        second axis (the ``[CLS]`` position for BERT-style tokenizers).
    """
    inputs = tokenizer(
        document,
        return_tensors="pt",
        truncation=True,
        max_length=512,
        padding=True,
    )

    # The model may have been placed on an accelerator (load_model() uses
    # device_map="auto") while the tokenizer always produces CPU tensors.
    # Move the inputs next to the weights to avoid a device-mismatch error.
    # Callables without a ``device`` attribute are fed the inputs unchanged.
    device = getattr(model, "device", None)
    if device is not None:
        inputs = {name: value.to(device) for name, value in inputs.items()}

    # Inference only: skip gradient tracking.
    with torch.no_grad():
        outputs = model(**inputs)
    return outputs.last_hidden_state[:, 0, :]
|
|
|
def answer_question(question: str, document_embeddings: torch.Tensor, model, tokenizer) -> str:
    """Score a question against a document embedding and report the result.

    The question is embedded the same way as the document (hidden state at
    the first token position) and compared with ``document_embeddings`` by
    cosine similarity.

    Args:
        question: Question text. Tokenization truncates at 512 tokens.
        document_embeddings: Embedding of the document to compare against.
        model: Callable that accepts the tokenizer's outputs as keyword
            arguments and returns an object exposing ``last_hidden_state``.
            If it has a ``device`` attribute, the inputs are moved there
            before the forward pass.
        tokenizer: Callable that turns text into a mapping of tensors when
            called with ``return_tensors="pt"``.

    Returns:
        ``"Similarity score: <x.xx>"`` when the cosine similarity is above
        0.5, otherwise a fixed message saying no relevant answer was found.
    """
    inputs = tokenizer(
        question,
        return_tensors="pt",
        truncation=True,
        max_length=512,
        padding=True,
    )

    # The model may live on an accelerator (load_model() uses
    # device_map="auto") while tokenizer output is on the CPU; move the
    # inputs next to the weights to avoid a device-mismatch error.
    device = getattr(model, "device", None)
    if device is not None:
        inputs = {name: value.to(device) for name, value in inputs.items()}

    # Inference only: skip gradient tracking.
    with torch.no_grad():
        outputs = model(**inputs)
    question_embeddings = outputs.last_hidden_state[:, 0, :]

    # Both operands must share a device before they can be compared.
    document_embeddings = document_embeddings.to(question_embeddings.device)
    similarity = torch.cosine_similarity(document_embeddings, question_embeddings)

    # Read the scalar once and reuse it for the threshold and the message.
    score = similarity.item()
    if score > 0.5:
        return f"Similarity score: {score:.2f}"
    return "Sorry, I couldn't find a relevant answer in the document."
|
|
|
def main():
    """Render the chatbot page: load the model, take a file, answer a question.

    Flow:
      1. Load the model and tokenizer; on failure show the error and stop.
      2. Offer a ``.txt`` uploader; with no file there is nothing more to do.
      3. Decode the upload as UTF-8, embed it, then show a question box.
      4. For a non-empty question, display the result of answer_question().

    Any exception raised while handling the uploaded document is shown in the
    page via ``st.error`` rather than propagated.
    """
    st.title("Document Chatbot")

    # Without a model nothing below can work, so bail out early.
    try:
        encoder, text_tokenizer = load_model()
    except Exception as load_err:
        st.error(f"Error loading model: {str(load_err)}")
        return

    uploaded = st.file_uploader(
        "Upload a text document (txt)",
        type=["txt"],
        help="Please upload a text file to analyze",
    )
    if uploaded is None:
        return

    try:
        raw_bytes = uploaded.read()
        text = raw_bytes.decode("utf-8")
        st.success("Document uploaded successfully!")

        doc_vector = embed_document(text, encoder, text_tokenizer)

        st.subheader("Ask a question")
        user_question = st.text_input("Enter your question about the document:")

        # An empty text box yields a falsy string; only answer real input.
        if user_question:
            with st.spinner("Finding answer..."):
                reply = answer_question(user_question, doc_vector, encoder, text_tokenizer)
            st.write(reply)
    except Exception as doc_err:
        st.error(f"Error processing document: {str(doc_err)}")
|
|
|
# Script entry point: build the page only when this file is executed as the
# main module, not when it is imported.
if __name__ == "__main__":
    main()