import os
import streamlit as st
import pickle
import time
from langchain import OpenAI
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import UnstructuredURLLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from dotenv import load_dotenv
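
# Suggested dependencies (a sketch, not pinned requirements; the imports above
# assume the pre-0.1 "langchain" package layout):
#   pip install streamlit langchain openai unstructured faiss-cpu python-dotenv tiktoken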
# Load environment variables; the .env file is expected to define OPENAI_API_KEY,
# which load_dotenv() exports into os.environ for the OpenAI client to pick up
load_dotenv('/content/openkey.env')
st.title("RockyBot: News Research Tool 📈")
st.sidebar.title("News Article URLs")

urls = []
for i in range(3):
    url = st.sidebar.text_input(f"URL {i+1}")
    urls.append(url)

process_url_clicked = st.sidebar.button("Process URLs")
file_path = "faiss_store_openai.pkl"
main_placeholder = st.empty()
llm = OpenAI(temperature=0.9, max_tokens=500)
if process_url_clicked:
    # Load data (skip empty URL fields so the loader only fetches real inputs)
    loader = UnstructuredURLLoader(urls=[url for url in urls if url])
    main_placeholder.text("Data Loading...Started...✅✅✅")
    data = loader.load()

    # Split data into ~1000-character chunks
    text_splitter = RecursiveCharacterTextSplitter(
        separators=['\n\n', '\n', '.', ','],
        chunk_size=1000
    )
    main_placeholder.text("Text Splitter...Started...✅✅✅")
    docs = text_splitter.split_documents(data)

    # Debugging: print the number of documents
    print("Number of Documents:", len(docs))
    if docs:
        # Create embeddings and save to a FAISS index.
        # FAISS.from_documents embeds the documents internally via the
        # embeddings object, so no manual embedding call is needed.
        embeddings = OpenAIEmbeddings()
        vectorstore_openai = FAISS.from_documents(docs, embeddings)
        main_placeholder.text("Embedding Vector Started Building...✅✅✅")

        # Debugging: print the number of vectors in the index
        print("Number of Embeddings:", vectorstore_openai.index.ntotal)
        time.sleep(2)

        # Save the FAISS index to a pickle file
        with open(file_path, "wb") as f:
            pickle.dump(vectorstore_openai, f)
    else:
        main_placeholder.text("Document splitting failed. No documents found.")
query = main_placeholder.text_input("Question: ")
if query:
    if os.path.exists(file_path):
        # Load the saved FAISS index and answer the question with sources
        with open(file_path, "rb") as f:
            vectorstore = pickle.load(f)
        chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=vectorstore.as_retriever())
        result = chain({"question": query}, return_only_outputs=True)

        # Display the answer
        st.header("Answer")
        st.write(result["answer"])

        # Display sources, if available (returned as a newline-separated string)
        sources = result.get("sources", "")
        if sources:
            st.subheader("Sources:")
            sources_list = sources.split("\n")
            for source in sources_list:
                st.write(source)
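
# A minimal way to launch the app locally (assuming this script is saved as
# main.py, which is a placeholder name, and the dependencies above are installed):
#   streamlit run main.py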