# Spaces: Runtime error
# (page status text captured along with the source; not part of the program)
# import libraries | |
import vertexai | |
from langchain.llms import VertexAI | |
from langchain.embeddings import VertexAIEmbeddings | |
import streamlit as st | |
import requests | |
from bs4 import BeautifulSoup | |
from langchain.document_loaders import TextLoader | |
from langchain.indexes import VectorstoreIndexCreator | |
from langchain.vectorstores import DocArrayInMemorySearch | |
# Point the Vertex AI SDK at the project and region used by this app.
vertexai.init(project="agileai-poc", location="us-central1")

# Settings passed to the text model; `llm` is what get_response() hands to
# the index when answering a query.
_llm_settings = dict(
    model_name="text-bison@001",
    max_output_tokens=256,
    temperature=0.1,
    top_p=0.8,
    top_k=40,
    verbose=True,
)
llm = VertexAI(**_llm_settings)

# Embedding model with library defaults; create_langchain_index() uses it
# to build the vector store.
embeddings = VertexAIEmbeddings()
def get_text(url):
    """Download *url* and save the text of its ``<p>`` elements to a scratch file.

    The page is fetched with ``requests``, parsed with BeautifulSoup's
    ``html.parser``, and the text of every ``<p>`` tag is written, one
    paragraph per line, to the fixed path held in ``out_path`` below (the
    same path ``create_langchain_index`` loads from).

    Args:
        url: Address of the web page to scrape.

    Returns:
        None. The result is the file written to disk.

    Raises:
        requests.exceptions.RequestException: If the request fails, times
            out, or the server answers with a 4xx/5xx status.
    """
    import os  # local import so this block does not depend on file-level changes

    # requests has no default timeout; bound the wait so a dead host cannot
    # hang the app forever.
    response = requests.get(url, timeout=30)
    # Fail loudly on HTTP errors instead of scraping and indexing an error page.
    response.raise_for_status()

    # Parse the HTML and collect every <p> element.
    soup = BeautifulSoup(response.content, "html.parser")
    paragraphs = soup.find_all("p")

    out_path = "text\\temp.txt"
    # Where the backslash is a path separator the file lives in a "text"
    # directory that may not exist yet; elsewhere dirname() is empty and
    # there is nothing to create.
    parent_dir = os.path.dirname(out_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # Write one paragraph per line.
    with open(out_path, "w", encoding='utf-8') as file:
        file.writelines(paragraph.get_text() + "\n" for paragraph in paragraphs)
def create_langchain_index(input_text):
    """Scrape the page at *input_text* and build a searchable index from it.

    Args:
        input_text: URL of the web page; passed straight to ``get_text``.

    Returns:
        The index produced by ``VectorstoreIndexCreator.from_loaders`` over
        the scraped text file.
    """
    print("--indexing---")

    # Refresh the scratch file with the paragraphs of the requested page.
    get_text(input_text)
    page_loader = TextLoader("text\\temp.txt", encoding='utf-8')

    # In-memory vector store, embedded with the module-level Vertex AI
    # `embeddings` object.
    index_creator = VectorstoreIndexCreator(
        vectorstore_cls=DocArrayInMemorySearch,
        embedding=embeddings,
    )
    return index_creator.from_loaders([page_loader])
def get_response(input_text, query):
    """Answer *query* against the current page index.

    Args:
        input_text: Not used by this function; kept for the callers that
            pass it.
        query: Question or instruction sent to the index.

    Returns:
        Whatever ``index.query`` returns for *query*.

    Note:
        ``index`` and ``llm`` are read from module scope, so ``index`` must
        have been assigned before this is called.
    """
    print(f"--querying---{query}")
    return index.query(query, llm=llm)
# ---- Streamlit page -------------------------------------------------------
# Streamlit re-executes this script from the top on every widget interaction,
# so anything expensive (scraping, embedding, LLM calls) is cached in
# st.session_state and only recomputed when the URL changes.
st.title('Webpage Question and Answering')
input_text = st.text_input("Provide the link to the webpage...")

# Defaults shown in the expanders until a URL has been processed.
summary_response = ""
tweet_response = ""
ln_response = ""

if input_text:
    state = st.session_state

    # Build the index once per URL instead of on every rerun.
    if state.get("indexed_url") != input_text:
        state["page_index"] = create_langchain_index(input_text)
        state["indexed_url"] = input_text
        state.pop("page_posts", None)  # posts of the previous page are stale

    # get_response() reads the module-level name `index`.
    index = state["page_index"]

    # Generate the three canned outputs once per URL.
    if "page_posts" not in state:
        summary_query = "Write a 100 words summary of the document"
        tweet_query = "Write a twitter tweet and add hashtags"
        ln_query = "Write a linkedin post for the document"
        state["page_posts"] = (
            get_response(input_text, summary_query),
            get_response(input_text, tweet_query),
            get_response(input_text, ln_query),
        )
    summary_response, tweet_response, ln_response = state["page_posts"]

with st.expander('Page Summary'):
    st.info(summary_response)
with st.expander('Tweet'):
    st.info(tweet_response)
with st.expander('LinkedIn Post'):
    st.info(ln_response)

# NOTE(review): the URL text box above is created without key="input_text",
# so this assignment does not clear it; kept as in the original.
st.session_state.input_text = ''

question = st.text_input("Ask a question from the link you shared...")
if st.button("Ask"):
    if not question:
        st.warning("Please enter a question.")
    elif not input_text:
        # Without a URL there is no index to query; previously this path
        # tried to scrape an empty URL.
        st.warning("Please provide a link to a webpage first.")
    else:
        # `index` was already set for this URL in the block above, so no
        # second scrape/embedding pass is needed here.
        response = get_response(input_text, question)
        st.write(response)