File size: 2,972 Bytes
4a701b5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# import libraries
import vertexai
from langchain.llms import VertexAI
from langchain.embeddings import VertexAIEmbeddings

import streamlit as st
import requests
from bs4 import BeautifulSoup

from langchain.document_loaders import TextLoader
from langchain.indexes import VectorstoreIndexCreator
from langchain.vectorstores import DocArrayInMemorySearch

vertexai.init(project="agileai-poc", location="us-central1")

llm = VertexAI(
    model_name="text-bison@001",
    max_output_tokens=256,
    temperature=0.1,
    top_p=0.8,
    top_k=40,
    verbose=True,)

embeddings = VertexAIEmbeddings()


def get_text(url):
    # Send a GET request to the URL
    response = requests.get(url)

    # Create a BeautifulSoup object with the HTML content
    soup = BeautifulSoup(response.content, "html.parser")

    # Find the specific element or elements containing the text you want to scrape
    # Here, we'll find all <p> tags and extract their text
    paragraphs = soup.find_all("p")

    # Loop through the paragraphs and print their text
    with open("text\\temp.txt", "w", encoding='utf-8') as file:
        # Loop through the paragraphs and write their text to the file
        for paragraph in paragraphs:
            file.write(paragraph.get_text() + "\n")


@st.cache_resource
def create_langchain_index(input_text):
    print("--indexing---")
    get_text(input_text)
    loader = TextLoader("text\\temp.txt", encoding='utf-8')
    # data = loader.load()

    index = VectorstoreIndexCreator(vectorstore_cls=DocArrayInMemorySearch,
                                    embedding=embeddings).from_loaders([loader])
    # using vertex ai embeddings initialized above
    return index


@st.cache_data
def get_response(input_text, query):
    print(f"--querying---{query}")
    response = index.query(query, llm=llm)
    return response


st.title('Webpage Question and Answering')


input_text = st.text_input("Provide the link to the webpage...")

summary_response = ""
tweet_response = ""
ln_response = ""

if input_text:
    index = create_langchain_index(input_text)
    summary_query = "Write a 100 words summary of the document"
    summary_response = get_response(input_text, summary_query)

    tweet_query = "Write a twitter tweet and add hashtags"
    tweet_response = get_response(input_text, tweet_query)

    ln_query = "Write a linkedin post for the document"
    ln_response = get_response(input_text, ln_query)

    with st.expander('Page Summary'):
        st.info(summary_response)

    with st.expander('Tweet'):
        st.info(tweet_response)

    with st.expander('LinkedIn Post'):
        st.info(ln_response)

st.session_state.input_text = ''
question = st.text_input("Ask a question from the link you shared...")
if st.button("Ask"):
    if question:
        index = create_langchain_index(input_text)
        response = get_response(input_text, question)
        st.write(response)
    else:
        st.warning("Please enter a question.")