File size: 2,379 Bytes
e24982e
8c67ed3
e24982e
 
 
 
 
9eeafb7
 
e24982e
 
63614ef
e24982e
63614ef
e24982e
e6f156e
e24982e
 
 
 
63614ef
e24982e
 
 
 
 
 
 
 
 
 
 
9eeafb7
e24982e
 
 
 
 
63614ef
e24982e
 
9eeafb7
e24982e
 
 
8c67ed3
e24982e
 
 
 
 
 
 
 
 
 
8c67ed3
e24982e
 
8c67ed3
e24982e
 
 
 
8c67ed3
e24982e
 
 
8c67ed3
e24982e
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# Streamlit application
import streamlit as st
import os
from getpass import getpass
from transformers import pipeline

from llama_index.node_parser import SemanticSplitterNodeParser
from llama_index.embeddings import OpenAIEmbedding
from llama_index.ingestion import IngestionPipeline
from pinecone.grpc import PineconeGRPC
from pinecone import ServerlessSpec
from llama_index.vector_stores import PineconeVectorStore
from llama_index import VectorStoreIndex
from llama_index.retrievers import VectorIndexRetriever
from llama_index.query_engine import RetrieverQueryEngine

# Function to initialize the Pinecone and LlamaIndex setup
def initialize_pipeline():
    """Build and return a RetrieverQueryEngine over the Pinecone "anualreport" index.

    Reads PINECONE_API_KEY and OPENAI_API_KEY from the environment.

    Returns:
        RetrieverQueryEngine: top-5 similarity retriever over the vector store.

    Raises:
        RuntimeError: if either API key is missing from the environment.
            (The original code passed None on to the clients and then tried
            ``os.environ['OPENAI_API_KEY'] = None``, which raises TypeError.)
    """
    pinecone_api_key = os.getenv("PINECONE_API_KEY")
    openai_api_key = os.getenv("OPENAI_API_KEY")
    # Fail fast with a clear message instead of letting None leak into the clients.
    if not pinecone_api_key or not openai_api_key:
        raise RuntimeError(
            "PINECONE_API_KEY and OPENAI_API_KEY must be set in the environment."
        )

    # Connect to the existing Pinecone index and wrap it as a LlamaIndex vector store.
    # NOTE(review): removed an IngestionPipeline that was constructed but never
    # run, and a describe_index_stats() call whose result was discarded.
    pc = PineconeGRPC(api_key=pinecone_api_key)
    index_name = "anualreport"  # NOTE(review): spelling must match the index created at ingestion time
    pinecone_index = pc.Index(index_name)
    vector_store = PineconeVectorStore(pinecone_index=pinecone_index)

    # Query path: vector index -> top-5 retriever -> query engine.
    # VectorStoreIndex's default embedding model reads OPENAI_API_KEY from the
    # environment, which we validated above.
    vector_index = VectorStoreIndex.from_vector_store(vector_store=vector_store)
    retriever = VectorIndexRetriever(index=vector_index, similarity_top_k=5)
    query_engine = RetrieverQueryEngine(retriever=retriever)

    return query_engine

# Streamlit UI
st.title("Chat with Annual Reports")


@st.cache_resource
def _load_resources():
    """Build the heavy objects once per server process.

    Streamlit re-executes this script on every user interaction; without
    caching, the Pinecone/LlamaIndex setup and the DialoGPT model were
    rebuilt on every submitted message.
    """
    engine = initialize_pipeline()
    chat = pipeline("conversational", model="microsoft/DialoGPT-medium")
    return engine, chat


# Initialize the query engine and the conversation model (cached).
query_engine, conversation_pipeline = _load_resources()

# User input
user_input = st.text_input("You: ", "")

if user_input:
    # Query the vector DB for context relevant to the user's question.
    llm_query = query_engine.query(user_input)
    response = llm_query.response

    # The "conversational" pipeline expects a Conversation object, not a plain
    # list of strings, and its replies live in Conversation.generated_responses
    # (the original indexed the result like a text-generation output). Feed it
    # one conversation combining the question with the retrieved context.
    from transformers import Conversation

    conversation = conversation_pipeline(Conversation(f"{user_input}\n{response}"))
    bot_response = conversation.generated_responses[-1]

    # Display response
    st.text_area("Bot: ", bot_response, height=200)