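"""RAG question-answering over a directory of PDFs.

Loads PDFs, splits them into overlapping chunks, embeds the chunks with a
HuggingFace sentence-transformers model, indexes them in FAISS, and answers a
user question with Groq's Llama3-8b-8192 through a LangChain retrieval chain.
"""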
import os
import time

from dotenv import load_dotenv
from langchain_groq import ChatGroq
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain_community.embeddings import HuggingFaceEmbeddings

load_dotenv()

# Download the HuggingFace embedding model
def download_hugging_face_embeddings():
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    return embeddings

# Load the GROQ API key from the environment
groq_api_key = os.getenv("GROQ_API_KEY")

# Initialize the LLM
llm = ChatGroq(groq_api_key=groq_api_key,
               model_name="Llama3-8b-8192")

prompt_template = """Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Context: {context}
Question: {input} 
Only return the helpful answer below and nothing else.
Helpful answer:"""

prompt = ChatPromptTemplate.from_template(prompt_template)
def vector_embedding():
    """Embeds the documents and stores them in a FAISS vector store."""
    embeddings = download_hugging_face_embeddings()
    loader = PyPDFDirectoryLoader("/kaggle/input/book-pdf-1")  # Data ingestion
    docs = loader.load()  # Document loading
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)  # Chunk creation
    final_documents = text_splitter.split_documents(docs[:20])  # Split only the first 20 loaded documents
    vectors = FAISS.from_documents(final_documents, embeddings)  # Index the chunks with HuggingFace embeddings
    return vectors

# Embed the documents and build the vector store
vectors = vector_embedding()
print("Vector Store DB Is Ready")

# Get user input
prompt1 = input("Enter Your Question From Documents: ")

if prompt1:
    # Stuff retrieved documents into the prompt and wire the retriever to the LLM
    document_chain = create_stuff_documents_chain(llm, prompt)
    retriever = vectors.as_retriever()
    retrieval_chain = create_retrieval_chain(retriever, document_chain)

    # Measure wall-clock latency; process_time() would exclude time spent waiting on the API
    start = time.perf_counter()
    response = retrieval_chain.invoke({'input': prompt1})
    print("Response time :", time.perf_counter() - start)
    print(response['answer'])

    # Print the retrieved source chunks
    print("\nDocument Similarity Search:")
    for doc in response["context"]:
        print(doc.page_content)
        print("--------------------------------")