sunny77 committed on
Commit
798053e
·
1 Parent(s): d9bdfc1

Added files

Browse files
Files changed (5) hide show
  1. README.md +2 -12
  2. app.py +21 -0
  3. ingest.py +43 -0
  4. requirements.txt +10 -0
  5. retrieve.py +73 -0
README.md CHANGED
@@ -1,12 +1,2 @@
1
- ---
2
- title: Retrieval Augmented Generation RAG
3
- emoji: 📈
4
- colorFrom: green
5
- colorTo: yellow
6
- sdk: streamlit
7
- sdk_version: 1.29.0
8
- app_file: app.py
9
- pinned: false
10
- ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ # Retrieval-Augmented-Generation-RAG-
2
+ Simple RAG using your own PDFs without any GPU!
 
 
 
 
 
 
 
 
 
 
app.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from retrieve import qa_chain, process_llm_response
2
+ import streamlit as st
3
+
4
+
5
def main():
    """Render the NCERT-GPT page and answer the typed question on button click.

    Builds the retrieval QA chain, shows a text area plus a 'Run RAG' button,
    and writes the chain's response (or an error) under the 'Response' header.
    """
    # Streamlit re-executes this script on every widget interaction; caching
    # the chain as a resource avoids reloading the embeddings, vector store
    # and LLM client on each rerun.
    qa = st.cache_resource(qa_chain)()
    st.title('NCERT-GPT')
    text_query = st.text_area('Ask any question from NCERT 11th and 12th Chemistry Texts!')
    generate_response_btn = st.button('Run RAG')

    st.subheader('Response')
    # A blank text area yields an empty string rather than None, so an
    # `is not None` test lets empty prompts through; check for real content.
    has_question = bool((text_query or '').strip())
    if generate_response_btn and not has_question:
        st.warning('Please enter a question first.')
    elif generate_response_btn:
        with st.spinner('Generating Response. Please wait...'):
            text_response = qa(text_query)
        if text_response:
            st.write(text_response)
        else:
            st.error('Failed to get response')


if __name__ == "__main__":
    main()
ingest.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #importing dependencies
2
+ from langchain.embeddings import HuggingFaceBgeEmbeddings
3
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
4
+ from langchain.vectorstores import Chroma
5
+ from langchain.document_loaders import PyPDFDirectoryLoader
6
+ import time
7
+
8
# Ingestion script: load the PDFs under data/, split them into chunks, embed
# the chunks on CPU and store them in a persistent Chroma collection.

#loading data

loader = PyPDFDirectoryLoader('data/')
documents = loader.load()
print(len(documents))

#splitting

# NOTE(review): 10000-character chunks are probably longer than the embedding
# model's maximum input, in which case only the start of each chunk would be
# embedded -- confirm against the model card and consider a smaller size.
splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=500)
text_chunks = splitter.split_documents(documents)
print(len(text_chunks))

#loading HuggingFaceBGE embeddings

model_name = "BAAI/bge-small-en"
model_kwargs = {"device": "cpu"}
encode_kwargs = {"normalize_embeddings": True}
embeddings = HuggingFaceBgeEmbeddings(
    model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs
)

print('Embeddings loaded!')

# creating NCERT Textbooks vector database.

t1 = time.time()
# Must be the same directory retrieve.py opens ('vector stores/ncertdb');
# writing elsewhere leaves the app querying an empty store.
persist_directory = 'vector stores/ncertdb'
vectordb = Chroma.from_documents(
    documents=text_chunks,
    embedding=embeddings,
    collection_metadata={"hnsw:space": "cosine"},
    persist_directory=persist_directory,
)
t2 = time.time()
print('Time taken for building db : ', (t2 - t1))
43
+
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
accelerate
chromadb
huggingface-hub
langchain
pypdf
# retrieve.py does `from dotenv import load_dotenv`
python-dotenv
sentence-transformers
sentencepiece
streamlit
torch
transformers
retrieve.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer, pipeline
2
+ from langchain.llms import HuggingFaceHub, HuggingFacePipeline
3
+ from dotenv import load_dotenv
4
+ from langchain.embeddings import HuggingFaceBgeEmbeddings
5
+ from langchain.vectorstores import Chroma
6
+ from langchain.chains import RetrievalQA
7
+ import textwrap
8
+ import os
9
+
10
+
11
def load_vector_store():
    """Open the persisted Chroma store and return a retriever over it.

    The store in 'vector stores/ncertdb' is opened with BGE-small embeddings
    running on CPU (normalized), and the returned retriever is configured
    with ``k = 2``.
    """
    embeddings = HuggingFaceBgeEmbeddings(
        model_name="BAAI/bge-small-en",
        model_kwargs={"device": "cpu"},
        encode_kwargs={"normalize_embeddings": True},
    )
    print('Embeddings loaded!')

    vector_store = Chroma(
        persist_directory='vector stores/ncertdb',
        embedding_function=embeddings,
    )
    print('Vector store loaded!')

    return vector_store.as_retriever(search_kwargs={"k": 2})
26
+
27
+
28
+ #model
29
def load_model(repo_id='llmware/bling-sheared-llama-1.3b-0.1',
               max_new_tokens=100, smoke_test=False):
    """Create the Hugging Face Hub LLM client used by the QA chain.

    Args:
        repo_id: Hub repository of the model to query.
        max_new_tokens: Value passed as ``max_new_tokens`` in ``model_kwargs``.
        smoke_test: When true, send the prompt 'HI!' once and print the reply.
            Off by default so that loading the model does not cost a remote
            inference call every time the chain is built.

    Returns:
        The configured ``HuggingFaceHub`` LLM.
    """
    # Reads a local .env file into the environment; presumably this supplies
    # the Hugging Face Hub API token -- confirm against the deployment setup.
    load_dotenv()
    llm = HuggingFaceHub(
        repo_id=repo_id,
        model_kwargs={'max_new_tokens': max_new_tokens},
    )
    if smoke_test:
        print(llm('HI!'))
    return llm
38
+
39
+
40
def qa_chain():
    """Assemble and return the verbose 'stuff' RetrievalQA chain.

    The retriever is loaded first and the LLM second; the chain is set up to
    return its source documents along with the answer.
    """
    chain_options = {
        'retriever': load_vector_store(),
        'llm': load_model(),
        'chain_type': 'stuff',
        'return_source_documents': True,
        'verbose': True,
    }
    return RetrievalQA.from_chain_type(**chain_options)
51
+
52
def wrap_text_preserve_newlines(text, width=110):
    """Wrap *text* to *width* columns while keeping its existing line breaks.

    Each newline-separated line is wrapped on its own, so breaks already in
    the text stay where they are and only over-long lines gain new ones.
    """
    return '\n'.join(
        textwrap.fill(segment, width=width) for segment in text.split('\n')
    )
63
+
64
def process_llm_response(llm_response):
    """Print the wrapped answer, then the 'source' metadata of each document.

    Reads the 'result' and 'source_documents' entries of *llm_response*.
    """
    answer = wrap_text_preserve_newlines(llm_response['result'])
    print(answer)
    print('\n\nSources:')
    for document in llm_response["source_documents"]:
        print(document.metadata['source'])
69
+
70
if __name__ == "__main__":
    # Manual smoke test. Guarded because app.py imports this module: without
    # the guard every import would build a second chain and send this
    # hard-coded question to the LLM before the UI even renders.
    qa = qa_chain()

    response = qa('What are types of Embedded system?')
    process_llm_response(response)