Rahul Bhoyar committed on
Commit 08728cc
1 Parent(s): 4ddfb35

Updated files
Files changed (3)
  1. .gitignore +2 -1
  2. app.py +101 -51
  3. requirements.txt +6 -14
.gitignore CHANGED
@@ -1 +1,2 @@
- venv/
+ venv/
+ data/*
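
The new data/* entry pairs with the rewritten app.py below, which saves every uploaded PDF into a data/ directory at runtime; those files are user content and should stay out of version control. A minimal sketch of the write path being ignored (the file name and bytes are illustrative only):

    import os

    os.makedirs("data", exist_ok=True)                          # runtime upload directory, now git-ignored
    with open(os.path.join("data", "example.pdf"), "wb") as f:  # hypothetical file name
        f.write(b"%PDF-1.4")                                    # placeholder bytes for illustration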
app.py CHANGED
@@ -1,60 +1,111 @@
- import streamlit as st
- from PyPDF2 import PdfReader
- from llama_index.llms import HuggingFaceInferenceAPI
- from llama_index import VectorStoreIndex
- from llama_index.embeddings import HuggingFaceEmbedding
- from llama_index import ServiceContext
- from llama_index.schema import Document
-
-
- def read_pdf(uploaded_file):
-     pdf_reader = PdfReader(uploaded_file)
-     text = ""
-     for page_num in range(len(pdf_reader.pages)):
-         text += pdf_reader.pages[page_num].extract_text()
-     return text
-
- def querying(query_engine):
-     query = st.text_input("Enter the Query for PDF:")
-     submit = st.button("Generate The response for the query")
-     if submit:
-         with st.spinner("Fetching the response..."):
-             response = query_engine.query(query)
-             st.write(f"**Response:** {response}")
-
-
- # docs = document_search.similarity_search(query_text)
- # output = chain.run(input_documents=docs, question=query_text)
- # st.write(output)
-
- def main():
-     st.title("PdfQuerier using LLAMA by Rahul Bhoyar")
-     hf_token = st.text_input("Enter your Hugging Face token:")
-     llm = HuggingFaceInferenceAPI(model_name="HuggingFaceH4/zephyr-7b-alpha", token=hf_token)
-     uploaded_file = st.file_uploader("Choose a PDF file", type=["pdf"])
-
-     if uploaded_file is not None:
-         file_contents = read_pdf(uploaded_file)
-         documents = Document(text=file_contents)
-         documents = [documents]
-         st.success("Documents loaded successfully!")
-
-
-         with st.spinner("Created Embedding model..."):
-             embed_model_uae = HuggingFaceEmbedding(model_name="WhereIsAI/UAE-Large-V1")
-             service_context = ServiceContext.from_defaults(llm=llm, chunk_size=800, chunk_overlap=20, embed_model=embed_model_uae)
-             st.success("Embedding model created successfully!")
-
-         # Download embeddings from OpenAI
-         with st.spinner("Created VectorStoreIndex..."):
-             index = VectorStoreIndex.from_documents(documents, service_context=service_context, show_progress=True)
-             index.storage_context.persist()
-             query_engine = index.as_query_engine()
-             st.success("VectorStoreIndex created successfully!")
-
-         querying(query_engine)
-
-
- if __name__ == "__main__":
-     main()
-
+ # import streamlit as st
+ # from PyPDF2 import PdfReader
+ # from llama_index.llms import HuggingFaceInferenceAPI
+ # from llama_index import VectorStoreIndex
+ # from llama_index.embeddings import HuggingFaceEmbedding
+ # from llama_index import ServiceContext
+ # from llama_index.schema import Document
+
+
+ # def read_pdf(uploaded_file):
+ #     pdf_reader = PdfReader(uploaded_file)
+ #     text = ""
+ #     for page_num in range(len(pdf_reader.pages)):
+ #         text += pdf_reader.pages[page_num].extract_text()
+ #     return text
+
+ # def querying(query_engine):
+ #     query = st.text_input("Enter the Query for PDF:")
+ #     submit = st.button("Generate The response for the query")
+ #     if submit:
+ #         with st.spinner("Fetching the response..."):
+ #             response = query_engine.query(query)
+ #             st.write(f"**Response:** {response}")
+
+ # def main():
+ #     st.title("PdfQuerier using LLAMA by Rahul Bhoyar")
+ #     hf_token = st.text_input("Enter your Hugging Face token:")
+ #     llm = HuggingFaceInferenceAPI(model_name="HuggingFaceH4/zephyr-7b-alpha", token=hf_token)
+ #     uploaded_file = st.file_uploader("Choose a PDF file", type=["pdf"])
+
+ #     if uploaded_file is not None:
+ #         file_contents = read_pdf(uploaded_file)
+ #         documents = Document(text=file_contents)
+ #         documents = [documents]
+ #         st.success("Documents loaded successfully!")
+
+
+ #         with st.spinner("Created Embedding model..."):
+ #             embed_model_uae = HuggingFaceEmbedding(model_name="WhereIsAI/UAE-Large-V1")
+ #             service_context = ServiceContext.from_defaults(llm=llm, chunk_size=800, chunk_overlap=20, embed_model=embed_model_uae)
+ #             st.success("Embedding model created successfully!")
+
+ #         # Download embeddings from OpenAI
+ #         with st.spinner("Created VectorStoreIndex..."):
+ #             index = VectorStoreIndex.from_documents(documents, service_context=service_context, show_progress=True)
+ #             index.storage_context.persist()
+ #             query_engine = index.as_query_engine()
+ #             st.success("VectorStoreIndex created successfully!")
+
+ #         querying(query_engine)
+
+
+ # if __name__ == "__main__":
+ #     main()
+
+
+
+
+ import streamlit as st
+ from llama_index import SimpleDirectoryReader, VectorStoreIndex
+ from llama_index import ServiceContext
+ from llama_index.embeddings import HuggingFaceEmbedding
+ from llama_index.llms import HuggingFaceInferenceAPI
+ import os
+
+ # os.environ["GOOGLE_API_KEY"]="AIzaSyBYrZpUdTc4rumhdHajlKfwY4Kq0u6vFDs"
+
+ # Streamlit title and description
+ st.title("Gemini-File with Llama-Index by Rahul Bhoyar")
+ st.write("This app allows you to upload your own PDF and query your document. Powered by Gemini.")
+
+ hf_token = st.text_input("Enter your Hugging Face token:")
+
+
+ # Function to save an uploaded file into the data/ directory
+ def save_uploadedfile(uploadedfile):
+     os.makedirs("data", exist_ok=True)  # ensure the upload directory exists before writing
+     with open(os.path.join("data", uploadedfile.name), "wb") as f:
+         f.write(uploadedfile.getbuffer())
+     return st.success("Saved file {} to the data directory".format(uploadedfile.name))
+
+ # Streamlit input for user file upload
+ uploaded_pdf = st.file_uploader("Upload your PDF", type=['pdf'])
+
+ # Load data and configure the index
+ if uploaded_pdf is not None:
+     input_file = save_uploadedfile(uploaded_pdf)
+     st.write("File uploaded successfully!")
+     documents = SimpleDirectoryReader("data").load_data()
+     llm = HuggingFaceInferenceAPI(model_name="HuggingFaceH4/zephyr-7b-alpha", token=hf_token)
+     embed_model_uae = HuggingFaceEmbedding(model_name="WhereIsAI/UAE-Large-V1")
+
+     # Configure Service Context
+     service_context = ServiceContext.from_defaults(
+         llm=llm, chunk_size=800, chunk_overlap=20, embed_model=embed_model_uae
+     )
+     index = VectorStoreIndex.from_documents(documents, service_context=service_context, show_progress=True)
+     index.storage_context.persist()
+     query_engine = index.as_query_engine()
+
+     # Streamlit input for user query
+     user_query = st.text_input("Enter your query:")
+
+     # Query engine with user input
+     if user_query:
+         response = query_engine.query(user_query)
+         st.markdown(f"**Response:** {response}")
+ else:
+     st.write("Please upload a file first.")
+
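Note that the new script rebuilds the embeddings and the index from scratch on every upload, even though index.storage_context.persist() writes the index to disk. A minimal sketch of reusing the persisted copy instead, assuming the same pre-0.10 llama_index API the app imports (./storage is the library's default persist directory):

    import os
    from llama_index import StorageContext, VectorStoreIndex, load_index_from_storage

    def get_index(documents, service_context, persist_dir="./storage"):
        # Reload the index persisted by an earlier run instead of re-embedding.
        if os.path.isdir(persist_dir):
            storage_context = StorageContext.from_defaults(persist_dir=persist_dir)
            return load_index_from_storage(storage_context, service_context=service_context)
        # First run: build the index, then persist it for subsequent reruns.
        index = VectorStoreIndex.from_documents(documents, service_context=service_context)
        index.storage_context.persist(persist_dir=persist_dir)
        return index

On Streamlit, where the whole script reruns on every widget interaction, this avoids re-embedding the same document for each query.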
requirements.txt CHANGED
@@ -1,15 +1,7 @@
- langchain
- openai
- PyPDF2
- faiss-cpu
- tiktoken
- watchdog
- streamlit
- fitz
  llama-index
- transformers[torch]
- huggingface_hub[inference]
- beautifulsoup4
- unstructured
- watchdog
- transformers
+ pypdf
+ streamlit
+ huggingface_hub[inference]>=0.19.0
+ transformers
+ torch
+ watchdog
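
The huggingface_hub[inference]>=0.19.0 pin matters because llama_index's HuggingFaceInferenceAPI wraps the hub's InferenceClient. A quick smoke test of the pinned stack before launching the app, as a sketch (the prompt is arbitrary and the token placeholder is hypothetical):

    from huggingface_hub import InferenceClient

    # Call the same hosted model the app uses, directly.
    client = InferenceClient(model="HuggingFaceH4/zephyr-7b-alpha", token="hf_...")  # replace with a real token
    print(client.text_generation("Say hello.", max_new_tokens=16))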