Spaces:
Paused
Paused
Rahul Bhoyar
committed on
Commit
·
08728cc
1
Parent(s):
4ddfb35
Updated files
Browse files- .gitignore +2 -1
- app.py +101 -51
- requirements.txt +6 -14
.gitignore
CHANGED
@@ -1 +1,2 @@
|
|
1 |
-
venv/
|
|
|
|
1 |
+
venv/
|
2 |
+
data/*
|
app.py
CHANGED
@@ -1,60 +1,110 @@
|
|
1 |
-
import streamlit as st
|
2 |
-
from PyPDF2 import PdfReader
|
3 |
-
from llama_index.llms import HuggingFaceInferenceAPI
|
4 |
-
from llama_index import VectorStoreIndex
|
5 |
-
from llama_index.embeddings import HuggingFaceEmbedding
|
6 |
-
from llama_index import ServiceContext
|
7 |
-
from llama_index.schema import Document
|
8 |
-
|
9 |
-
|
10 |
-
def read_pdf(uploaded_file):
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
def querying(query_engine):
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
# docs = document_search.similarity_search(query_text)
|
27 |
-
# output = chain.run(input_documents=docs, question=query_text)
|
28 |
-
# st.write(output)
|
29 |
|
30 |
-
def main():
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
|
42 |
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
|
55 |
-
|
56 |
|
57 |
|
58 |
-
if __name__ == "__main__":
|
59 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
60 |
|
|
|
1 |
+
# import streamlit as st
|
2 |
+
# from PyPDF2 import PdfReader
|
3 |
+
# from llama_index.llms import HuggingFaceInferenceAPI
|
4 |
+
# from llama_index import VectorStoreIndex
|
5 |
+
# from llama_index.embeddings import HuggingFaceEmbedding
|
6 |
+
# from llama_index import ServiceContext
|
7 |
+
# from llama_index.schema import Document
|
8 |
+
|
9 |
+
|
10 |
+
# def read_pdf(uploaded_file):
|
11 |
+
# pdf_reader = PdfReader(uploaded_file)
|
12 |
+
# text = ""
|
13 |
+
# for page_num in range(len(pdf_reader.pages)):
|
14 |
+
# text += pdf_reader.pages[page_num].extract_text()
|
15 |
+
# return text
|
16 |
+
|
17 |
+
# def querying(query_engine):
|
18 |
+
# query = st.text_input("Enter the Query for PDF:")
|
19 |
+
# submit = st.button("Generate The response for the query")
|
20 |
+
# if submit:
|
21 |
+
# with st.spinner("Fetching the response..."):
|
22 |
+
# response = query_engine.query(query)
|
23 |
+
# st.write(f"**Response:** {response}")
|
|
|
|
|
|
|
|
|
|
|
24 |
|
25 |
+
# def main():
|
26 |
+
# st.title("PdfQuerier using LLAMA by Rahul Bhoyar")
|
27 |
+
# hf_token = st.text_input("Enter your Hugging Face token:")
|
28 |
+
# llm = HuggingFaceInferenceAPI(model_name="HuggingFaceH4/zephyr-7b-alpha", token=hf_token)
|
29 |
+
# uploaded_file = st.file_uploader("Choose a PDF file", type=["pdf"])
|
30 |
|
31 |
+
# if uploaded_file is not None:
|
32 |
+
# file_contents = read_pdf(uploaded_file)
|
33 |
+
# documents = Document(text=file_contents)
|
34 |
+
# documents = [documents]
|
35 |
+
# st.success("Documents loaded successfully!")
|
36 |
|
37 |
|
38 |
+
# with st.spinner("Created Embedding model..."):
|
39 |
+
# embed_model_uae = HuggingFaceEmbedding(model_name="WhereIsAI/UAE-Large-V1")
|
40 |
+
# service_context = ServiceContext.from_defaults(llm=llm, chunk_size=800, chunk_overlap=20, embed_model=embed_model_uae)
|
41 |
+
# st.success("Embedding model created successfully!")
|
42 |
|
43 |
+
# # Download embeddings from OpenAI
|
44 |
+
# with st.spinner("Created VectorStoreIndex..."):
|
45 |
+
# index = VectorStoreIndex.from_documents(documents, service_context=service_context, show_progress=True)
|
46 |
+
# index.storage_context.persist()
|
47 |
+
# query_engine = index.as_query_engine()
|
48 |
+
# st.success("VectorStoreIndex created successfully!")
|
49 |
|
50 |
+
# querying(query_engine)
|
51 |
|
52 |
|
53 |
+
# if __name__ == "__main__":
|
54 |
+
# main()
|
55 |
+
|
56 |
+
|
57 |
+
|
58 |
+
|
59 |
+
import streamlit as st
|
60 |
+
from llama_index import SimpleDirectoryReader, VectorStoreIndex
|
61 |
+
from llama_index import ServiceContext
|
62 |
+
from llama_index.embeddings import HuggingFaceEmbedding
|
63 |
+
from llama_index.llms import HuggingFaceInferenceAPI
|
64 |
+
import os
|
65 |
+
|
66 |
+
# os.environ["GOOGLE_API_KEY"]="AIzaSyBYrZpUdTc4rumhdHajlKfwY4Kq0u6vFDs"
|
67 |
+
|
68 |
+
# Streamlit title and description
|
69 |
+
st.title("Gemini-File with Llama-Index by Rahul Bhoyar")
|
70 |
+
st.write("This app allows you to upload your own Pdf and query your document, Powered By Gemini")
|
71 |
+
|
72 |
+
hf_token = st.text_input("Enter your Hugging Face token:")
|
73 |
+
|
74 |
+
|
75 |
+
# Function to save an uploaded file into the local "data" directory, where
# SimpleDirectoryReader later picks it up for indexing.
def save_uploadedfile(uploadedfile):
    """Write a Streamlit UploadedFile to data/<name> and show a success message.

    Returns whatever st.success(...) returns (kept for compatibility with the
    original code); callers use this function only for its side effects.
    """
    # data/* is git-ignored in this repo, so a fresh checkout/deployment has no
    # data/ directory — create it on demand instead of crashing on open().
    os.makedirs("data", exist_ok=True)
    with open(os.path.join("data", uploadedfile.name), "wb") as f:
        f.write(uploadedfile.getbuffer())
    return st.success("Saved File:{} to directory".format(uploadedfile.name))
|
80 |
+
|
81 |
+
# Streamlit input for user file upload
uploaded_pdf = st.file_uploader("Upload your PDF", type=['pdf'])

# Load data and configure the index.
# NOTE(review): indentation was lost in the pasted source; the final else branch
# is attached to this outer `if`, as its "Please upload a file first." message
# indicates — confirm against the deployed app.
if uploaded_pdf is not None:
    input_file = save_uploadedfile(uploaded_pdf)
    st.write("File uploaded successfully!")

    # Index everything under data/ — this includes the file just saved above
    # (and any files left over from previous uploads).
    documents = SimpleDirectoryReader("data").load_data()
    llm = HuggingFaceInferenceAPI(model_name="HuggingFaceH4/zephyr-7b-alpha", token=hf_token)
    embed_model_uae = HuggingFaceEmbedding(model_name="WhereIsAI/UAE-Large-V1")

    # Configure Service Context (chunking + models used to build the index).
    service_context = ServiceContext.from_defaults(
        llm=llm, chunk_size=800, chunk_overlap=20, embed_model=embed_model_uae
    )
    # NOTE(review): Streamlit reruns this whole script on every widget
    # interaction, so the index is rebuilt (and re-embedded) on each query —
    # consider caching (st.cache_resource) if this becomes slow.
    index = VectorStoreIndex.from_documents(documents, service_context=service_context, show_progress=True)
    index.storage_context.persist()
    query_engine = index.as_query_engine()

    # Streamlit input for user query
    user_query = st.text_input("Enter your query:")

    # Query engine with user input
    if user_query:
        response = query_engine.query(user_query)
        st.markdown(f"**Response:** {response}")
else:
    st.write("Please upload a file first.")
110 |
|
requirements.txt
CHANGED
@@ -1,15 +1,7 @@
|
|
1 |
-
langchain
|
2 |
-
openai
|
3 |
-
PyPDF2
|
4 |
-
faiss-cpu
|
5 |
-
tiktoken
|
6 |
-
watchdog
|
7 |
-
streamlit
|
8 |
-
fitz
|
9 |
llama-index
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
llama-index
|
2 |
+
pypdf
|
3 |
+
streamlit
|
4 |
+
huggingface_hub[inference]>=0.19.0
|
5 |
+
transformers
|
6 |
+
torch
|
7 |
+
watchdog
|