Spaces:
Runtime error
Runtime error
first commit
Browse files- app.py +181 -0
- logo.png +0 -0
- requirements.txt +12 -0
- utlis/__init__.py +0 -0
- utlis/__pycache__/__init__.cpython-310.pyc +0 -0
- utlis/__pycache__/__init__.cpython-39.pyc +0 -0
- utlis/__pycache__/constant.cpython-310.pyc +0 -0
- utlis/__pycache__/constant.cpython-39.pyc +0 -0
- utlis/__pycache__/helper.cpython-310.pyc +0 -0
- utlis/__pycache__/helper.cpython-39.pyc +0 -0
- utlis/constant.py +34 -0
- utlis/helper.py +210 -0
app.py
ADDED
@@ -0,0 +1,181 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from utlis.helper import *
|
2 |
+
import sqlite3
|
3 |
+
import hashlib
|
4 |
+
import io
|
5 |
+
|
6 |
+
|
7 |
+
|
8 |
+
def _fetch_services(token):
    """Return the services the backend lists for ``token``.

    The token is sent as a JSON body on a GET request to ``SERVICES_API``.
    A network failure or a reply that is not valid JSON is reported in the
    UI and yields an empty list, so the page keeps rendering instead of
    raising.
    """
    payload = json.dumps({"token": token})
    headers = {'Content-Type': 'application/json'}
    try:
        response = requests.get(SERVICES_API, data=payload, headers=headers)
        return json.loads(response.text)
    except (requests.RequestException, ValueError) as exc:
        st.error(f"Could not load services: {exc}")
        return []


# Make sure every session key read below exists before any widget is drawn.
initialize_session_state()

# NOTE(review): the original indentation was lost in extraction; the nesting
# below is reconstructed from the control flow and should be confirmed.
with st.sidebar:
    st.image("logo.png", width=170)
    st.title("Config Settings")

    # A third option, "Delete service", is currently disabled.
    st.session_state.genre = st.radio(
        "Choose option",
        ["Select Service", "Add service"],
    )

    if st.session_state.genre == "Add service":
        st.title('Add service')
        if st.checkbox("Add new service"):
            new_service = st.text_input("Enter service name")
            if new_service and st.button('Add'):
                add_service(st.session_state.token, new_service)

        # Fetched after the optional add so a service created in this run
        # is already part of the list.
        services = _fetch_services(st.session_state.token)
        if services:
            st.session_state.service = st.selectbox("Choose Service", services)

            # The uploader sits under the service check because
            # add_document needs a selected service to attach the files to.
            st.session_state.uploaded_files = st.file_uploader(
                "Upload CVs", type=["pdf", "doc"], accept_multiple_files=True
            )
            if st.session_state.uploaded_files:
                st.session_state.process = st.button('Process')
                if st.session_state.process:
                    with st.spinner("Processing..."):
                        add_document(st.session_state.token, st.session_state.service)

    elif st.session_state.genre == "Select Service":
        st.title('Choose Service')
        services = _fetch_services(st.session_state.token)
        if services:
            st.session_state.service_slected_to_chat = st.selectbox(" ", services)
            st.session_state.top_k = st.slider(
                "Number of Candidates", min_value=1, max_value=10, value=3, step=1
            )
|
56 |
+
# data = {"token": st.session_state.token, "servicename": st.session_state.service_slected_to_chat}
|
57 |
+
# json_data = json.dumps(data)
|
58 |
+
# headers = {'Content-Type': 'application/json'}
|
59 |
+
# history_document = requests.get(DOCUMENT_API,data=json_data, headers=headers)
|
60 |
+
# history_document =json.loads(history_document.text).get("documents",[])
|
61 |
+
# history_document = [doc["documentname"] for doc in history_document]
|
62 |
+
|
63 |
+
|
64 |
+
|
65 |
+
# elif st.session_state.genre == "Delete service":
|
66 |
+
# st.title('Delete Service')
|
67 |
+
# data = {"token": st.session_state.token}
|
68 |
+
# json_data = json.dumps(data)
|
69 |
+
# headers = {'Content-Type': 'application/json'}
|
70 |
+
# services = requests.get(SERVICES_API,data=json_data, headers=headers)
|
71 |
+
# services =json.loads(services.text)
|
72 |
+
# if len(services)>=2:
|
73 |
+
# services.append("ALL")
|
74 |
+
# # Get list of documents from history
|
75 |
+
# if "ALL" in services:
|
76 |
+
# service_slected = st.multiselect(
|
77 |
+
# "",services ,default="ALL"
|
78 |
+
# )
|
79 |
+
# elif len(services)==1:
|
80 |
+
# service_slected = st.multiselect(
|
81 |
+
# "",services,default=services[0]
|
82 |
+
# )
|
83 |
+
# else:
|
84 |
+
# service_slected = st.multiselect(
|
85 |
+
# "",services
|
86 |
+
# )
|
87 |
+
# if "ALL" in service_slected:
|
88 |
+
# service_slected = services
|
89 |
+
# service_slected.remove("ALL")
|
90 |
+
# st.write("You selected:", service_slected)
|
91 |
+
|
92 |
+
# if len(service_slected) > 0:
|
93 |
+
# st.session_state.delete = st.button('Delete')
|
94 |
+
# if st.session_state.delete:
|
95 |
+
# delete_service(st.session_state.token ,service_slected)
|
96 |
+
|
97 |
+
# elif st.session_state.genre == "Delete CV(s)":
|
98 |
+
# st.title('Delete CV(s)')
|
99 |
+
# data = {"token": st.session_state.token}
|
100 |
+
# json_data = json.dumps(data)
|
101 |
+
# headers = {'Content-Type': 'application/json'}
|
102 |
+
# services = requests.get(SERVICES_API,data=json_data, headers=headers)
|
103 |
+
# services =json.loads(services.text)
|
104 |
+
# if len(services)>0:
|
105 |
+
# service = st.selectbox("Choose Service",services)
|
106 |
+
# data = {"token": st.session_state.token, "servicename": service}
|
107 |
+
# json_data = json.dumps(data)
|
108 |
+
# headers = {'Content-Type': 'application/json'}
|
109 |
+
|
110 |
+
|
111 |
+
# st.write("You selected:", document_slected_to_delete)
|
112 |
+
# if len(document_slected_to_delete) > 0:
|
113 |
+
# st.session_state.delete = st.button('Delete')
|
114 |
+
# if st.session_state.delete:
|
115 |
+
# delete_document(st.session_state.token,st.session_state.service ,document_slected_to_delete)
|
116 |
+
|
117 |
+
# css_style = """
|
118 |
+
# <style>
|
119 |
+
# .title {
|
120 |
+
# white-space: nowrap;
|
121 |
+
# }
|
122 |
+
# </style>
|
123 |
+
# """
|
124 |
+
|
125 |
+
# st.markdown(css_style, unsafe_allow_html=True)
|
126 |
+
# Stylesheet injected once per run: tints the `.st-bm` class and defines the
# `.card` / `.highlight` classes used by the result cards.
_PAGE_CSS = """
<style>
.st-bm {
color: #1E90FF; /* DodgerBlue color */
}
.card {
border-radius: 10px;
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
padding: 15px;
margin: 10px 0;
transition: box-shadow 0.3s ease-in-out;
}
.card:hover {
box-shadow: 0 4px 8px rgba(0,0,0,0.2);
}
.highlight {
color: #1E90FF; /* Custom color */
}
</style>
"""

st.markdown(_PAGE_CSS, unsafe_allow_html=True)
|
146 |
+
|
147 |
+
import html  # used below to escape server-provided names before HTML rendering

# NOTE(review): the original indentation was lost in extraction; the nesting
# below is reconstructed from the control flow and should be confirmed.
with st.container():
    st.markdown('<h1 class="title">SmartHire Matcher</h1>', unsafe_allow_html=True)
    col1, col2 = st.columns([3, 1])
    if st.session_state.genre == "Select Service" and st.session_state.service_slected_to_chat:
        query = st.text_area("Add description of your offer:", height=300)

        if query and st.button('Process'):
            with st.spinner("Finding Matching CVs..."):
                # Use the session token rather than a literal so the search
                # is made with the same token as every other backend call.
                results = search_document(
                    index_name="cvindex",
                    token=st.session_state.token,
                    service_name=st.session_state.service_slected_to_chat,
                    query=query,
                    top_k=st.session_state.top_k,
                )
                # Kept as text in session state so the cards survive reruns.
                st.session_state.results_str = results.decode('utf-8')

        # Displaying results
        try:
            if st.session_state.results_str:  # check if there are results
                results = json.loads(st.session_state.results_str)

                for index, item in enumerate(results):
                    with st.container():
                        col1, col2 = st.columns([3, 1])
                        with col1:
                            # The name originates from an uploaded file name,
                            # so escape it before embedding it in raw HTML.
                            shown_name = html.escape(item['documentname'].replace('_pdf', ' '))
                            card = (
                                "<div class='card'>"
                                f"<h3><span class='highlight'>Top:</span> {index+1}</h3>"
                                f"<p><span class='highlight'>Score:</span> {round(item['score']*100, 3)}%</p>"
                                f"<p><span class='highlight'>Document Name:</span> {shown_name}</p>"
                                "</div>"
                            )
                            st.markdown(card, unsafe_allow_html=True)
                        with col2:
                            # The CV arrives base64-encoded inside the JSON.
                            pdf_file = io.BytesIO(base64.b64decode(item['encoded_cv']))
                            st.download_button(
                                "Download CV",
                                data=pdf_file,
                                file_name=item['documentname'].replace('_', '.'),
                                mime='application/pdf',
                                key=f"download_{index}_{item['documentname']}",
                            )
                if not results:
                    st.error("No results found.")
        except Exception as e:
            # UI boundary: show any parsing/rendering problem instead of
            # letting the whole page fail.
            st.error("Failed to load results. Please try again later."+ str(e))
|
181 |
+
|
logo.png
ADDED
requirements.txt
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
fastapi
|
2 |
+
uvicorn[standard]
|
3 |
+
pydantic
|
4 |
+
requests
|
5 |
+
typing
|
6 |
+
redis
|
7 |
+
numpy
|
8 |
+
gevent
|
9 |
+
PyPDF2
|
10 |
+
pdfplumber
|
11 |
+
openai
|
12 |
+
google-generativeai==0.7.0
|
utlis/__init__.py
ADDED
File without changes
|
utlis/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (158 Bytes). View file
|
|
utlis/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (140 Bytes). View file
|
|
utlis/__pycache__/constant.cpython-310.pyc
ADDED
Binary file (857 Bytes). View file
|
|
utlis/__pycache__/constant.cpython-39.pyc
ADDED
Binary file (842 Bytes). View file
|
|
utlis/__pycache__/helper.cpython-310.pyc
ADDED
Binary file (5.25 kB). View file
|
|
utlis/__pycache__/helper.cpython-39.pyc
ADDED
Binary file (7.37 kB). View file
|
|
utlis/constant.py
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
# IP_WEB_SERVER = "https://f564-196-65-150-53.ngrok-free.app"
|
3 |
+
# IP_MODEL_SERVER = "https://fluffy-mole-81.telebit.io"
|
4 |
+
import os

# Base URL of the backend web server. The default is the tunnel address that
# was previously hard-coded; set the IP_WEB_SERVER environment variable to
# point elsewhere (e.g. "http://localhost:8000" for local development).
# A trailing slash is stripped so the paths below never produce "//".
IP_WEB_SERVER = os.environ.get(
    "IP_WEB_SERVER", "https://36c9-41-248-140-202.ngrok-free.app"
).rstrip("/")

# Backend endpoints, all derived from the base URL.
SERVICES_API = IP_WEB_SERVER + "/services/"
ADD_SERVICES_API = IP_WEB_SERVER + "/add_services"
ADD_STORE_DOCUMENT = IP_WEB_SERVER + "/add_and_store_document"
# NOTE(review): "/serach" looks like a misspelling of "/search"; it is kept
# as-is because it must match whatever route the backend exposes - confirm.
SEARCH_API = IP_WEB_SERVER + "/serach"
DOCUMENT_API = IP_WEB_SERVER + "/documents"
REMOVE_DOCUMENTS_API = IP_WEB_SERVER + "/remove_documents"
REMOVE_SERVICE_API = IP_WEB_SERVER + "/remove_service"
GET_NUM_PAGES = IP_WEB_SERVER + "/get_num_pages"
RESPONSE_API = IP_WEB_SERVER + "/structure_response"
RESPONSE_TXT_API = IP_WEB_SERVER + "/structure_response_text"

# Template of a structured response; every "<variable>" is a placeholder.
DEFAULT_SCHEMA = {
    "GeographicContext": "<variable>",
    "SubGeographicContext": "<variable>",
    "Channel": "<variable>",
    "RateType": "<variable>",
    "Notes": ["<variable>"],
    "Rates": [
        {
            "PaymentProduct": "<variable>",
            "Details": [
                {
                    "FeeTier": "<variable>",
                    "IRD": ["<variable>"],
                    "Rate": "<variable>",
                },
            ],
        },
    ],
}
|
utlis/helper.py
ADDED
@@ -0,0 +1,210 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pdfplumber
|
2 |
+
import streamlit as st
|
3 |
+
import requests
|
4 |
+
import json
|
5 |
+
import redis
|
6 |
+
import redis.commands.search
|
7 |
+
from redis.commands.search.field import TagField, VectorField, TextField
|
8 |
+
from redis.commands.search.indexDefinition import IndexDefinition, IndexType
|
9 |
+
import logging
|
10 |
+
from redis.commands.search.query import Query
|
11 |
+
import numpy as np
|
12 |
+
from typing import List, Dict, Any
|
13 |
+
from utlis.constant import *
|
14 |
+
from PIL import Image
|
15 |
+
import google.generativeai as genai
|
16 |
+
import os

# The API key is read from the GOOGLE_API_KEY environment variable so that no
# credential is stored in the repository.
# NOTE(review): the key that was previously committed on this line should be
# treated as compromised and rotated.
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))
|
17 |
+
import base64
|
18 |
+
import sqlite3
|
19 |
+
def initialize_session_state():
    """Give every session key used by the app a starting value.

    Keys that are already present in ``st.session_state`` are left untouched;
    only missing ones receive their default.
    """
    starting_values = {
        "doc_ortext": None,
        "token": "abcd",
        "service": None,
        "results_str": False,
        "service_slected_to_chat": False,
        "embdding_model": None,
        "indexing_method": None,
        "uploaded_files": None,
        "messages": [{"role": "assistant", "content": "How can I help you?"}],
    }
    for key, value in starting_values.items():
        if key not in st.session_state:
            st.session_state[key] = value
|
39 |
+
|
40 |
+
|
41 |
+
def extract_text_from_pdf(pdf_path):
    """Return the text of all pages of a PDF, concatenated in page order.

    Args:
        pdf_path: Passed straight to ``pdfplumber.open``.

    Returns:
        One string containing the layout-preserving text of every page.
        A page for which no text is extracted contributes nothing.
    """
    with pdfplumber.open(pdf_path) as pdf:
        # `or ""` guards against a page yielding None, which would otherwise
        # make the concatenation raise TypeError.
        return "".join(
            page.extract_text(
                x_tolerance=2, y_tolerance=4, layout=True, x_density=5, y_density=10
            ) or ""
            for page in pdf.pages
        )
|
48 |
+
|
49 |
+
def delete_service(token, service_slected_to_delete):
    """Ask the backend to remove each named service and report the outcome.

    Args:
        token: Token sent with every request.
        service_slected_to_delete: Iterable of service names to remove.

    One DELETE request is sent per service. A request that fails, or a reply
    that is not valid JSON, is reported as a failed deletion for that service
    and the remaining services are still processed.
    """
    # Set the headers to specify that the content type is JSON
    headers = {'Content-Type': 'application/json'}

    for srevice_name in service_slected_to_delete:
        json_data = json.dumps({"token": token, "servicename": srevice_name})
        try:
            # Send the DELETE request
            response = requests.delete(REMOVE_SERVICE_API, data=json_data, headers=headers)
            # `== True` is kept deliberately so the accepted replies are
            # exactly the ones accepted before.
            deleted = json.loads(response.text).get("success") == True  # noqa: E712
        except (requests.RequestException, ValueError):
            deleted = False

        if deleted:
            st.success(f"{srevice_name} deleted successfully")
        else:
            st.error(f"{srevice_name} not deleted successfully")
|
68 |
+
|
69 |
+
def delete_document(token, service, document_slected_to_delete):
    """Ask the backend to remove documents from a service and report the outcome.

    Args:
        token: Token sent with the request.
        service: Name of the service that holds the documents.
        document_slected_to_delete: Document names, sent together in a single
            request under ``document_names``.

    A request that fails, or a reply that is not valid JSON, is shown as a
    failed deletion instead of raising.
    """
    logging.debug("Deleting documents: %s", document_slected_to_delete)

    # JSON payload to be sent
    json_data = json.dumps({
        "token": token,
        "service_name": service,
        "document_names": document_slected_to_delete,
    })
    # Set the headers to specify that the content type is JSON
    headers = {'Content-Type': 'application/json'}

    try:
        # Send the DELETE request
        response = requests.delete(REMOVE_DOCUMENTS_API, data=json_data, headers=headers)
        logging.debug("Delete response: %s", response)
        deleted = json.loads(response.text).get("status") == "success"
    except (requests.RequestException, ValueError):
        deleted = False

    if deleted:
        st.success("document(s) deleted successfully")
    else:
        st.error("document(s) not deleted successfully")
|
92 |
+
def gemini_vision(file):
    """Send an image to the 'gemini-pro-vision' model and return its text reply.

    Args:
        file: Passed straight to ``PIL.Image.open``.

    Returns:
        The ``text`` attribute of the model response.
    """
    prompt = "please extract all text fromt this image"
    # The context manager releases the image once the request has been made.
    with Image.open(file) as load_image:
        model = genai.GenerativeModel('gemini-pro-vision')
        response = model.generate_content([prompt, load_image])

    return response.text
|
99 |
+
def add_service(token, servicename):
    """Register a new service with the backend and report the outcome.

    Args:
        token: Token sent with the request.
        servicename: Name of the service to create.

    Success is shown when the reply carries a non-empty ``added_services``;
    otherwise the reply's ``message`` is shown as an error. A failed request
    or a reply that is not valid JSON is shown as an error instead of raising.
    """
    # JSON payload to be sent
    json_data = json.dumps({
        "token": token,
        "services": [
            {"servicename": servicename},
        ],
    })
    # Set the headers to specify that the content type is JSON
    headers = {'Content-Type': 'application/json'}

    try:
        # Send the POST request and decode the reply once.
        response = requests.post(ADD_SERVICES_API, data=json_data, headers=headers)
        reply = json.loads(response.text)
    except (requests.RequestException, ValueError) as exc:
        st.error(f"Could not add {servicename}: {exc}")
        return

    if reply.get("added_services", None):
        st.success(f"{servicename} added successfully")
    else:
        st.error(reply.get("message", None))
|
123 |
+
def add_text_document(token, servicename):
    """Upload the text held in session state as a document of ``servicename``.

    Reads the body from ``st.session_state.text_area`` and the title from
    ``st.session_state.name_text_area``. In the title, spaces, parentheses,
    hyphens and dots are each turned into an underscore. The body is sent
    Base64-encoded under the ``file`` key, and the outcome is shown in the UI.
    """
    raw_text = st.session_state.text_area
    # One pass that maps every character of " ()-." to "_".
    to_underscore = str.maketrans(" ()-.", "_____")
    document_name = st.session_state.name_text_area.translate(to_underscore)

    text_as_b64 = base64.b64encode(raw_text.encode('utf-8')).decode('utf-8')

    body = json.dumps({
        "token": token,
        "service_name": servicename,
        "document_name": document_name,
        "file": text_as_b64,
    })
    response = requests.post(
        ADD_STORE_DOCUMENT,
        data=body,
        headers={'Content-Type': 'application/json'},
    )

    if json.loads(response.text).get("status") != "success":
        st.error(f"{document_name} not uploaded successfully")
    else:
        st.success(f"{document_name} uploaded successfully as text")
|
149 |
+
|
150 |
+
def add_document(token, servicename):
    """Upload every file held in ``st.session_state.uploaded_files``.

    Args:
        token: Token sent with each request.
        servicename: Service the documents are stored under.

    Each file is sent Base64-encoded in its own POST request. The stored
    document name is the file name with spaces removed and ``( ) - .`` each
    replaced by ``_``. A failed request or a reply that is not valid JSON is
    reported for that file only; the remaining files are still uploaded.
    Nothing happens when no files are present.
    """
    files = st.session_state.uploaded_files or []

    # Set the headers to specify that the content type is JSON
    headers = {'Content-Type': 'application/json'}
    # Spaces are dropped; the other listed characters become underscores.
    sanitize = str.maketrans({" ": None, "(": "_", ")": "_", "-": "_", ".": "_"})

    for file in files:
        document_name = file.name.translate(sanitize)
        encoded_file = base64.b64encode(file.read()).decode('utf-8')
        # JSON payload to be sent
        json_data = json.dumps({
            "token": token,
            "service_name": servicename,
            "document_name": document_name,
            "file": encoded_file,
        })

        try:
            # Send the POST request
            response = requests.post(ADD_STORE_DOCUMENT, data=json_data, headers=headers)
            added = json.loads(response.content).get("status") == "success"
        except (requests.RequestException, ValueError):
            added = False

        # The message shows the file name with spaces as underscores, not the
        # sanitized name that was sent to the backend.
        display_name = file.name.replace(" ", "_")
        if added:
            st.success(f"{display_name} added successfully")
        else:
            st.error(f"{display_name} not added successfully")
|
186 |
+
|
187 |
+
|
188 |
+
|
189 |
+
|
190 |
+
def search_document(index_name, token, service_name, query, top_k):
    """POST a search request to ``SEARCH_API`` and return the raw reply body.

    Args:
        index_name: Sent as ``index_name``.
        token: Sent as ``token``.
        service_name: Sent as ``service_name``.
        query: Sent as ``query``.
        top_k: Sent as ``top_k``.

    Returns:
        ``response.content`` (bytes); decoding and parsing are left to the
        caller.
    """
    print(SEARCH_API)
    body = json.dumps({
        "index_name": index_name,
        "token": token,
        "service_name": service_name,
        "query": query,
        "top_k": top_k,
    })
    reply = requests.post(
        SEARCH_API,
        data=body,
        headers={'Content-Type': 'application/json'},
    )
    return reply.content
|
210 |
+
|