Spaces:

Nechba
/

SmartHire-Matcher

Runtime error

App Files Files Community

Nechba committed on Oct 20, 2024

Commit

7aaad1d

1 Parent(s): 5c51e85

first commit

Browse files

Files changed (12) hide show

app.py +181 -0
logo.png +0 -0
requirements.txt +12 -0
utlis/__init__.py +0 -0
utlis/__pycache__/__init__.cpython-310.pyc +0 -0
utlis/__pycache__/__init__.cpython-39.pyc +0 -0
utlis/__pycache__/constant.cpython-310.pyc +0 -0
utlis/__pycache__/constant.cpython-39.pyc +0 -0
utlis/__pycache__/helper.cpython-310.pyc +0 -0
utlis/__pycache__/helper.cpython-39.pyc +0 -0
utlis/constant.py +34 -0
utlis/helper.py +210 -0

app.py ADDED Viewed

	@@ -0,0 +1,181 @@

+from utlis.helper import *
+import sqlite3
+import hashlib
+import io
+initialize_session_state()
+with st.sidebar:
+    st.image("logo.png", width=170)
+    st.title("Config Settings")
+    # Get List of models
+    st.session_state.genre = st.radio(
+    "Choose option",
+    [ "Select Service", "Add service"])#,"Delete service"])
+    if st.session_state.genre=="Add service":
+        st.title('Add service')
+        # Check service status
+        # Get all available services
+        add_new_service = st.checkbox("Add new service")
+        if add_new_service:
+            new_service = st.text_input("Enter service name")
+            # Get list of Embedding models
+            if  new_service and st.button('Add'):
+                add_service(st.session_state.token,new_service)
+        data = {"token": st.session_state.token}
+        json_data = json.dumps(data)
+        headers = {'Content-Type': 'application/json'}
+        services  = requests.get(SERVICES_API,data=json_data, headers=headers)
+        services =json.loads(services.text)
+        if len(services)>0:
+            st.session_state.service = st.selectbox("Choose Service",services)
+            st.session_state.uploaded_files = st.file_uploader("Upload CVs",  type=["pdf","doc"], accept_multiple_files=True)
+            if st.session_state.uploaded_files:
+                st.session_state.process = st.button('Process')
+                if st.session_state.process:
+                    with st.spinner("Processing..."):
+                        add_document(st.session_state.token,st.session_state.service)
+    elif st.session_state.genre=="Select Service":
+        st.title('Choose Service')
+        data = {"token": st.session_state.token}
+        json_data = json.dumps(data)
+        headers = {'Content-Type': 'application/json'}
+        services  = requests.get(SERVICES_API,data=json_data, headers=headers)
+        services =json.loads(services.text)
+        if len(services)>0:
+            st.session_state.service_slected_to_chat = st.selectbox(" ",services)
+            st.session_state.top_k=st.slider("Number of Candidates", min_value=1, max_value=10, value=3, step=1)
+            # data = {"token": st.session_state.token, "servicename": st.session_state.service_slected_to_chat}
+            # json_data = json.dumps(data)
+            # headers = {'Content-Type': 'application/json'}
+            # history_document  = requests.get(DOCUMENT_API,data=json_data, headers=headers)
+            # history_document =json.loads(history_document.text).get("documents",[])
+            # history_document = [doc["documentname"] for doc in history_document]
+    # elif st.session_state.genre == "Delete service":
+    #     st.title('Delete Service')
+    #     data = {"token": st.session_state.token}
+    #     json_data = json.dumps(data)
+    #     headers = {'Content-Type': 'application/json'}
+    #     services  = requests.get(SERVICES_API,data=json_data, headers=headers)
+    #     services =json.loads(services.text)
+    #     if len(services)>=2:
+    #         services.append("ALL")
+    #         # Get list of documents from histrory
+    #     if "ALL" in services:
+    #         service_slected = st.multiselect(
+    #                 "",services ,default="ALL"
+    #                 )
+    #     elif len(services)==1:
+    #         service_slected = st.multiselect(
+    #                 "",services,default=services[0]
+    #                 )
+    #     else:
+    #         service_slected = st.multiselect(
+    #                 "",services
+    #                 )
+    #     if "ALL" in service_slected:
+    #         service_slected = services
+    #         service_slected.remove("ALL")
+    #     st.write("You selected:", service_slected)
+    #     if len(service_slected) > 0:
+    #         st.session_state.delete = st.button('Delete')
+    #         if st.session_state.delete:
+    #             delete_service(st.session_state.token ,service_slected)
+    # elif st.session_state.genre == "Delete CV(s)":
+    #     st.title('Delete CV(s)')
+    #     data = {"token": st.session_state.token}
+    #     json_data = json.dumps(data)
+    #     headers = {'Content-Type': 'application/json'}
+    #     services  = requests.get(SERVICES_API,data=json_data, headers=headers)
+    #     services =json.loads(services.text)
+    #     if len(services)>0:
+    #         service = st.selectbox("Choose Service",services)
+    #         data = {"token": st.session_state.token, "servicename": service}
+    #         json_data = json.dumps(data)
+    #         headers = {'Content-Type': 'application/json'}
+    #         st.write("You selected:", document_slected_to_delete)
+    #         if len(document_slected_to_delete) > 0:
+    #             st.session_state.delete = st.button('Delete')
+    #             if st.session_state.delete:
+    #                 delete_document(st.session_state.token,st.session_state.service ,document_slected_to_delete)
+# css_style = """
+# <style>
+# .title {
+#     white-space: nowrap;
+# }
+# </style>
+# """
+# st.markdown(css_style, unsafe_allow_html=True)
+st.markdown("""
+    <style>
+    .st-bm {
+        color: #1E90FF; /* DodgerBlue color */
+    }
+            .card {
+    border-radius: 10px;
+    box-shadow: 0 2px 4px rgba(0,0,0,0.1);
+    padding: 15px;
+    margin: 10px 0;
+    transition: box-shadow 0.3s ease-in-out;
+}
+.card:hover {
+    box-shadow: 0 4px 8px rgba(0,0,0,0.2);
+}
+.highlight {
+    color:  #1E90FF; /* Custom color */
+}
+    </style>
+    """, unsafe_allow_html=True)
+with st.container():
+    st.markdown('<h1 class="title">SmartHire Matcher</h1>', unsafe_allow_html=True)
+    col1, col2 = st.columns([3, 1])
+    if st.session_state.genre=="Select Service" and st.session_state.service_slected_to_chat:
+        query = st.text_area("Add description of your offer:", height=300)
+        if query and st.button('Process') :
+            with st.spinner("Finding Matching CVs..."):
+                results = search_document( index_name= "cvindex",token= "abcd",service_name= st.session_state.service_slected_to_chat,query= query,top_k= st.session_state.top_k)
+                st.session_state.results_str = results.decode('utf-8')
+            # Displaying results
+        try:
+            if st.session_state.results_str:  # check if there are results
+                results = json.loads(st.session_state.results_str)
+                for index, item in enumerate(results):
+                    with st.container():
+                        col1, col2 = st.columns([3, 1])
+                        with col1:
+                            st.markdown(f"<div class='card'><h3><span class='highlight'>Top:</span> {index+1}</h3><p><span class='highlight'>Score:</span> {round(item['score']*100, 3)}%</p><p><span class='highlight'>Document Name:</span> {item['documentname'].replace('_pdf', ' ')}</p></div>", unsafe_allow_html=True)
+                            #st.markdown(f"**Tag:** {item['tag']}")
+                            #st.markdown(f"**Score:** {round(item['score'], 3)}")
+                            #st.markdown(f"**Document Name:** {item['documentname'].replace('_pdf', ' ')}")
+                        with col2:
+                            b64_pdf = item['encoded_cv']
+                            pdf = base64.b64decode(b64_pdf)
+                            pdf_file = io.BytesIO(pdf)
+                            st.download_button("Download CV", data=pdf_file, file_name=item['documentname'].replace('_', '.'), mime='application/pdf', key=f"download_{index}_{item['documentname']}")
+                if not results:
+                    st.error("No results found.")
+        except Exception as e:
+            st.error("Failed to load results. Please try again later."+ str(e))

logo.png ADDED Viewed

requirements.txt ADDED Viewed

	@@ -0,0 +1,12 @@

+fastapi
+uvicorn[standard]
+pydantic
+requests
+typing
+redis
+numpy
+gevent
+PyPDF2
+pdfplumber
+openai
+google-generativeai==0.7.0

utlis/__init__.py ADDED Viewed

File without changes

utlis/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (158 Bytes). View file

utlis/__pycache__/__init__.cpython-39.pyc ADDED Viewed

Binary file (140 Bytes). View file

utlis/__pycache__/constant.cpython-310.pyc ADDED Viewed

Binary file (857 Bytes). View file

utlis/__pycache__/constant.cpython-39.pyc ADDED Viewed

Binary file (842 Bytes). View file

utlis/__pycache__/helper.cpython-310.pyc ADDED Viewed

Binary file (5.25 kB). View file

utlis/__pycache__/helper.cpython-39.pyc ADDED Viewed

Binary file (7.37 kB). View file

utlis/constant.py ADDED Viewed

	@@ -0,0 +1,34 @@

+# Base URL of the backend web server; every endpoint below is derived from it.
+# NOTE(review): this is an ngrok tunnel hostname — presumably temporary;
+# confirm it is still reachable or move it to configuration.
+# IP_WEB_SERVER = "https://f564-196-65-150-53.ngrok-free.app"
+# IP_MODEL_SERVER = "https://fluffy-mole-81.telebit.io"
+IP_WEB_SERVER = "https://36c9-41-248-140-202.ngrok-free.app"
+#IP_WEB_SERVER = "http://localhost:8000"
+# Endpoint URLs built by appending a path to IP_WEB_SERVER.
+SERVICES_API = IP_WEB_SERVER+"/services/"
+ADD_SERVICES_API = IP_WEB_SERVER+"/add_services"
+ADD_STORE_DOCUMENT = IP_WEB_SERVER+"/add_and_store_document"
+# NOTE(review): "/serach" looks like a misspelling of "/search"; it must match
+# the route the server actually exposes, so verify before changing it.
+SEARCH_API = IP_WEB_SERVER+"/serach"
+DOCUMENT_API = IP_WEB_SERVER+"/documents"
+REMOVE_DOCUMENTS_API = IP_WEB_SERVER+"/remove_documents"
+REMOVE_SERVICE_API = IP_WEB_SERVER+"/remove_service"
+GET_NUM_PAGES = IP_WEB_SERVER+"/get_num_pages"
+RESPONSE_API = IP_WEB_SERVER+"/structure_response"
+RESPONSE_TXT_API = IP_WEB_SERVER+"/structure_response_text"
+# Nested template whose leaves are the placeholder string "<variable>".
+# NOTE(review): the field names describe payment rates, not CVs — presumably
+# carried over from another project; confirm whether it is still used.
+DEFAULT_SCHEMA = {
+    "GeographicContext": "<variable>",
+    "SubGeographicContext": "<variable>",
+    "Channel": "<variable>",
+    "RateType": "<variable>",
+    "Notes": ["<variable>"],
+    "Rates": [
+        {
+            "PaymentProduct": "<variable>",
+            "Details": [
+                {
+                    "FeeTier": "<variable>",
+                    "IRD": ["<variable>"],
+                    "Rate": "<variable>"
+                },
+            ]
+        },
+    ]
+}

utlis/helper.py ADDED Viewed

	@@ -0,0 +1,210 @@

+import pdfplumber
+import streamlit as st
+import requests
+import json
+import redis
+import redis.commands.search
+from redis.commands.search.field import TagField, VectorField, TextField
+from redis.commands.search.indexDefinition import IndexDefinition, IndexType
+import logging
+from redis.commands.search.query import Query
+import numpy as np
+from typing import List, Dict, Any
+from utlis.constant import *
+from PIL import Image
+import google.generativeai as genai
+genai.configure(api_key="AIzaSyAhz9UBzkEIYI886zZRm40qqB1Kd_9Y4-0")
+import base64
+import sqlite3
+def initialize_session_state():
+    if "doc_ortext" not in st.session_state:
+        st.session_state["doc_ortext"] = None
+    if "token" not in st.session_state:
+        st.session_state["token"] ="abcd"
+    if "service" not in st.session_state:
+        st.session_state["service"] = None
+    if "results_str" not in st.session_state:
+        st.session_state.results_str = False
+    if "service_slected_to_chat" not in st.session_state:
+        st.session_state.service_slected_to_chat = False
+    if "embdding_model" not in st.session_state:
+        st.session_state["embdding_model"] = None
+    if "indexing_method" not in st.session_state:
+        st.session_state["indexing_method"] = None
+    if "uploaded_files" not in st.session_state:
+        st.session_state["uploaded_files"] = None
+    if "messages" not in st.session_state:
+        st.session_state["messages"] = [{"role": "assistant", "content": "How can I help you?"}]
+def extract_text_from_pdf(pdf_path):
+    text=""
+    with pdfplumber.open(pdf_path) as pdf:
+        for page_number, page in enumerate(pdf.pages, start=1):
+            # Try to extract the text
+            text+= page.extract_text(x_tolerance=2, y_tolerance=4, layout=True, x_density=5, y_density=10)
+    return text
+def delete_service(token,service_slected_to_delete):
+    for srevice_name in service_slected_to_delete:
+        url = REMOVE_SERVICE_API
+        # JSON payload to be sent
+        data = {
+            "token": token,
+            "servicename": srevice_name
+            }
+        json_data = json.dumps(data)
+        # Set the headers to specify that the content type is JSON
+        headers = {'Content-Type': 'application/json'}
+        # Send the POST request
+        response = requests.delete(url, data=json_data, headers=headers)
+        if json.loads( response.text).get("success")==True:
+            st.success(f"{srevice_name} deleted successfully")
+        else:
+            st.error(f"{srevice_name} not deleted successfully")
+def delete_document(token, service,document_slected_to_delete):
+        print(document_slected_to_delete)
+    # for document_name in document_slected_to_delete:
+        url = REMOVE_DOCUMENTS_API
+        # JSON payload to be sent
+        data = {
+        "token": token,
+        "service_name": service,
+        "document_names":document_slected_to_delete
+        }
+        # Convert the dictionary to a JSON formatted string
+        json_data = json.dumps(data)
+        # Set the headers to specify that the content type is JSON
+        headers = {'Content-Type': 'application/json'}
+        # Send the POST request
+        response = requests.delete(url, data=json_data, headers=headers)
+        print(response)
+        if json.loads( response.text).get("status")=="success":
+            st.success("document(s) deleted successfully")
+        else:
+            st.error("document(s) not deleted successfully")
+def gemini_vision(file):
+    load_image = Image.open(file)
+    prompt= "please extract all text fromt this image"
+    model = genai.GenerativeModel('gemini-pro-vision')
+    response = model.generate_content([prompt, load_image])
+    return response.text
+def add_service(token,servicename):
+    url = ADD_SERVICES_API
+    # JSON payload to be sent
+    data = {
+        "token": token,
+        "services": [
+            {
+                "servicename": servicename
+            }
+        ]
+    }
+    # Convert the dictionary to a JSON formatted string
+    json_data = json.dumps(data)
+    # Set the headers to specify that the content type is JSON
+    headers = {'Content-Type': 'application/json'}
+    # Send the POST request
+    response = requests.post(url, data=json_data, headers=headers)
+    if json.loads( response.text).get("added_services",None):
+        st.success(f"{servicename} added successfully")
+    else:
+        st.error(json.loads( response.text).get("message",None))
+def add_text_document(token, servicename):
+    # Retrieve text and document name from session state
+    document_text = st.session_state.text_area
+    document_name = st.session_state.name_text_area.replace(" ", "_").replace("(", "_").replace(")", "_").replace("-", "_").replace(".", "_")
+    # Encode the document text as Base64
+    encoded_text = base64.b64encode(document_text.encode('utf-8')).decode('utf-8')
+    url = ADD_STORE_DOCUMENT
+    # Prepare the JSON payload
+    data = {
+        "token": token,
+        "service_name": servicename,
+        "document_name": document_name,
+        "file": encoded_text  # Assuming the API can handle Base64 encoded text under the 'file' key
+    }
+    # Convert the dictionary to a JSON formatted string and send the POST request
+    headers = {'Content-Type': 'application/json'}
+    response = requests.post(url, data=json.dumps(data), headers=headers)
+    status = json.loads(response.text).get("status")
+    if status == "success":
+        st.success(f"{document_name} uploaded successfully as text")
+    else:
+        st.error(f"{document_name} not uploaded successfully")
+def add_document(token,servicename):
+        files = st.session_state.uploaded_files
+        for file in files:
+            url = ADD_STORE_DOCUMENT
+            # JSON payload to be sent
+            document_name = file.name.replace(" ","")
+            #document_name = document_name.replace(".pdf","")
+            document_name = document_name.replace("(","_")
+            document_name = document_name.replace(")","_")
+            document_name = document_name.replace("-","_")
+            document_name = document_name.replace(".","_")
+            encoded_file = base64.b64encode(file.read()).decode('utf-8')
+            data = {
+            "token": token,
+            "service_name": servicename,
+            "document_name": document_name,
+            "file":encoded_file
+            }
+            # Convert the dictionary to a JSON formatted string
+            json_data = json.dumps(data)
+            # Set the headers to specify that the content type is JSON
+            headers = {'Content-Type': 'application/json'}
+            # Send the POST request
+            response = requests.post(url, data=json_data, headers=headers)
+            document_name = file.name.replace(" ","_")
+            if json.loads( response.content).get("status")=="success":
+                st.success(f"{document_name} added successfully")
+            else:
+                st.error(f"{document_name} not added successfully")
+def search_document(index_name,token,service_name,query, top_k ):
+    url = SEARCH_API
+    print(url)
+    # JSON payload to be sent
+    data = {
+        "index_name": index_name,
+        "token": token,
+        "service_name": service_name,
+        "query": query,
+        "top_k": top_k
+        }
+    # Convert the dictionary to a JSON formatted string
+    json_data = json.dumps(data)
+    # Set the headers to specify that the content type is JSON
+    headers = {'Content-Type': 'application/json'}
+    # Send the POST request
+    response = requests.post(url, data=json_data, headers=headers)
+    return response.content