File size: 4,244 Bytes
87b3a2f
ca6370e
87b3a2f
 
 
 
3a05b97
87b3a2f
 
 
 
 
 
 
 
3a05b97
87b3a2f
 
3a05b97
 
87b3a2f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ca6370e
87b3a2f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64245e1
f854776
6814921
4ea05d0
87b3a2f
3a05b97
ba27df6
87b3a2f
 
44e7e2f
 
 
 
 
 
c64ea71
44e7e2f
c64ea71
87b3a2f
 
fee00c8
 
87b3a2f
fee00c8
 
87b3a2f
 
 
 
5f091d6
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import logging
import mimetypes
import os
import tempfile

import gradio as gr
import openai
import requests
import validators
from langchain.chains.question_answering import load_qa_chain
from langchain.document_loaders import UnstructuredFileLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from pydantic import NoneStr
from pypdf import PdfReader

def get_empty_state():
    """Return a fresh state dict whose knowledge base is not set yet."""
    return dict(knowledge_base=None)


def create_knowledge_base(docs):
    """Split *docs* into chunks and index them in a FAISS vector store.

    Args:
        docs: Documents accepted by ``CharacterTextSplitter.split_documents``.

    Returns:
        The FAISS store built from the chunks and their OpenAI embeddings.
    """
    # Newline-separated chunks, sized with ``len`` against a chunk size of
    # 500, with no overlap between neighbouring chunks.
    splitter_options = {
        "separator": "\n",
        "chunk_size": 500,
        "chunk_overlap": 0,
        "length_function": len,
    }
    pieces = CharacterTextSplitter(**splitter_options).split_documents(docs)

    # Embed every chunk and build the searchable index in one step.
    return FAISS.from_documents(pieces, OpenAIEmbeddings())


def upload_file(file_obj):
    """Index an uploaded file and return it together with the new state.

    Args:
        file_obj: Uploaded file object; only its ``name`` attribute (the path
            handed to the loader) is read.

    Returns:
        A ``(file_name, state)`` tuple. ``state`` is
        ``{"knowledge_base": kb}`` when indexing succeeds. When the file
        cannot be loaded or indexed, the knowledge base is ``None`` so the
        state keeps the same shape as a freshly created one.
    """
    try:
        docs = UnstructuredFileLoader(file_obj.name, strategy="fast").load()
        knowledge_base = create_knowledge_base(docs)
    except Exception:
        # Best effort: an unreadable file must not break the UI callback, but
        # the failure is logged instead of being silently discarded, and the
        # state stays a dict rather than turning into an error string.
        logging.getLogger(__name__).exception(
            "Could not index uploaded file %s", file_obj.name
        )
        knowledge_base = None

    return file_obj.name, {"knowledge_base": knowledge_base}


def upload_via_url(url):
    """Download the file at *url*, index it and return it with the new state.

    Args:
        url: Address of the document to download.

    Returns:
        A ``(file_path, state)`` tuple where ``file_path`` is the temporary
        local copy of the download and ``state`` is
        ``{"knowledge_base": kb}``.

    Raises:
        ValueError: If *url* is not a valid URL, or the server answers with a
            status code other than 200.
    """
    if not validators.url(url):
        raise ValueError("Please enter a valid URL")

    # NOTE(review): the URL is user-supplied and fetched as-is; consider
    # restricting reachable hosts if this runs on an internal network.
    # The timeout keeps a stalled server from blocking the callback forever.
    r = requests.get(url, timeout=30)
    if r.status_code != 200:
        raise ValueError(
            "Check the url of your file; returned status code %s" % r.status_code
        )

    # The Content-Type header may be absent or carry parameters such as
    # "; charset=utf-8"; only the bare MIME type maps to an extension.
    mime_type = (r.headers.get("content-type") or "").split(";", 1)[0].strip()
    file_extension = mimetypes.guess_extension(mime_type) if mime_type else None

    # Close the file before loading so the whole payload is flushed to disk;
    # delete=False keeps it available for the loader and for the returned path.
    with tempfile.NamedTemporaryFile(suffix=file_extension, delete=False) as temp_file:
        temp_file.write(r.content)
        file_path = temp_file.name

    docs = UnstructuredFileLoader(file_path, strategy="fast").load()
    knowledge_base = create_knowledge_base(docs)
    return file_path, {"knowledge_base": knowledge_base}


def answer_question(question, state):
    """Answer *question* using the knowledge base stored in *state*.

    Args:
        question: The question text entered by the user.
        state: Session state; expected to be a dict with a
            ``"knowledge_base"`` entry.

    Returns:
        The answer produced by the QA chain, or the string
        ``"Please upload Proper Document"`` when no knowledge base is
        available or answering fails.
    """
    fallback = "Please upload Proper Document"

    # The state may be empty, or not a dict at all after a failed upload.
    knowledge_base = state.get("knowledge_base") if isinstance(state, dict) else None
    if knowledge_base is None:
        return fallback

    try:
        docs = knowledge_base.similarity_search(question)
        llm = OpenAI(temperature=0.4)
        chain = load_qa_chain(llm, chain_type="stuff")
        return chain.run(input_documents=docs, question=question)
    except Exception:
        # Keep the UI responsive, but record the real cause instead of hiding
        # every failure behind the fallback message.
        logging.getLogger(__name__).exception("Could not answer question")
        return fallback
# HTML header shown at the top of the page; the wrapping <div> is closed with
# a matching </div> so the markup is well-formed.
title = """<br><br><br><div style="text-align: center;max-width: 700px;">
            <h1><a style="display:inline-block; margin-left: 1em; text-decoration:none; font-weight:bold;" href="https://www.adople.com">ADOPLE AI</a> - Document QA</h1>
            </div>"""
# Page layout: header, URL box plus upload button, the selected file, then the
# question and answer boxes. Components are created in display order.
with gr.Blocks(css="style.css", theme=gr.themes.Soft()) as demo:
    # State handed to and returned from the event handlers below.
    state = gr.State(get_empty_state())

    with gr.Column(elem_id="col-container"):
        gr.HTML(title)
        gr.Markdown("**Upload your file**")

        # Two ways to provide a document: a URL or a local file.
        with gr.Row(elem_id="row-flex"):
            with gr.Column(scale=0.85):
                file_url = gr.Textbox(
                    value="",
                    label="Upload your file",
                    placeholder="Enter a url",
                    show_label=False,
                    visible=True,
                    elem_classes="filenameshow",
                )
            with gr.Column(scale=0.15, min_width=160):
                upload_button = gr.UploadButton(
                    "Browse File",
                    file_types=[".txt", ".pdf", ".doc", ".docx"],
                    elem_classes="filenameshow",
                )

        file_output = gr.File(elem_classes="filenameshow")

        with gr.Row(), gr.Column(scale=1, min_width=0):
            user_question = gr.Textbox(
                value="",
                label="Question Box :",
                show_label=True,
                placeholder="Ask a question about your file:",
                elem_classes="spaceH",
            )

        with gr.Row(), gr.Column(scale=1, min_width=0):
            answer = gr.Textbox(
                value="",
                label="Answer Box :",
                show_label=True,
                placeholder="",
                lines=5,
            )

    # Event wiring: both upload paths refresh the file display and the state;
    # submitting a question fills the answer box.
    file_url.submit(
        fn=upload_via_url,
        inputs=file_url,
        outputs=[file_output, state],
    )
    upload_button.upload(
        fn=upload_file,
        inputs=upload_button,
        outputs=[file_output, state],
    )
    user_question.submit(
        fn=answer_question,
        inputs=[user_question, state],
        outputs=[answer],
    )

demo.launch()