Suat ATAN
committed on
Commit
•
7d1720e
1
Parent(s):
9177215
second commit
Browse files
- __pycache__/ui.cpython-311.pyc +0 -0
- app.py +8 -8
- components/__pycache__/sidebar.cpython-311.pyc +0 -0
- components/sidebar.py +1 -1
- core/__pycache__/caching.cpython-311.pyc +0 -0
- core/__pycache__/chunking.cpython-311.pyc +0 -0
- core/__pycache__/embedding.cpython-311.pyc +0 -0
- core/__pycache__/qa.cpython-311.pyc +0 -0
- core/__pycache__/utils.cpython-311.pyc +0 -0
- core/caching.py +4 -4
- core/chunking.py +1 -1
- core/embedding.py +2 -2
- core/qa.py +2 -2
- core/utils.py +1 -1
- poetry.lock +0 -0
- poetry.toml +2 -0
- pyproject.toml +47 -0
- ui.py +1 -1
__pycache__/ui.cpython-311.pyc
ADDED
Binary file (4.4 kB). View file
|
|
app.py
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
import streamlit as st
|
2 |
|
3 |
-
from
|
4 |
|
5 |
-
from knowledge_gpt.ui import (
|
6 |
wrap_doc_in_html,
|
7 |
is_query_valid,
|
8 |
is_file_valid,
|
@@ -10,13 +10,13 @@ from knowledge_gpt.ui import (
|
|
10 |
display_file_read_error,
|
11 |
)
|
12 |
|
13 |
-
from
|
14 |
|
15 |
-
from
|
16 |
-
from
|
17 |
-
from
|
18 |
-
from
|
19 |
-
from
|
20 |
|
21 |
|
22 |
EMBEDDING = "openai"
|
|
|
1 |
import streamlit as st
|
2 |
|
3 |
+
from components.sidebar import sidebar
|
4 |
|
5 |
+
from ui import (
|
6 |
wrap_doc_in_html,
|
7 |
is_query_valid,
|
8 |
is_file_valid,
|
|
|
10 |
display_file_read_error,
|
11 |
)
|
12 |
|
13 |
+
from core.caching import bootstrap_caching
|
14 |
|
15 |
+
from core.parsing import read_file
|
16 |
+
from core.chunking import chunk_file
|
17 |
+
from core.embedding import embed_files
|
18 |
+
from core.qa import query_folder
|
19 |
+
from core.utils import get_llm
|
20 |
|
21 |
|
22 |
EMBEDDING = "openai"
|
components/__pycache__/sidebar.cpython-311.pyc
CHANGED
Binary files a/components/__pycache__/sidebar.cpython-311.pyc and b/components/__pycache__/sidebar.cpython-311.pyc differ
|
|
components/sidebar.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
import streamlit as st
|
2 |
|
3 |
-
from
|
4 |
from dotenv import load_dotenv
|
5 |
import os
|
6 |
|
|
|
1 |
import streamlit as st
|
2 |
|
3 |
+
from components.faq import faq
|
4 |
from dotenv import load_dotenv
|
5 |
import os
|
6 |
|
core/__pycache__/caching.cpython-311.pyc
CHANGED
Binary files a/core/__pycache__/caching.cpython-311.pyc and b/core/__pycache__/caching.cpython-311.pyc differ
|
|
core/__pycache__/chunking.cpython-311.pyc
CHANGED
Binary files a/core/__pycache__/chunking.cpython-311.pyc and b/core/__pycache__/chunking.cpython-311.pyc differ
|
|
core/__pycache__/embedding.cpython-311.pyc
CHANGED
Binary files a/core/__pycache__/embedding.cpython-311.pyc and b/core/__pycache__/embedding.cpython-311.pyc differ
|
|
core/__pycache__/qa.cpython-311.pyc
CHANGED
Binary files a/core/__pycache__/qa.cpython-311.pyc and b/core/__pycache__/qa.cpython-311.pyc differ
|
|
core/__pycache__/utils.cpython-311.pyc
CHANGED
Binary files a/core/__pycache__/utils.cpython-311.pyc and b/core/__pycache__/utils.cpython-311.pyc differ
|
|
core/caching.py
CHANGED
@@ -1,10 +1,10 @@
|
|
1 |
import streamlit as st
|
2 |
from streamlit.runtime.caching.hashing import HashFuncsDict
|
3 |
|
4 |
-
import
|
5 |
-
import
|
6 |
-
import
|
7 |
-
from
|
8 |
|
9 |
|
10 |
def file_hash_func(file: File) -> str:
|
|
|
1 |
import streamlit as st
|
2 |
from streamlit.runtime.caching.hashing import HashFuncsDict
|
3 |
|
4 |
+
import core.parsing as parsing
|
5 |
+
import core.chunking as chunking
|
6 |
+
import core.embedding as embedding
|
7 |
+
from core.parsing import File
|
8 |
|
9 |
|
10 |
def file_hash_func(file: File) -> str:
|
core/chunking.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
from langchain.docstore.document import Document
|
2 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
3 |
-
from
|
4 |
|
5 |
|
6 |
def chunk_file(
|
|
|
1 |
from langchain.docstore.document import Document
|
2 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
3 |
+
from core.parsing import File
|
4 |
|
5 |
|
6 |
def chunk_file(
|
core/embedding.py
CHANGED
@@ -1,11 +1,11 @@
|
|
1 |
from langchain.vectorstores import VectorStore
|
2 |
-
from
|
3 |
from langchain.vectorstores.faiss import FAISS
|
4 |
from langchain.embeddings import OpenAIEmbeddings
|
5 |
from langchain.embeddings.base import Embeddings
|
6 |
from typing import List, Type
|
7 |
from langchain.docstore.document import Document
|
8 |
-
from
|
9 |
|
10 |
|
11 |
class FolderIndex:
|
|
|
1 |
from langchain.vectorstores import VectorStore
|
2 |
+
from core.parsing import File
|
3 |
from langchain.vectorstores.faiss import FAISS
|
4 |
from langchain.embeddings import OpenAIEmbeddings
|
5 |
from langchain.embeddings.base import Embeddings
|
6 |
from typing import List, Type
|
7 |
from langchain.docstore.document import Document
|
8 |
+
from core.debug import FakeVectorStore, FakeEmbeddings
|
9 |
|
10 |
|
11 |
class FolderIndex:
|
core/qa.py
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
from typing import List
|
2 |
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
|
3 |
-
from
|
4 |
from langchain.docstore.document import Document
|
5 |
-
from
|
6 |
from pydantic import BaseModel
|
7 |
from langchain.chat_models.base import BaseChatModel
|
8 |
|
|
|
1 |
from typing import List
|
2 |
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
|
3 |
+
from core.prompts import STUFF_PROMPT
|
4 |
from langchain.docstore.document import Document
|
5 |
+
from core.embedding import FolderIndex
|
6 |
from pydantic import BaseModel
|
7 |
from langchain.chat_models.base import BaseChatModel
|
8 |
|
core/utils.py
CHANGED
@@ -3,7 +3,7 @@ from langchain.chains.combine_documents.stuff import StuffDocumentsChain
|
|
3 |
from langchain.docstore.document import Document
|
4 |
|
5 |
from langchain.chat_models import ChatOpenAI
|
6 |
-
from
|
7 |
from langchain.chat_models.base import BaseChatModel
|
8 |
|
9 |
|
|
|
3 |
from langchain.docstore.document import Document
|
4 |
|
5 |
from langchain.chat_models import ChatOpenAI
|
6 |
+
from core.debug import FakeChatModel
|
7 |
from langchain.chat_models.base import BaseChatModel
|
8 |
|
9 |
|
poetry.lock
ADDED
The diff for this file is too large to render.
See raw diff
|
|
poetry.toml
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
[virtualenvs]
|
2 |
+
in-project = true
|
pyproject.toml
ADDED
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[tool.poetry]
|
2 |
+
name = "reportio"
|
3 |
+
version = "0.0.5"
|
4 |
+
description = "Accurate answers and instant citations for your documents"
|
5 |
+
authors = ["Suat ATAN <[email protected]>"]
|
6 |
+
license = "MIT"
|
7 |
+
readme = "README.md"
|
8 |
+
packages = [{include = "knowledge_gpt"}]
|
9 |
+
|
10 |
+
[tool.poetry.dependencies]
|
11 |
+
python = "^3.9.18"
|
12 |
+
streamlit = "^1.24.0"
|
13 |
+
langchain = "^0.0.220"
|
14 |
+
cohere = "^3.2.1"
|
15 |
+
faiss-cpu = "^1.7.3"
|
16 |
+
openai = "^0.27.8"
|
17 |
+
docx2txt = "^0.8"
|
18 |
+
pillow = "^9.4.0"
|
19 |
+
tenacity = "^8.2.0"
|
20 |
+
tiktoken = "^0.4.0"
|
21 |
+
pycryptodome = "^3.18.0"
|
22 |
+
pymupdf = "^1.22.5"
|
23 |
+
transformers = "^4.33.1"
|
24 |
+
python-dotenv = "^0.21.1"
|
25 |
+
|
26 |
+
|
27 |
+
[tool.poetry.group.dev.dependencies]
|
28 |
+
pytest = "^7.2.1"
|
29 |
+
ipykernel = "^6.23.3"
|
30 |
+
ipywidgets = "^8.0.6"
|
31 |
+
|
32 |
+
|
33 |
+
[tool.poetry.group.lint.dependencies]
|
34 |
+
isort = "^5.12.0"
|
35 |
+
black = {version = "^23.1a1", allow-prereleases = true}
|
36 |
+
flake8 = "^6.0.0"
|
37 |
+
|
38 |
+
|
39 |
+
[tool.poetry.group.extras.dependencies]
|
40 |
+
ipykernel = "^6.25.1"
|
41 |
+
|
42 |
+
[tool.isort]
|
43 |
+
profile = "black"
|
44 |
+
|
45 |
+
[build-system]
|
46 |
+
requires = ["poetry-core"]
|
47 |
+
build-backend = "poetry.core.masonry.api"
|
ui.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
from typing import List
|
2 |
import streamlit as st
|
3 |
from langchain.docstore.document import Document
|
4 |
-
from
|
5 |
import openai
|
6 |
from streamlit.logger import get_logger
|
7 |
from typing import NoReturn
|
|
|
1 |
from typing import List
|
2 |
import streamlit as st
|
3 |
from langchain.docstore.document import Document
|
4 |
+
from core.parsing import File
|
5 |
import openai
|
6 |
from streamlit.logger import get_logger
|
7 |
from typing import NoReturn
|