Update app.py
app.py
CHANGED
@@ -1,3 +1,75 @@
+import subprocess
+import sys
+import os
+import uuid
+import json
+from pathlib import Path
+import gradio as gr
+from dotenv import load_dotenv
+from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
+from langchain_community.vectorstores import Chroma
+from huggingface_hub import login
+import openai
+
+def install_packages():
+    packages = ["openai==0.28", "langchain_community", "sentence-transformers", "chromadb", "huggingface_hub", "python-dotenv", "numpy", "scipy", "scikit-learn"]
+    for package in packages:
+        subprocess.check_call([sys.executable, "-m", "pip", "install", "--upgrade", package])
+
+install_packages()
+
+# Load environment variables from .env file
+load_dotenv()
+
+# Get API tokens from environment variables
+openai.api_key = os.getenv("OPENAI_API_KEY")  # Ensure OPENAI_API_KEY is in your .env file
+hf_token = os.getenv("hf_token")
+
+if not hf_token:
+    raise ValueError("Hugging Face token is missing. Please set 'hf_token' as an environment variable.")
+
+# Log in to Hugging Face
+login(hf_token)
+print("Logged in to Hugging Face successfully.")
+
+# Set up embeddings and vector store
+embeddings = SentenceTransformerEmbeddings(model_name="thenlper/gte-large")
+collection_name = 'report-10k-2024'
+
+vectorstore_persisted = Chroma(
+    collection_name=collection_name,
+    persist_directory='./report_10kdb',
+    embedding_function=embeddings
+)
+
+# Set up the retriever
+retriever = vectorstore_persisted.as_retriever(
+    search_type='similarity',
+    search_kwargs={'k': 5}
+)
+
+# Define Q&A system messages
+qna_system_message = """
+You are an AI assistant to help Finsights Grey Inc., an innovative financial technology firm, develop a Retrieval-Augmented Generation (RAG) system to automate the extraction, summarization, and analysis of information from 10-K reports. Your knowledge base was last updated in August 2023.
+User input will have the context required by you to answer user questions. This context will begin with the token: ###Context.
+The context contains references to specific portions of a 10-K report relevant to the user query.
+User questions will begin with the token: ###Question.
+Your response should only be about the question asked and the context provided.
+Do not mention anything about the context in your final answer.
+If the answer is not found in the context, it is very important for you to respond with "I don't know."
+Always quote the source when you use the context. Cite the relevant source at the end of your response under the section - Source:
+Do not make up sources. Use the links provided in the sources section of the context and nothing else. You are prohibited from providing other links/sources.
+Here is an example of how to structure your response:
+Answer:
+[Answer]
+Source:
+[Source]
+"""
+
+qna_user_message_template = """
+###Context
+Here are some documents that are relevant to the question.
+{context}
 """
 
 # Define the predict function
@@ -89,7 +161,3 @@ with gr.Blocks(theme="gr.themes.Monochrome()") as demo:
 
 demo.queue()
 demo.launch()
-
-
-
-
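
The predict function referenced by the "# Define the predict function" context line, and the gr.Blocks UI named in the second hunk header, sit outside the changed hunks and so are not visible in this diff. A minimal sketch of how the pieces added here (the retriever, the two prompt templates, and the pinned openai==0.28 chat API) could feed such a function is shown below; the model choice, the ###Question handling, and the metadata "source" key are assumptions, since the template in the diff only carries {context}.

# Hypothetical sketch only -- predict() itself is not part of this commit.
def predict(question):
    # Retrieve the top-k chunks from the persisted Chroma collection
    relevant_docs = retriever.get_relevant_documents(question)
    context = "\n\n".join(
        f"{doc.page_content}\nSource: {doc.metadata.get('source', 'N/A')}"
        for doc in relevant_docs
    )

    # Fill the user template with the retrieved context and append the question
    user_message = qna_user_message_template.format(context=context)
    user_message += f"\n###Question\n{question}"

    # openai==0.28 style chat completion call (model name is an assumption)
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": qna_system_message},
            {"role": "user", "content": user_message},
        ],
        temperature=0,
    )
    return response["choices"][0]["message"]["content"]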
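
Likewise, the demo.queue() and demo.launch() context lines belong to a gr.Blocks app that this commit leaves unchanged. A plausible minimal wiring, assuming a single question box and answer box (component names and labels are illustrative), would look like:

# Hypothetical sketch only -- the actual Blocks layout is not shown in this diff.
with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
    question_box = gr.Textbox(label="Ask a question about the 10-K report")
    answer_box = gr.Textbox(label="Answer", lines=10)
    submit_button = gr.Button("Submit")
    submit_button.click(fn=predict, inputs=question_box, outputs=answer_box)

demo.queue()
demo.launch()

Note that the hunk header shows the existing file passing the theme as the string "gr.themes.Monochrome()"; Gradio expects either a theme object such as gr.themes.Monochrome() or a theme name string, so the quoted form likely falls back to the default theme with a warning.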