kajila committed on
Commit
70935d7
·
verified ·
1 Parent(s): b9321fd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +72 -4
app.py CHANGED
@@ -1,3 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  """
2
 
3
  # Define the predict function
@@ -89,7 +161,3 @@ with gr.Blocks(theme="gr.themes.Monochrome()") as demo:
89
 
90
  demo.queue()
91
  demo.launch()
92
-
93
-
94
-
95
-
 
1
+ import subprocess
2
+ import sys
3
+ import os
4
+ import uuid
5
+ import json
6
+ from pathlib import Path
7
+ import gradio as gr
8
+ from dotenv import load_dotenv
9
+ from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
10
+ from langchain_community.vectorstores import Chroma
11
+ from huggingface_hub import login
12
+ import openai
13
+
14
def install_packages():
    """Install the app's runtime dependencies with pip.

    Runs a single ``pip install --upgrade`` for all packages: one resolver
    pass is faster than one subprocess per package and lets pip resolve the
    pinned ``openai==0.28`` together with the other requirements instead of
    upgrading each in isolation.

    NOTE(review): installing packages at runtime is a workaround for
    environments without a requirements file; prefer declaring these in
    ``requirements.txt`` so the platform installs them once at build time.

    Raises:
        subprocess.CalledProcessError: if pip exits with a non-zero status.
    """
    packages = [
        "openai==0.28",
        "langchain_community",
        "sentence-transformers",
        "chromadb",
        "huggingface_hub",
        "python-dotenv",
        "numpy",
        "scipy",
        "scikit-learn",
    ]
    # List-form argv (shell=False) avoids shell-injection issues and quoting bugs.
    subprocess.check_call(
        [sys.executable, "-m", "pip", "install", "--upgrade", *packages]
    )
18
+
19
# Install/upgrade dependencies at startup, before any of them are exercised.
install_packages()

# Load environment variables from .env file
load_dotenv()

# Get API tokens from environment variables
openai.api_key = os.getenv("OPENAI_API_KEY")  # Ensure OPENAI_API_KEY is in your .env file
hf_token = os.getenv("hf_token")

# Fail fast if the Hugging Face token is absent; login() below requires it.
# NOTE(review): OPENAI_API_KEY is not validated the same way — if it is
# missing, failures surface later at the first OpenAI call; confirm intended.
if not hf_token:
    raise ValueError("Hugging Face token is missing. Please set 'hf_token' as an environment variable.")

# Log in to Hugging Face
login(hf_token)
print("Logged in to Hugging Face successfully.")
34
+
35
+ # Set up embeddings and vector store
36
+ embeddings = SentenceTransformerEmbeddings(model_name="thenlper/gte-large")
37
+ collection_name = 'report-10k-2024'
38
+
39
+ vectorstore_persisted = Chroma(
40
+ collection_name=collection_name,
41
+ persist_directory='./report_10kdb',
42
+ embedding_function=embeddings
43
+ )
44
+
45
+ # Set up the retriever
46
+ retriever = vectorstore_persisted.as_retriever(
47
+ search_type='similarity',
48
+ search_kwargs={'k': 5}
49
+ )
50
+
51
# Define Q&A system messages
# System prompt for the RAG assistant: restricts answers to the retrieved
# ###Context, mandates "I don't know" when the context lacks the answer, and
# requires citations under a "Source:" section using only links from context.
qna_system_message = """
You are an AI assistant to help Finsights Grey Inc., an innovative financial technology firm, develop a Retrieval-Augmented Generation (RAG) system to automate the extraction, summarization, and analysis of information from 10-K reports. Your knowledge base was last updated in August 2023.
User input will have the context required by you to answer user questions. This context will begin with the token: ###Context.
The context contains references to specific portions of a 10-K report relevant to the user query.
User questions will begin with the token: ###Question.
Your response should only be about the question asked and the context provided.
Do not mention anything about the context in your final answer.
If the answer is not found in the context, it is very important for you to respond with "I don't know."
Always quote the source when you use the context. Cite the relevant source at the end of your response under the section - Source:
Do not make up sources. Use the links provided in the sources section of the context and nothing else. You are prohibited from providing other links/sources.
Here is an example of how to structure your response:
Answer:
[Answer]
Source:
[Source]
"""
68
+
69
# User-message template: retrieved documents are injected at {context}.
# NOTE(review): no ###Question/{question} section appears here although the
# system prompt expects one — presumably the predict function appends the
# question separately; verify against that code before changing this string.
qna_user_message_template = """
###Context
Here are some documents that are relevant to the question.
{context}
"""
74
 
75
  # Define the predict function
 
161
 
162
# Enable request queuing, then start the Gradio server (blocks until exit).
demo.queue()
demo.launch()