Update app.py
app.py CHANGED
@@ -4,6 +4,7 @@ import numpy as np
 import re
 import tempfile
 from datetime import datetime
+from transformers import AutoModelForCausalLM, AutoTokenizer
 from langchain_community.document_loaders import PDFPlumberLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_community.embeddings import HuggingFaceEmbeddings
@@ -18,46 +19,8 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 from langchain.llms.huggingface_pipeline import HuggingFacePipeline
 from huggingface_hub import login
 
-#### Model Testing ###########
-print(f"-- Model test started")
-from transformers import AutoModelForCausalLM, AutoTokenizer
-
-model_name = "Qwen/Qwen2.5-0.5B-Instruct"
-
-model = AutoModelForCausalLM.from_pretrained(
-    model_name,
-)
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-
-prompt = "Give me a short introduction to large language model."
-messages = [
-    {"role": "system", "content": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant."},
-    {"role": "user", "content": prompt}
-]
-text = tokenizer.apply_chat_template(
-    messages,
-    tokenize=False,
-    add_generation_prompt=True
-)
-print(f"-- Model Invoking")
-model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
-
-generated_ids = model.generate(
-    **model_inputs,
-    max_new_tokens=512
-)
-generated_ids = [
-    output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
-]
-
-response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
-print(f"-- Model testresponse{model_inputs}")
-
-##########################
-
 
 # Load the model and tokenizer
-# model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
 model_name= "Qwen/Qwen2.5-0.5B-Instruct"
 
 # Initialize classifier once for input guardrail
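Both the smoke test removed above and the in-app generation added at the end of this diff rely on tokenizer.apply_chat_template to turn a messages list into the model's chat prompt. A minimal sketch of just that step, assuming only the Qwen/Qwen2.5-0.5B-Instruct tokenizer named in the diff (the messages themselves are illustrative):

from transformers import AutoTokenizer

# Tokenizer id taken from the diff; the messages are made up for illustration.
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Summarize revenue growth."},
]

# tokenize=False returns the formatted prompt string instead of token ids;
# add_generation_prompt=True appends the assistant header so generation starts
# with the model's reply rather than a continuation of the user turn.
text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)
print(text)  # Qwen2.5 renders this as ChatML-style <|im_start|>/<|im_end|> turns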
@@ -122,34 +85,16 @@ if uploaded_files:
 tokenizer = AutoTokenizer.from_pretrained(
     model_name,
     trust_remote_code=True,
-    padding_side="left" # Important for some models
 )
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
     trust_remote_code=True,
 )
-
-# Create pipeline with generation parameters
-pipeline_llm = pipeline(
-    "text-generation",
-    model=model,
-    tokenizer=tokenizer,
-    max_new_tokens=1024,
-    temperature=0.3,
-    top_p=0.95,
-    repetition_penalty=1.15,
-    return_full_text=False # Important for response formatting
-)
-
-llm = HuggingFacePipeline(pipeline=pipeline_llm)
-
-llm_sample_resp = llm("Explain what is Retrieval Augmented Generation (RAG)?")
-print(f"-- llmsampleresponse:{llm_sample_resp}")
-
+
 
 # Update prompt template
 PROMPT_TEMPLATE = """
-<|
+<|User|>
 You are a senior financial analyst. Analyze these financial reports:
 1. Compare key metrics between documents
 2. Identify trends across reporting periods
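The pipeline removed here carried the generation settings (max_new_tokens=1024, temperature=0.3, top_p=0.95, repetition_penalty=1.15), while the direct model.generate() call added later in this diff passes only max_new_tokens=512. Those same knobs are accepted by generate() itself; a hedged sketch, assuming model and model_inputs are built exactly as in the updated app.py:

def generate_with_sampling(model, model_inputs):
    # Sampling settings copied from the removed pipeline() call; do_sample=True
    # is needed for temperature / top_p to take effect in generate().
    return model.generate(
        **model_inputs,
        max_new_tokens=1024,
        do_sample=True,
        temperature=0.3,
        top_p=0.95,
        repetition_penalty=1.15,
    )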
@@ -168,7 +113,6 @@ if uploaded_files:
     template=PROMPT_TEMPLATE,
     input_variables=["context", "question"]
 )
-llm_chain = LLMChain(llm=llm, prompt=qa_prompt)
 
 # Interactive Q&A Interface
 st.header("π Cross-Document Financial Inquiry")
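With LLMChain removed, qa_prompt is only used for string formatting (the next hunk calls qa_prompt.format(context=..., question=...)). A small self-contained sketch of that step, using a cut-down stand-in for PROMPT_TEMPLATE and made-up values:

from langchain.prompts import PromptTemplate

# Cut-down stand-in; the real PROMPT_TEMPLATE in app.py is longer.
TEMPLATE = """<|User|>
You are a senior financial analyst. Use the context to answer the question.
Context: {context}
Question: {question}"""

qa_prompt = PromptTemplate(template=TEMPLATE, input_variables=["context", "question"])

# .format() performs plain string substitution, so the result can be fed
# straight into the chat-template / generate path with no LLMChain in between.
prompt = qa_prompt.format(
    context="FY23 revenue grew 12% year over year.",  # illustrative
    question="How did revenue change in FY23?",       # illustrative
)
print(prompt)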
@@ -215,11 +159,35 @@ if uploaded_files:
 # Response Generation
 context = "\n".join([doc.page_content for doc in filtered_docs])
 print(f"-- Retrieved context:{context}")
+
+# prompt
+prompt = qa_prompt.format(context=context, question=user_input)
+####
+# Generation
+messages = [
+    {"role": "system", "content": "You are Financial assistant."},
+    {"role": "user", "content": prompt}
+]
+text = tokenizer.apply_chat_template(
+    messages,
+    tokenize=False,
+    add_generation_prompt=True
+)
+
+model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
 
-
-
-
+print(f"-- Model Invoking")
+generated_ids = model.generate(
+    **model_inputs,
+    max_new_tokens=512
 )
+generated_ids = [
+    output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
+]
+
+analysis = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
+
+###
 print(f"Analysis result:{analysis}")
 
 # Response Cleaning
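For reference, the generation block added above condenses into the following self-contained sketch. It keeps the model id, chat-template call, prompt-token slicing, and decode step from the diff, but substitutes a simplified prompt string for PROMPT_TEMPLATE and uses illustrative inputs:

from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_NAME = "Qwen/Qwen2.5-0.5B-Instruct"  # same model id as in the diff

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, trust_remote_code=True)

def answer(context: str, question: str, max_new_tokens: int = 512) -> str:
    # Same flow as the added block: prompt -> chat template -> generate ->
    # strip prompt tokens -> decode.
    prompt = f"Context:\n{context}\n\nQuestion: {question}"  # simplified; app.py uses PROMPT_TEMPLATE
    messages = [
        {"role": "system", "content": "You are Financial assistant."},
        {"role": "user", "content": prompt},
    ]
    text = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
    generated_ids = model.generate(**model_inputs, max_new_tokens=max_new_tokens)
    # Keep only the tokens produced after the prompt for each sequence.
    generated_ids = [
        out[len(inp):] for inp, out in zip(model_inputs.input_ids, generated_ids)
    ]
    return tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]

print(answer("FY23 revenue grew 12% year over year.", "How did revenue change?"))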