Abijith committed on
Commit 2154fa4 · verified · 1 Parent(s): 7a8a642

Update app.py

Files changed (1)
  1. app.py +30 -62
app.py CHANGED
@@ -4,6 +4,7 @@ import numpy as np
import re
import tempfile
from datetime import datetime
+ from transformers import AutoModelForCausalLM, AutoTokenizer
from langchain_community.document_loaders import PDFPlumberLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
@@ -18,46 +19,8 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain.llms.huggingface_pipeline import HuggingFacePipeline
from huggingface_hub import login

- #### Model Testing ###########
- print(f"-- Model test started")
- from transformers import AutoModelForCausalLM, AutoTokenizer
-
- model_name = "Qwen/Qwen2.5-0.5B-Instruct"
-
- model = AutoModelForCausalLM.from_pretrained(
-     model_name,
- )
- tokenizer = AutoTokenizer.from_pretrained(model_name)
-
- prompt = "Give me a short introduction to large language model."
- messages = [
-     {"role": "system", "content": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant."},
-     {"role": "user", "content": prompt}
- ]
- text = tokenizer.apply_chat_template(
-     messages,
-     tokenize=False,
-     add_generation_prompt=True
- )
- print(f"-- Model Invoking")
- model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
-
- generated_ids = model.generate(
-     **model_inputs,
-     max_new_tokens=512
- )
- generated_ids = [
-     output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
- ]
-
- response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
- print(f"-- Model testresponse{model_inputs}")
-
- ##########################
-

# Load the model and tokenizer
- # model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
model_name= "Qwen/Qwen2.5-0.5B-Instruct"

# Initialize classifier once for input guardrail
@@ -122,34 +85,16 @@ if uploaded_files:
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True,
-     padding_side="left" # Important for some models
)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
)
-
- # Create pipeline with generation parameters
- pipeline_llm = pipeline(
-     "text-generation",
-     model=model,
-     tokenizer=tokenizer,
-     max_new_tokens=1024,
-     temperature=0.3,
-     top_p=0.95,
-     repetition_penalty=1.15,
-     return_full_text=False # Important for response formatting
- )
-
- llm = HuggingFacePipeline(pipeline=pipeline_llm)
-
- llm_sample_resp = llm("Explain what is Retrieval Augmented Generation (RAG)?")
- print(f"-- llmsampleresponse:{llm_sample_resp}")
-
+

# Update prompt template
PROMPT_TEMPLATE = """
- <|system|>
+ <|User|>
You are a senior financial analyst. Analyze these financial reports:
1. Compare key metrics between documents
2. Identify trends across reporting periods
@@ -168,7 +113,6 @@ if uploaded_files:
    template=PROMPT_TEMPLATE,
    input_variables=["context", "question"]
)
- llm_chain = LLMChain(llm=llm, prompt=qa_prompt)

# Interactive Q&A Interface
st.header("🔍 Cross-Document Financial Inquiry")
@@ -215,11 +159,35 @@ if uploaded_files:
# Response Generation
context = "\n".join([doc.page_content for doc in filtered_docs])
print(f"-- Retrieved context:{context}")
+
+ # prompt
+ prompt = qa_prompt.format(context=context, question=user_input)
+ ####
+ # Generation
+ messages = [
+     {"role": "system", "content": "You are Financial assistant."},
+     {"role": "user", "content": prompt}
+ ]
+ text = tokenizer.apply_chat_template(
+     messages,
+     tokenize=False,
+     add_generation_prompt=True
+ )
+
+ model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

- analysis = llm_chain.run(
-     context=context,
-     question=user_input
+ print(f"-- Model Invoking")
+ generated_ids = model.generate(
+     **model_inputs,
+     max_new_tokens=512
)
+ generated_ids = [
+     output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
+ ]
+
+ analysis = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
+
+ ###
print(f"Analysis result:{analysis}")

# Response Cleaning
 
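For reference, here is a minimal, self-contained sketch of the generation path this commit moves app.py to: rather than wiring the model through HuggingFacePipeline and LLMChain, the retrieved context and the user question are formatted with qa_prompt, wrapped in a chat template via tokenizer.apply_chat_template, and passed straight to model.generate. The model name and the generate/decode steps are taken from the hunks above; the abbreviated prompt template, the PromptTemplate import, and the placeholder context/question values are illustrative assumptions, since the rest of app.py is not shown in this diff.

# Minimal sketch of the new generation flow (assumptions noted above).
from transformers import AutoModelForCausalLM, AutoTokenizer
from langchain.prompts import PromptTemplate  # assumed import; app.py's own import is not visible in this diff

model_name = "Qwen/Qwen2.5-0.5B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)

# Abbreviated stand-in for PROMPT_TEMPLATE; the real template in app.py is longer.
qa_prompt = PromptTemplate(
    template="<|User|>\nYou are a senior financial analyst.\nContext:\n{context}\nQuestion: {question}",
    input_variables=["context", "question"],
)

# Placeholder inputs standing in for the retrieved document chunks and the user's question.
context = "Revenue grew 12% year over year; operating margin was 18%."
user_input = "How did revenue and margins change across the period?"

# Build the prompt and wrap it in the model's chat template.
prompt = qa_prompt.format(context=context, question=user_input)
messages = [
    {"role": "system", "content": "You are Financial assistant."},
    {"role": "user", "content": prompt},
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

# Generate, then slice off the prompt tokens so only the new answer is decoded.
generated_ids = model.generate(**model_inputs, max_new_tokens=512)
generated_ids = [
    output_ids[len(input_ids):]
    for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
analysis = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
print(analysis)

Slicing each output sequence past len(input_ids) before batch_decode keeps the echoed prompt out of analysis; the removed pipeline handled the same concern with return_full_text=False.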