Shriharsh committed
Commit 79cf2b9 · verified · 1 Parent(s): 40e020c

Create app.py

Files changed (1):
  1. app.py +262 -0
app.py ADDED
import gradio as gr
import pinecone
import PyPDF2
from transformers import AutoTokenizer, AutoModel
import torch
import re
import google.generativeai as genai
import os
import time
from google.api_core import exceptions

# Constants
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")  # Set in HF Spaces Secrets
PINECONE_INDEX_NAME = "diabetes-bot"
PINECONE_NAMESPACE = "general"
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")  # Set in HF Spaces Secrets
MODEL_NAME = "dmis-lab/biobert-base-cased-v1.1"

# Free tier limits
FREE_TIER_RPD_LIMIT = 1500  # Requests per day
FREE_TIER_RPM_LIMIT = 15  # Requests per minute
FREE_TIER_TPM_LIMIT = 1000000  # Tokens per minute
WARNING_THRESHOLD = 0.9  # Stop at 90% of each limit to stay safely inside the free tier

# Usage tracking
usage_file = "usage.txt"

def load_usage():
    if not os.path.exists(usage_file):
        return {"requests": [], "tokens": []}
    with open(usage_file, "r") as f:
        data = f.read().strip()
    if not data:
        return {"requests": [], "tokens": []}
    request_part, token_part = data.split("|")
    return {
        "requests": [float(t) for t in request_part.split(",") if t],
        "tokens": [(float(t), float(n)) for t, n in [pair.split(":") for pair in token_part.split(",") if pair]]
    }

def save_usage(requests, tokens):
    with open(usage_file, "w") as f:
        f.write(",".join(map(str, requests)) + "|" + ",".join(f"{t}:{n}" for t, n in tokens))

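# For reference, usage.txt is a single line: comma-separated request timestamps,
# a "|" separator, then comma-separated "timestamp:token_count" pairs, e.g.
# (hypothetical values):
#   1718000000.0,1718000060.5|1718000000.0:123.0,1718000060.5:456.0
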
def check_usage():
    usage = load_usage()
    now = time.time()

    # Clean up old requests (older than 24 hours)
    day_ago = now - 24 * 60 * 60
    usage["requests"] = [t for t in usage["requests"] if t > day_ago]

    # Clean up old token counts (older than 1 minute)
    minute_ago = now - 60
    usage["tokens"] = [(t, n) for t, n in usage["tokens"] if t > minute_ago]

    # Requests per day
    rpd = len(usage["requests"])
    rpd_limit = int(FREE_TIER_RPD_LIMIT * WARNING_THRESHOLD)
    if rpd >= rpd_limit:
        return False, f"Approaching daily request limit ({rpd}/{FREE_TIER_RPD_LIMIT}). Stopping to stay in the free tier. Try again tomorrow."

    # Requests per minute
    rpm = len([t for t in usage["requests"] if t > minute_ago])
    rpm_limit = int(FREE_TIER_RPM_LIMIT * WARNING_THRESHOLD)
    if rpm >= rpm_limit:
        return False, f"Approaching per-minute request limit ({rpm}/{FREE_TIER_RPM_LIMIT}). Wait a minute and try again."

    # Tokens per minute
    tpm = sum(n for t, n in usage["tokens"])
    tpm_limit = int(FREE_TIER_TPM_LIMIT * WARNING_THRESHOLD)
    if tpm >= tpm_limit:
        return False, f"Approaching token limit ({tpm}/{FREE_TIER_TPM_LIMIT} per minute). Wait a minute and try again."

    return True, (rpd, rpm, tpm)

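# check_usage() returns either (False, "warning message") when a threshold is
# reached, or (True, (rpd, rpm, tpm)) with the current counts, e.g.
# (True, (12, 2, 5340)) -- example values only.
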
# Initialize Pinecone
pc = pinecone.Pinecone(api_key=PINECONE_API_KEY)
index = pc.Index(PINECONE_INDEX_NAME)

# Initialize BioBERT for embedding queries
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModel.from_pretrained(MODEL_NAME)
if torch.cuda.is_available():
    model.cuda()

# Initialize Gemini and check available models
genai.configure(api_key=GEMINI_API_KEY)

# List available models to confirm free-tier access. list_models() returns names
# prefixed with "models/", and the loop variable must not shadow the BioBERT
# `model` above.
available_models = [m.name.removeprefix("models/") for m in genai.list_models()]
print("Available Gemini models:", available_models)

# Select a free-tier model (prefer gemini-1.5-pro, fall back to gemini-1.5-flash)
preferred_model = "gemini-1.5-pro"
fallback_model = "gemini-1.5-flash"
if preferred_model in available_models:
    gemini_model = genai.GenerativeModel(preferred_model)
    print(f"Using model: {preferred_model}")
elif fallback_model in available_models:
    gemini_model = genai.GenerativeModel(fallback_model)
    print(f"Falling back to model: {fallback_model}")
else:
    raise ValueError("No free-tier Gemini model available. Available models: " + str(available_models))

# Clean text
def clean_text(text):
    text = re.sub(r'<[^>]+>', '', text)  # Remove HTML tags
    text = re.sub(r'[^\x00-\x7F]+', ' ', text)  # Remove non-ASCII characters
    text = re.sub(r'\s+', ' ', text)  # Normalize whitespace
    return text.strip()

# Embed text using BioBERT
def embed_text(text):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
    if torch.cuda.is_available():
        inputs = {k: v.cuda() for k, v in inputs.items()}
    with torch.no_grad():
        outputs = model(**inputs)
    # Use the [CLS] token's final hidden state as the sentence embedding
    embedding = outputs.last_hidden_state[:, 0, :].cpu().numpy()[0]
    return embedding.tolist()

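# embed_text returns a 768-dimensional list of floats (BioBERT-base hidden
# size); the Pinecone index dimension must match for index.query to work.
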
# Extract text from a PDF (first 10 pages at most)
def extract_pdf_text(pdf_file):
    reader = PyPDF2.PdfReader(pdf_file)
    num_pages = min(len(reader.pages), 10)  # Limit to 10 pages
    text = ""
    for page in range(num_pages):
        # extract_text() can return None for image-only pages
        text += (reader.pages[page].extract_text() or "") + "\n"
    return clean_text(text)

# Retrieve relevant chunks from Pinecone
def retrieve_from_pinecone(query, top_k=5):
    query_embedding = embed_text(query)
    results = index.query(
        namespace=PINECONE_NAMESPACE,
        vector=query_embedding,
        top_k=top_k,
        include_metadata=True
    )
    retrieved_chunks = [match["metadata"]["chunk"] for match in results["matches"]]
    return "\n".join(retrieved_chunks)

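# Note: this assumes each vector in the index carries a "chunk" metadata field
# containing the source text; adjust the key if the index was built differently.
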
# Count tokens using the Gemini API
def count_tokens(text):
    try:
        response = gemini_model.count_tokens(text)
        return response.total_tokens
    except exceptions.ResourceExhausted:
        # google.api_core raises ResourceExhausted (HTTP 429) when quota is hit;
        # return 0 so callers can detect the failure
        return 0

# Shared prompt template for token counting and answer generation
def build_prompt(query, context):
    return f"""
You are a diabetes research assistant. Answer the following question based on the provided context. If the context is insufficient, use your knowledge to provide a helpful answer, but note if the information might be limited.

**Question**: {query}

**Context**:
{context}

**Answer**:
"""

# Generate an answer using Gemini
def generate_answer(query, context):
    try:
        response = gemini_model.generate_content(build_prompt(query, context))
        return response.text
    except exceptions.ResourceExhausted as e:
        return f"Error: Gemini API quota exceeded ({str(e)}). Try again later."
    except Exception as e:
        return f"Error generating answer: {str(e)}"

# Main function to handle user input
def diabetes_bot(query, pdf_file=None):
    # Check usage limits
    can_proceed, usage_info = check_usage()
    if not can_proceed:
        return usage_info

    # Step 1: Get context from the PDF, if one was uploaded
    pdf_context = ""
    if pdf_file is not None:
        pdf_context = extract_pdf_text(pdf_file)
        if pdf_context:
            pdf_context = f"Uploaded PDF content:\n{pdf_context}\n\n"

    # Step 2: Retrieve relevant chunks from Pinecone
    pinecone_context = retrieve_from_pinecone(query)
    if pinecone_context:
        pinecone_context = f"Pinecone retrieved content (latest research, 2010 onward):\n{pinecone_context}\n\n"

    # Step 3: Combine contexts
    full_context = pdf_context + pinecone_context
    if not full_context.strip():
        full_context = "No relevant context found in Pinecone or uploaded PDF."

    # Step 4: Count tokens for the prompt (the same prompt generate_answer will use)
    prompt = build_prompt(query, full_context)
    input_tokens = count_tokens(prompt)
    if input_tokens == 0:  # Quota exceeded during token counting
        return "Error: Gemini API quota exceeded while counting tokens. Try again later."

    # Update usage
    usage = load_usage()
    now = time.time()
    usage["requests"].append(now)
    usage["tokens"].append((now, input_tokens))
    save_usage(usage["requests"], usage["tokens"])

    # Step 5: Generate the answer using Gemini
    answer = generate_answer(query, full_context)

    # Step 6: Count output tokens and update usage
    output_tokens = count_tokens(answer)
    if output_tokens == 0:  # Quota exceeded during output token counting
        return answer + "\n\nError: Gemini API quota exceeded while counting output tokens. Usage stats may be incomplete."
    usage = load_usage()
    usage["tokens"].append((now, output_tokens))
    save_usage(usage["requests"], usage["tokens"])

    # Step 7: Show usage stats (check_usage returns a message instead of stats
    # if a limit was just reached)
    ok, info = check_usage()
    if ok:
        rpd, rpm, tpm = info
        usage_message = f"\n\nUsage: {rpd}/{FREE_TIER_RPD_LIMIT} requests today, {rpm}/{FREE_TIER_RPM_LIMIT} requests this minute, {tpm}/{FREE_TIER_TPM_LIMIT} tokens this minute."
    else:
        usage_message = f"\n\n{info}"

    return answer + usage_message

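# Example round trip (hypothetical): diabetes_bot("What are GLP-1 agonists?")
# returns the generated answer followed by a line like
# "Usage: 12/1500 requests today, 2/15 requests this minute, 5340/1000000 tokens this minute."
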
# Gradio interface
with gr.Blocks() as app:
    gr.Markdown("""
# Diabetes-Bot 🩺
Ask questions about diabetes or upload a research paper (up to 10 pages) for Q&A.
**Powered by the latest diabetes research (2010 onward). For pre-2010 papers, upload your research PDF!**
**Running on the Gemini API free tier (1,500 requests/day, 15 requests/minute, 1M tokens/minute). No payment method linked; strictly free!**
""")

    with gr.Row():
        query_input = gr.Textbox(label="Ask a question", placeholder="e.g., What are the latest treatments for type 2 diabetes?")
        pdf_input = gr.File(label="Upload a PDF (optional, max 10 pages)", file_types=[".pdf"])

    submit_button = gr.Button("Submit")
    output = gr.Textbox(label="Answer")

    submit_button.click(
        fn=diabetes_bot,
        inputs=[query_input, pdf_input],
        outputs=output
    )

# Launch the app
app.launch()