Jatin Mehra committed on
Commit
431214c
·
1 Parent(s): 35d0c2c

feat: add Flask UI with configuration and run script

Browse files
config.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ # Flask app configuration
4
class Config:
    """Base Flask settings shared by every environment-specific subclass.

    ``SECRET_KEY`` is read from the ``SECRET_KEY`` environment variable when
    the class body is evaluated, with a fixed string as the fallback.
    """

    # NOTE(review): the fallback is a hard-coded, publicly visible value;
    # presumably real deployments must export SECRET_KEY -- confirm.
    SECRET_KEY = os.getenv('SECRET_KEY', 'dev-secret-key')
    TESTING = False
    DEBUG = False
8
+
9
class DevelopmentConfig(Config):
    """Development settings: inherits ``Config`` and switches ``DEBUG`` on."""

    DEBUG = True
11
+
12
class TestingConfig(Config):
    """Test-suite settings: inherits ``Config`` and switches ``TESTING`` on."""

    TESTING = True
14
+
15
class ProductionConfig(Config):
    """Production settings: inherits ``Config`` and states ``DEBUG`` off explicitly."""

    # Same value as the base class; repeated so the production choice is visible here.
    DEBUG = False
17
+
18
# Maps an environment name to its settings class. The 'default' entry is the
# one used when a requested name is not listed.
config = dict(
    development=DevelopmentConfig,
    testing=TestingConfig,
    production=ProductionConfig,
    default=DevelopmentConfig,
)
25
+
26
+ # Get configuration by name
27
def get_config(config_name):
    """Return the settings class registered under ``config_name``.

    A name that is not a key of ``config`` resolves to the ``'default'`` entry.
    """
    fallback = config['default']
    return config[config_name] if config_name in config else fallback
pyproject.toml CHANGED
@@ -35,7 +35,10 @@ dependencies = [
35
  "playwright>=1.41.0",
36
  "asyncio>=3.4.3",
37
  "sqlalchemy>=2.0.37",
38
- "passlib>=1.7.4"
 
 
 
39
  ]
40
 
41
  [project.optional-dependencies]
 
35
  "playwright>=1.41.0",
36
  "asyncio>=3.4.3",
37
  "sqlalchemy>=2.0.37",
38
+ "passlib>=1.7.4",
39
+ "flask",
40
+ "flask_cors",
41
+ "PyJWT"
42
  ]
43
 
44
  [project.optional-dependencies]
src/crawlgpt/ui/__init__.py ADDED
File without changes
src/crawlgpt/ui/app.py ADDED
@@ -0,0 +1,478 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, request, jsonify, session
2
+ from flask_cors import CORS
3
+ import asyncio
4
+ import time
5
+ from datetime import datetime
6
+ import json
7
+ import jwt
8
+ from functools import wraps
9
+ import os
10
+ from collections import defaultdict
11
+
12
+ from src.crawlgpt.core.LLMBasedCrawler import Model
13
+ from src.crawlgpt.core.database import save_chat_message, get_chat_history, delete_user_chat_history, restore_chat_history
14
+ from src.crawlgpt.utils.monitoring import MetricsCollector, Metrics
15
+ from src.crawlgpt.utils.data_manager import DataManager
16
+ from src.crawlgpt.utils.content_validator import ContentValidator
17
+ from src.crawlgpt.ui.login import authenticate_user, create_user # Assuming this function exists or can be adapted
18
+
19
# Application object; CORS is enabled with the flask_cors defaults.
app = Flask(__name__)
CORS(app)

# Key used to sign and verify the JWTs handled by this module.
app.secret_key = os.getenv('SECRET_KEY', 'dev-secret-key')
TOKEN_EXPIRATION = 24 * 60 * 60  # token lifetime in seconds (24 hours)

# Module-wide component instances, created once when the module is imported.
model = Model()
data_manager = DataManager()
content_validator = ContentValidator()
metrics_collector = MetricsCollector()

# In-memory per-user state keyed by user id
# (in production, use Redis or a database).
user_sessions = {}

# Rate limiting: RATE_LIMIT requests per user per minute.
RATE_LIMIT = 10
rate_limit_data = defaultdict(list)  # user id -> timestamps of recent requests
37
+
38
+ # Rate limiter decorator for AI-intensive endpoints
39
def rate_limit(f):
    """Limit each user to ``RATE_LIMIT`` calls of ``f`` per 60-second window.

    Must wrap a view that receives ``current_user_id`` as its first positional
    argument. Request timestamps are kept per user in ``rate_limit_data``;
    entries older than the window are dropped on every call.

    When the limit is reached, ``f`` is not called and a 429 JSON response
    with a ``Retry-After`` header is returned. Every response that exposes
    ``headers`` (directly, or as the first item of a tuple) gets the
    ``X-RateLimit-Limit`` / ``-Remaining`` / ``-Reset`` headers.
    """
    window_seconds = 60

    def _reset_at(timestamps, now):
        # The window frees a slot when its oldest request ages out.
        return int(min(timestamps, default=now) + window_seconds)

    def _attach_headers(response, remaining, reset_at):
        # Views may return a Response, a (body, status, ...) tuple, or a plain
        # dict/str; only objects that really carry headers are touched.
        target = response[0] if isinstance(response, tuple) and response else response
        if hasattr(target, 'headers'):
            target.headers['X-RateLimit-Limit'] = str(RATE_LIMIT)
            target.headers['X-RateLimit-Remaining'] = str(max(0, remaining))
            target.headers['X-RateLimit-Reset'] = str(reset_at)
        return response

    @wraps(f)
    def decorated(current_user_id, *args, **kwargs):
        current_time = time.time()

        # Clean up old timestamps (older than one window)
        recent = [
            t for t in rate_limit_data[current_user_id]
            if current_time - t < window_seconds
        ]
        rate_limit_data[current_user_id] = recent

        # Reject without calling the view once the window is full
        if len(recent) >= RATE_LIMIT:
            reset_at = _reset_at(recent, current_time)
            response = jsonify({
                'success': False,
                'message': 'Rate limit exceeded. Please try again later.'
            }), 429
            response[0].headers['Retry-After'] = str(max(1, reset_at - int(current_time)))
            return _attach_headers(response, 0, reset_at)

        # Count this request, then process it
        recent.append(current_time)
        response = f(current_user_id, *args, **kwargs)

        # Re-read the stored list: it may have been replaced while f ran.
        timestamps = rate_limit_data[current_user_id]
        return _attach_headers(
            response,
            RATE_LIMIT - len(timestamps),
            _reset_at(timestamps, current_time),
        )

    return decorated
82
+
83
+ # Authentication decorator
84
def token_required(f):
    """Require a valid JWT on the request and pass its user id to ``f``.

    The token is the second whitespace-separated part of the ``Authorization``
    header (e.g. ``Bearer <token>``). It is verified with ``app.secret_key``
    using HS256, and its ``user_id`` claim is passed to ``f`` as the first
    positional argument. On a user's first authenticated request an entry is
    created for them in ``user_sessions``.

    Returns a 401 JSON response when the header is absent or has no token
    part, when the token fails verification, or when it has no ``user_id``.
    """
    @wraps(f)
    def decorated(*args, **kwargs):
        # A header without a second part (e.g. a bare token) counts as missing
        # instead of raising IndexError.
        header_parts = request.headers.get('Authorization', '').split()
        token = header_parts[1] if len(header_parts) >= 2 else None

        if not token:
            return jsonify({'message': 'Token is missing!'}), 401

        try:
            data = jwt.decode(token, app.secret_key, algorithms=["HS256"])
            current_user_id = data['user_id']
        except (jwt.InvalidTokenError, KeyError):
            return jsonify({'message': 'Token is invalid!'}), 401

        # Session setup lives outside the try block so that a failure while
        # building Model()/MetricsCollector() is not reported as a bad token.
        if current_user_id not in user_sessions:
            user_sessions[current_user_id] = {
                'model': Model(),
                'metrics': MetricsCollector(),
                'url_processed': False
            }

        return f(current_user_id, *args, **kwargs)
    return decorated
110
+
111
+ # Welcome endpoint
112
@app.route('/', methods=['GET'])
def welcome():
    """Return a fixed JSON greeting; this route requires no token."""
    greeting = {'message': 'Welcome to the Crawlgpt API!'}
    return jsonify(greeting)
115
+
116
+ # USER REGISTRATION
117
@app.route('/api/register', methods=['POST'])
def register():
    """Create a user from a JSON body holding ``username``, ``password`` and ``email``.

    Returns 400 when any of the three fields is missing or empty (including a
    missing or non-JSON body) or when the user is reported as already
    existing; otherwise returns a success message.
    """
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        # Missing or non-object body: treat as "no fields supplied".
        data = {}

    username = data.get('username')
    password = data.get('password')
    email = data.get('email')
    if not username or not password or not email:
        return jsonify({'message': 'Username, password and email are required!'}), 400

    # NOTE(review): this only matches when the submitted password is also the
    # existing user's password, so it cannot detect every taken username.
    if authenticate_user(username, password):
        return jsonify({'message': 'User already exists!'}), 400

    # Presumably create_user signals a duplicate by returning False -- TODO
    # confirm against login.py. A None return is still treated as success.
    created = create_user(username, password, email)
    if created is False:
        return jsonify({'message': 'User already exists!'}), 400

    return jsonify({'message': 'User created successfully!'})
133
+
134
+
135
+ # Login endpoint
136
@app.route('/api/login', methods=['POST'])
def login():
    """Authenticate a user and issue an HS256-signed JWT.

    Expects a JSON body with ``username`` and ``password``. Returns 401 when
    either is missing or when ``authenticate_user`` rejects the credentials.
    On success returns the token plus the user's id and username; the token
    expires ``TOKEN_EXPIRATION`` seconds after it is issued.
    """
    auth = request.get_json(silent=True)

    if not isinstance(auth, dict) or not auth.get('username') or not auth.get('password'):
        return jsonify({'message': 'Could not verify'}), 401

    user = authenticate_user(auth.get('username'), auth.get('password'))

    if not user:
        return jsonify({'message': 'Invalid credentials'}), 401

    # time.time() is a true epoch timestamp. Calling .timestamp() on the naive
    # datetime.utcnow() result treats it as local time and skews the expiry by
    # the server's UTC offset.
    token = jwt.encode({
        'user_id': user.id,
        'username': user.username,
        'exp': int(time.time()) + TOKEN_EXPIRATION
    }, app.secret_key, algorithm="HS256")

    # PyJWT 1.x returns bytes, which jsonify cannot serialize.
    if isinstance(token, bytes):
        token = token.decode('utf-8')

    return jsonify({'token': token, 'user': {'id': user.id, 'username': user.username}})
157
+
158
+ # URL processing endpoint
159
@app.route('/api/process-url', methods=['POST'])
@token_required
@rate_limit
def process_url(current_user_id):
    """Extract content from the posted URL into the caller's model.

    Expects a JSON body with ``url``. Returns 400 when the URL is missing,
    blank, not a string, or rejected by ``content_validator``. Otherwise the
    extraction coroutine is run to completion and its result dict
    (``success`` / ``message``) is returned as JSON. On successful extraction
    the session is marked as having a processed URL, a metrics entry is
    recorded and a system chat message is saved. Unexpected errors outside
    the extraction yield a 500.
    """
    data = request.get_json(silent=True)
    url = data.get('url') if isinstance(data, dict) else None

    # Also covers a missing body and non-string values such as numbers.
    if not isinstance(url, str) or not url.strip():
        return jsonify({'success': False, 'message': 'Please enter a valid URL.'}), 400
    url = url.strip()

    user_session = user_sessions[current_user_id]
    user_model = user_session['model']  # per-user model, not the module-level one

    async def extract_content():
        """Run the extraction and translate its outcome into a result dict."""
        start_time = time.time()

        try:
            success, msg = await user_model.extract_content_from_url(url)

            if not success:
                return {'success': False, 'message': msg}

            user_session['url_processed'] = True
            user_session['metrics'].record_request(
                success=True,
                response_time=time.time() - start_time,
                tokens_used=len(user_model.context.split())
            )

            # Save system message about URL processing
            save_chat_message(
                current_user_id,
                f"Content from {url} processed",
                "system",
                user_model.context
            )

            return {'success': True, 'message': 'URL processed successfully'}

        except Exception as e:
            user_session['metrics'].record_request(
                success=False,
                response_time=time.time() - start_time,
                tokens_used=0
            )
            return {'success': False, 'message': str(e)}

    try:
        if not content_validator.is_valid_url(url):
            return jsonify({'success': False, 'message': 'Invalid URL format'}), 400

        # asyncio.run creates a fresh loop, closes it afterwards and does not
        # leave a closed loop installed as the thread's current loop.
        result = asyncio.run(extract_content())

        return jsonify(result)

    except Exception as e:
        return jsonify({'success': False, 'message': f"Error processing URL: {str(e)}"}), 500
224
+
225
+ # Chat endpoint
226
@app.route('/api/chat', methods=['POST'])
@token_required
@rate_limit
def chat_endpoint(current_user_id):
    """Answer a chat message using the caller's model.

    JSON body fields: ``message`` (required), ``temperature`` (default 0.7),
    ``max_tokens`` (default 5000), ``model_id`` (default
    ``llama-3.1-8b-instant``) and ``use_summary``. When ``use_summary`` is
    omitted, the value stored in the session by the settings endpoint is
    used, falling back to False.

    Returns 400 when no URL has been processed for this session or when the
    message is missing or blank. On success the user message and the
    assistant response are both saved and a metrics entry is recorded. Any
    exception during generation is recorded as a failed request and
    reported with a 500.
    """
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}

    user_message = data.get('message')
    temperature = data.get('temperature', 0.7)
    max_tokens = data.get('max_tokens', 5000)
    model_id = data.get('model_id', 'llama-3.1-8b-instant')

    user_session = user_sessions[current_user_id]
    user_model = user_session['model']

    # An explicit per-request value wins; otherwise honour the stored setting.
    use_summary = data.get('use_summary', user_session.get('use_summary', False))

    if not user_session['url_processed']:
        return jsonify({'success': False, 'message': 'Please process a URL first'}), 400

    if not isinstance(user_message, str) or not user_message.strip():
        return jsonify({'success': False, 'message': 'Please enter a message.'}), 400

    start_time = time.time()
    try:
        # Save user message to database
        save_chat_message(
            current_user_id,
            user_message,
            "user",
            user_model.context
        )

        # Generate response
        response = user_model.generate_response(
            user_message,
            temperature,
            max_tokens,
            model_id,
            use_summary=use_summary
        )

        # Save assistant response to database
        save_chat_message(
            current_user_id,
            response,
            "assistant",
            user_model.context
        )

        # Word count is used as the token figure for metrics.
        user_session['metrics'].record_request(
            success=True,
            response_time=time.time() - start_time,
            tokens_used=len(response.split())
        )

        return jsonify({
            'success': True,
            'response': response,
        })

    except Exception as e:
        user_session['metrics'].record_request(
            success=False,
            response_time=time.time() - start_time,
            tokens_used=0
        )
        return jsonify({'success': False, 'message': f"Error generating response: {str(e)}"}), 500
290
+
291
+ # Get chat history
292
@app.route('/api/chat/history', methods=['GET'])
@token_required
def get_history(current_user_id):
    """Return the caller's stored chat messages as JSON.

    Each item carries ``role``, ``content`` and ``timestamp``. Any exception
    is reported as a 500 with the error text.
    """
    try:
        messages = []
        for entry in get_chat_history(current_user_id):
            messages.append({
                "role": entry.role,
                "content": entry.message,
                "timestamp": entry.timestamp
            })

        payload = {'success': True, 'messages': messages}
        return jsonify(payload)

    except Exception as e:
        return jsonify({'success': False, 'message': f"Error fetching history: {str(e)}"}), 500
308
+
309
+ # Clear chat history
310
@app.route('/api/chat/clear', methods=['POST'])
@token_required
def clear_history(current_user_id):
    """Delete the caller's chat history and mark the session as having no processed URL.

    Any exception is reported as a 500 with the error text.
    """
    try:
        delete_user_chat_history(current_user_id)

        session_state = user_sessions[current_user_id]
        session_state['url_processed'] = False

        confirmation = {'success': True, 'message': 'Chat history cleared'}
        return jsonify(confirmation)

    except Exception as e:
        return jsonify({'success': False, 'message': f"Error clearing history: {str(e)}"}), 500
320
+
321
+ # Restore chat history and context
322
@app.route('/api/chat/restore', methods=['POST'])
@token_required
def restore_history(current_user_id):
    """Rebuild the caller's model context and vector database from saved chat history.

    The stored messages are loaded, the model is cleared, and the distinct
    non-empty ``context`` values of the messages are joined with newlines to
    form the new context. When that context is non-empty it is chunked,
    summarized and added to the model's database. ``url_processed`` ends up
    True only if some context was restored. Any exception is reported as a
    500 with the error text.
    """
    try:
        user_session = user_sessions[current_user_id]
        user_model = user_session['model']

        # Load first so a failing load does not leave the model wiped.
        # NOTE(review): assumes restore_chat_history does not depend on the
        # model's state -- confirm against core.database.
        messages = restore_chat_history(current_user_id)

        # Clear existing model state
        user_model.clear()

        # The endpoints in this module save the model's whole context with
        # every message, so identical contexts repeat once per message.
        # Keep each distinct context once, in first-seen order.
        context_parts = list(dict.fromkeys(
            msg.get('context') for msg in messages
            if msg.get('context')
        ))
        user_model.context = "\n".join(context_parts)

        # Rebuild vector database from context
        if user_model.context:
            chunks = user_model.chunk_text(user_model.context)
            summaries = [user_model.summarizer.generate_summary(chunk) for chunk in chunks]
            user_model.database.add_data(chunks, summaries)

        # The model was cleared above, so the flag must follow what was
        # actually restored rather than keep its previous value.
        user_session['url_processed'] = bool(user_model.context)

        return jsonify({'success': True, 'message': 'Full conversation state restored'})

    except Exception as e:
        return jsonify({'success': False, 'message': f"Restoration failed: {str(e)}"}), 500
353
+
354
+ # Get metrics
355
@app.route('/api/metrics', methods=['GET'])
@token_required
def get_metrics(current_user_id):
    """Return the caller's session metrics as a JSON dict.

    Any exception is reported as a 500 with the error text.
    """
    try:
        collector = user_sessions[current_user_id]['metrics']
        snapshot = collector.metrics.to_dict()
        return jsonify({'success': True, 'metrics': snapshot})

    except Exception as e:
        return jsonify({'success': False, 'message': f"Error fetching metrics: {str(e)}"}), 500
365
+
366
+ # Export data
367
@app.route('/api/export', methods=['GET'])
@token_required
def export_data(current_user_id):
    """Return the caller's metrics, vector database and chat messages in one JSON payload.

    The payload under ``data`` has the keys ``metrics``, ``vector_database``
    and ``messages``. Each message carries ``role``, ``content``, ``context``
    and ``timestamp``. Any exception is reported as a 500 with the error text.
    """
    try:
        user_session = user_sessions[current_user_id]
        user_model = user_session['model']

        messages = []
        for entry in get_chat_history(current_user_id):
            messages.append({
                "role": entry.role,
                "content": entry.message,
                "context": entry.context,
                "timestamp": entry.timestamp
            })

        # Named 'payload' so the local does not shadow this view function.
        payload = {
            "metrics": user_session['metrics'].metrics.to_dict(),
            "vector_database": user_model.database.to_dict(),
            "messages": messages
        }

        return jsonify({'success': True, 'data': payload})

    except Exception as e:
        return jsonify({'success': False, 'message': f"Export failed: {str(e)}"}), 500
392
+
393
+ # Import data
394
@app.route('/api/import', methods=['POST'])
@token_required
def import_data(current_user_id):
    """Replace the caller's model state, chat history and metrics with an imported backup.

    Expects a JSON body whose ``data`` value is a dict with the keys
    ``metrics``, ``vector_database`` and ``messages``. Each message must be a
    dict with ``content`` and ``role``; ``context`` is optional. Returns 400
    when the body or the backup structure is invalid. All validation happens
    before the existing history is deleted. Any other exception is reported
    as a 500 with the error text.
    """
    try:
        user_session = user_sessions[current_user_id]
        user_model = user_session['model']

        body = request.get_json(silent=True)
        imported_data = body.get('data') if isinstance(body, dict) else None
        if not imported_data:
            return jsonify({'success': False, 'message': 'No data provided'}), 400

        # Validate imported data structure
        required_keys = ("metrics", "vector_database", "messages")
        if not isinstance(imported_data, dict) or not all(key in imported_data for key in required_keys):
            return jsonify({'success': False, 'message': 'Invalid backup file structure'}), 400

        # Check every message up front: failing half-way through the save loop
        # would happen after the old history is already gone.
        messages = imported_data["messages"]
        if not isinstance(messages, list) or not all(
            isinstance(msg, dict) and "content" in msg and "role" in msg
            for msg in messages
        ):
            return jsonify({'success': False, 'message': 'Invalid backup file structure'}), 400

        # Parse metrics before anything destructive for the same reason.
        imported_metrics = Metrics.from_dict(imported_data["metrics"])

        # Import data with proper state management
        user_model.import_state(imported_data)

        # Replace the stored chat history with the imported messages
        delete_user_chat_history(current_user_id)
        for msg in messages:
            save_chat_message(
                current_user_id,
                msg["content"],
                msg["role"],
                msg.get("context", "")
            )

        # Set URL processed state if there's context
        user_session['url_processed'] = bool(user_model.context)

        # Update metrics
        collector = MetricsCollector()
        collector.metrics = imported_metrics
        user_session['metrics'] = collector

        return jsonify({'success': True, 'message': 'Data imported successfully'})

    except Exception as e:
        return jsonify({'success': False, 'message': f"Import failed: {str(e)}"}), 500
441
+
442
+ # Update settings
443
@app.route('/api/settings', methods=['POST'])
@token_required
def update_settings(current_user_id):
    """Store per-session settings sent in the JSON body.

    Only ``use_summary`` is recognised; it is written to the caller's session
    unchanged. A missing or non-object body updates nothing and still
    succeeds. Any exception is reported as a 500 with the error text.
    """
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        # No usable body: nothing to update, rather than a TypeError below.
        data = {}
    user_session = user_sessions[current_user_id]

    try:
        # Update any settings passed in the request
        if 'use_summary' in data:
            user_session['use_summary'] = data['use_summary']

        return jsonify({'success': True, 'message': 'Settings updated'})

    except Exception as e:
        return jsonify({'success': False, 'message': f"Error updating settings: {str(e)}"}), 500
458
+
459
+ # Clear all data
460
@app.route('/api/clear-all', methods=['POST'])
@token_required
def clear_all_data(current_user_id):
    """Reset everything held for the caller.

    Clears the session's model, deletes the stored chat history, marks the
    session as having no processed URL and replaces its metrics collector
    with a fresh one. Any exception is reported as a 500 with the error text.
    """
    try:
        session_state = user_sessions[current_user_id]

        session_state['model'].clear()
        delete_user_chat_history(current_user_id)

        session_state['url_processed'] = False
        session_state['metrics'] = MetricsCollector()

        confirmation = {'success': True, 'message': 'All data cleared successfully'}
        return jsonify(confirmation)

    except Exception as e:
        return jsonify({'success': False, 'message': f"Error clearing data: {str(e)}"}), 500
476
+
477
if __name__ == '__main__':
    # Debug mode turns on the interactive debugger, so it is derived from
    # FLASK_ENV instead of being forced on. The default stays 'development',
    # which keeps the previous behaviour when the variable is unset.
    app.run(debug=os.environ.get('FLASK_ENV', 'development') == 'development')
src/crawlgpt/ui/run.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from src.crawlgpt.ui.app import app
2
+ import os
3
+
4
if __name__ == '__main__':
    # FLASK_ENV selects debug mode; PORT and HOST override the bind address.
    env = os.environ.get('FLASK_ENV', 'development')
    debug = env == 'development'
    port = int(os.environ.get('PORT', 5000))

    # The debug server ships an interactive debugger that can execute code,
    # so in debug mode it listens on loopback unless HOST is set explicitly.
    # Outside debug mode the previous all-interfaces default is kept.
    host = os.environ.get('HOST', '127.0.0.1' if debug else '0.0.0.0')

    app.run(debug=debug, port=port, host=host)
9
+
10
+
11
+ # python -m src.crawlgpt.ui.run
test_crawlgpt_api.py ADDED
@@ -0,0 +1,360 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ # filepath: test_crawlgpt_api.py
3
+ import requests
4
+ import time
5
+ import json
6
+ import random
7
+ import string
8
+ import argparse
9
+ import sys
10
+ from pprint import pprint
11
+
12
class CrawlGPTTester:
    """Sequential smoke-test client for a running CrawlGPT HTTP API.

    Each ``test_*`` method sends one or more requests, prints the response and
    returns True when the expected status code came back. State produced by
    one test (username, token, exported data) is kept on the instance and
    reused by later tests, so the methods are meant to run in the order used
    by ``run_all_tests``.
    """

    def __init__(self, base_url="http://127.0.0.1:5000", timeout=300):
        """Remember the API base URL and the per-request timeout in seconds."""
        self.base_url = base_url
        self.timeout = timeout  # without one, a stalled server hangs the run forever
        self.token = None
        self.user_id = None
        self.username = None
        self.test_data = {}

    def generate_random_string(self, length=8):
        """Generate a random lowercase/digit string for test data"""
        return ''.join(random.choices(string.ascii_lowercase + string.digits, k=length))

    # ------------------------------------------------------------------ helpers

    def _print_banner(self, icon, response, label):
        """Print a framed status line followed by the JSON (or raw text) body."""
        rule = '=' * 80
        print(f"\n{rule}")
        print(f"{icon} {label} - Status Code: {response.status_code}")
        print(f"{rule}")
        try:
            pprint(response.json())
        except ValueError:
            # Body is not JSON; JSON decode errors are ValueError subclasses.
            print(response.text)
        print(f"{rule}\n")
        sys.stdout.flush()

    def _request(self, method, path, **kwargs):
        """Send a request to ``base_url + path`` using the instance timeout."""
        kwargs.setdefault("timeout", self.timeout)
        return requests.request(method, f"{self.base_url}{path}", **kwargs)

    def _auth_headers(self):
        """Return the Authorization header for the current token."""
        return {"Authorization": f"Bearer {self.token}"}

    def _require_login(self):
        """Return True when a token is available; otherwise print why not."""
        if not self.token:
            print("Error: Not logged in")
            return False
        return True

    def _report(self, response, label, expected_status=200):
        """Print the response as success or error and return whether it matched."""
        matched = response.status_code == expected_status
        if matched:
            self.print_response(response, label)
        else:
            self.print_error(response, label)
        return matched

    # ----------------------------------------------------------------- printing

    def print_response(self, response, label):
        """Pretty print API responses"""
        self._print_banner("✅", response, label)

    def print_error(self, response, label):
        """Pretty print API error responses"""
        self._print_banner("❌", response, label)

    # -------------------------------------------------------------------- tests

    def test_welcome(self):
        """Test welcome endpoint"""
        response = self._request("GET", "/")
        return self._report(response, "Welcome Endpoint")

    def test_register(self):
        """Test user registration"""
        self.username = f"testuser_{self.generate_random_string()}"
        data = {
            "username": self.username,
            "password": "Test123!",
            "email": f"{self.username}@test.com"
        }

        response = self._request("POST", "/api/register", json=data)
        if self._report(response, "User Registration"):
            self.test_data["registration"] = data
            return True
        return False

    def test_login(self):
        """Test user login"""
        # The username is set before registration is confirmed, so also check
        # that registration actually stored its data.
        registration = self.test_data.get("registration")
        if not self.username or not registration:
            print("Error: No user registered to log in with")
            return False

        data = {
            "username": self.username,
            "password": registration["password"]
        }

        response = self._request("POST", "/api/login", json=data)
        if not self._report(response, "User Login"):
            return False

        response_data = response.json()
        self.token = response_data.get("token")
        self.user_id = response_data.get("user", {}).get("id")
        if not self.token:
            print("Error: No token received from login")
            return False
        return True

    def test_process_url(self, url="https://www.teachermagazine.com/in_en/articles/research-news-disability-inclusion-in-classroom-assessments"):
        """Test URL processing"""
        if not self._require_login():
            return False

        print(f"Processing URL: {url}")
        response = self._request(
            "POST", "/api/process-url", headers=self._auth_headers(), json={"url": url}
        )
        return self._report(response, "Process URL")

    def test_chat(self, message="What is the main topic of this page?"):
        """Test chat endpoint"""
        if not self._require_login():
            return False

        data = {
            "message": message,
            "temperature": 0.7,
            "max_tokens": 1000,
            "model_id": "llama-3.1-8b-instant",
            "use_summary": False
        }

        print(f"Sending chat message: {message}")
        response = self._request("POST", "/api/chat", headers=self._auth_headers(), json=data)
        return self._report(response, "Chat")

    def test_get_history(self):
        """Test getting chat history"""
        if not self._require_login():
            return False

        response = self._request("GET", "/api/chat/history", headers=self._auth_headers())
        return self._report(response, "Get History")

    def test_export_data(self):
        """Test data export"""
        if not self._require_login():
            return False

        response = self._request("GET", "/api/export", headers=self._auth_headers())
        if self._report(response, "Export Data"):
            # Kept for test_import_data.
            self.test_data["export"] = response.json().get("data")
            return True
        return False

    def test_clear_history(self):
        """Test clearing chat history"""
        if not self._require_login():
            return False

        response = self._request("POST", "/api/chat/clear", headers=self._auth_headers())
        return self._report(response, "Clear History")

    def test_import_data(self):
        """Test data import"""
        if not self.token or "export" not in self.test_data:
            print("Error: No exported data to import")
            return False

        data = {"data": self.test_data["export"]}
        response = self._request("POST", "/api/import", headers=self._auth_headers(), json=data)
        return self._report(response, "Import Data")

    def test_metrics(self):
        """Test getting metrics"""
        if not self._require_login():
            return False

        response = self._request("GET", "/api/metrics", headers=self._auth_headers())
        return self._report(response, "Get Metrics")

    def test_update_settings(self):
        """Test updating settings"""
        if not self._require_login():
            return False

        response = self._request(
            "POST", "/api/settings", headers=self._auth_headers(), json={"use_summary": True}
        )
        return self._report(response, "Update Settings")

    def test_clear_all(self):
        """Test clearing all data"""
        if not self._require_login():
            return False

        response = self._request("POST", "/api/clear-all", headers=self._auth_headers())
        return self._report(response, "Clear All Data")

    def test_error_cases(self):
        """Test various error cases; each one passes when the API rejects it."""
        if not self._require_login():
            return False

        headers = self._auth_headers()
        results = []

        print("\nTesting error cases...")

        # Invalid URL
        response = self._request(
            "POST", "/api/process-url", headers=headers, json={"url": "not-a-valid-url"}
        )
        results.append(self._report(response, "Invalid URL Test", expected_status=400))

        # Chat without processing URL: reset the session first so no URL is loaded
        self._request("POST", "/api/clear-all", headers=headers)
        response = self._request(
            "POST", "/api/chat", headers=headers, json={"message": "This should fail"}
        )
        results.append(self._report(response, "Chat Without URL Test", expected_status=400))

        # Invalid token
        bad_headers = {"Authorization": "Bearer invalid-token"}
        response = self._request("GET", "/api/chat/history", headers=bad_headers)
        results.append(self._report(response, "Invalid Token Test", expected_status=401))

        return all(results)

    def run_all_tests(self):
        """Run all tests in sequence, print a summary and return True if all passed."""
        print("Starting CRAWLGPT API Tests...")

        tests = [
            ("Welcome Endpoint", self.test_welcome),
            ("User Registration", self.test_register),
            ("User Login", self.test_login),
            ("Process URL", self.test_process_url),
            ("Chat", self.test_chat),
            ("Get History", self.test_get_history),
            ("Export Data", self.test_export_data),
            ("Clear History", self.test_clear_history),
            ("Import Data", self.test_import_data),
            ("Get Metrics", self.test_metrics),
            ("Update Settings", self.test_update_settings),
            ("Error Cases", self.test_error_cases),
            ("Clear All Data", self.test_clear_all)
        ]

        results = {}

        for name, test_func in tests:
            print(f"\n{'=' * 80}")
            print(f"Running test: {name}")
            print(f"{'=' * 80}")

            try:
                success = test_func()
            except Exception as e:
                # One broken test (e.g. connection refused) must not stop the run.
                print(f"❌ Test '{name}' threw an exception: {str(e)}")
                success = False
            else:
                if not success:
                    print(f"❌ Test '{name}' failed.")
            results[name] = success

            time.sleep(1)  # Brief pause between tests

        # Print summary
        print("\n" + "=" * 80)
        print("TEST SUMMARY")
        print("=" * 80)
        for name, success in results.items():
            status = "✅ PASSED" if success else "❌ FAILED"
            print(f"{status} - {name}")
        print("=" * 80)

        success_rate = sum(1 for success in results.values() if success) / len(results) * 100
        print(f"Overall success rate: {success_rate:.2f}%")

        return all(results.values())
351
+
352
if __name__ == "__main__":
    # Command line: a single optional --url pointing at the API under test.
    cli = argparse.ArgumentParser(description="Test the CRAWLGPT API")
    cli.add_argument("--url", default="http://127.0.0.1:5000", help="Base URL for the API")
    options = cli.parse_args()

    all_passed = CrawlGPTTester(base_url=options.url).run_all_tests()

    # Exit status is 0 only when every test passed.
    exit_code = 0 if all_passed else 1
    sys.exit(exit_code)