TahaRasouli committed on
Commit
d49c704
·
verified ·
1 Parent(s): 8b2c9e1

Update unified_document_processor.py

Browse files
Files changed (1) hide show
  1. unified_document_processor.py +45 -39
unified_document_processor.py CHANGED
@@ -373,51 +373,57 @@ class UnifiedDocumentProcessor:
373
  return f"Error processing your question: {str(e)}"
374
 
375
  def get_detailed_context(self, question: str, selected_files: List[str], n_results: int = 5) -> Dict:
376
- """Get detailed context including path and metadata information"""
377
- try:
378
- filter_dict = {
379
- 'source_file': {'$in': selected_files}
380
- }
381
-
382
- results = self.collection.query(
383
- query_texts=[question],
384
- n_results=n_results,
385
- where=filter_dict,
386
- include=["documents", "metadatas", "distances"]
387
- )
388
-
389
- if not results['documents'][0]:
390
- return {
391
- 'success': False,
392
- 'error': "No relevant content found"
393
- }
394
 
395
- detailed_results = []
396
- for doc, meta, distance in zip(results['documents'][0], results['metadatas'][0], results['distances'][0]):
397
- result_info = {
398
- 'content': doc,
399
- 'metadata': meta,
400
- 'relevance_score': 1 - distance, # Convert distance to similarity score
401
- 'source_info': {
402
- 'file': meta['source_file'],
403
- 'type': meta['content_type'],
404
- 'path': meta.get('xml_path', 'N/A'), # Only for XML files
405
- 'context': json.loads(meta['context']) if meta.get('context') else {}
406
- }
407
- }
408
- detailed_results.append(result_info)
409
 
 
410
  return {
411
- 'success': True,
412
- 'results': detailed_results,
413
- 'query': question
414
  }
415
 
416
- except Exception as e:
417
- return {
418
- 'success': False,
419
- 'error': str(e)
 
 
 
 
 
 
 
 
 
 
 
420
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
421
 
422
  def get_hierarchical_context(self, question: str, selected_files: List[str], n_results: int = 5) -> Dict:
423
  """Get hierarchical context for XML files including parent-child relationships"""
 
373
  return f"Error processing your question: {str(e)}"
374
 
375
  def get_detailed_context(self, question: str, selected_files: List[str], n_results: int = 5) -> Dict:
376
+ """Get detailed context with enhanced scoring"""
377
+ try:
378
+ filter_dict = {
379
+ 'source_file': {'$in': selected_files}
380
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
381
 
382
+ results = self.collection.query(
383
+ query_texts=[question],
384
+ n_results=n_results,
385
+ where=filter_dict,
386
+ include=["documents", "metadatas", "distances"]
387
+ )
 
 
 
 
 
 
 
 
388
 
389
+ if not results['documents'][0]:
390
  return {
391
+ 'success': False,
392
+ 'error': "No relevant content found"
 
393
  }
394
 
395
+ detailed_results = []
396
+ for doc, meta, distance in zip(results['documents'][0], results['metadatas'][0], results['distances'][0]):
397
+ # Calculate detailed score
398
+ detailed_score = self.calculate_detailed_score(distance, meta, doc, question)
399
+
400
+ result_info = {
401
+ 'content': doc,
402
+ 'metadata': meta,
403
+ 'score_details': detailed_score,
404
+ 'source_info': {
405
+ 'file': meta['source_file'],
406
+ 'type': meta['content_type'],
407
+ 'path': meta.get('xml_path', 'N/A'),
408
+ 'context': json.loads(meta['context']) if meta.get('context') else {}
409
+ }
410
  }
411
+ detailed_results.append(result_info)
412
+
413
+ # Sort results by total score
414
+ detailed_results.sort(key=lambda x: x['score_details']['total_score'], reverse=True)
415
+
416
+ return {
417
+ 'success': True,
418
+ 'results': detailed_results,
419
+ 'query': question
420
+ }
421
+
422
+ except Exception as e:
423
+ return {
424
+ 'success': False,
425
+ 'error': str(e)
426
+ }
427
 
428
  def get_hierarchical_context(self, question: str, selected_files: List[str], n_results: int = 5) -> Dict:
429
  """Get hierarchical context for XML files including parent-child relationships"""