TahaRasouli committed on
Commit
23f0de4
·
verified ·
1 Parent(s): 845e556

Update unified_document_processor.py

Browse files
Files changed (1) hide show
  1. unified_document_processor.py +13 -6
unified_document_processor.py CHANGED
@@ -435,8 +435,10 @@ class UnifiedDocumentProcessor:
435
  parent_path = '/'.join(result['source_info']['path'].split('/')[:-1])
436
  if parent_path:
437
  parent_filter = {
438
- 'source_file': result['metadata']['source_file'],
439
- 'xml_path': parent_path
 
 
440
  }
441
  parent_results = self.collection.query(
442
  query_texts=[""], # Empty query to get exact match
@@ -449,11 +451,14 @@ class UnifiedDocumentProcessor:
449
  'content': parent_results['documents'][0][0],
450
  'metadata': parent_results['metadatas'][0][0]
451
  }
 
452
  # Get immediate children
453
- child_path_prefix = result['source_info']['path'] + '/'
454
  child_filter = {
455
- 'source_file': result['metadata']['source_file'],
456
- 'xml_path': {'$contains': child_path_prefix}
 
 
457
  }
458
  child_results = self.collection.query(
459
  query_texts=[""], # Empty query to get exact matches
@@ -465,7 +470,8 @@ class UnifiedDocumentProcessor:
465
  result['children_info'] = [{
466
  'content': doc,
467
  'metadata': meta
468
- } for doc, meta in zip(child_results['documents'][0], child_results['metadatas'][0])]
 
469
 
470
  hierarchical_results.append(result)
471
 
@@ -480,6 +486,7 @@ class UnifiedDocumentProcessor:
480
  'success': False,
481
  'error': str(e)
482
  }
 
483
  def get_summary_and_details(self, question: str, selected_files: List[str]) -> Dict:
484
  """Get both a summary answer and detailed supporting information"""
485
  try:
 
435
  parent_path = '/'.join(result['source_info']['path'].split('/')[:-1])
436
  if parent_path:
437
  parent_filter = {
438
+ '$and': [
439
+ {'source_file': result['metadata']['source_file']},
440
+ {'xml_path': parent_path}
441
+ ]
442
  }
443
  parent_results = self.collection.query(
444
  query_texts=[""], # Empty query to get exact match
 
451
  'content': parent_results['documents'][0][0],
452
  'metadata': parent_results['metadatas'][0][0]
453
  }
454
+
455
  # Get immediate children
456
+ child_path_prefix = result['source_info']['path']
457
  child_filter = {
458
+ '$and': [
459
+ {'source_file': result['metadata']['source_file']},
460
+ {'xml_path': {'$contains': child_path_prefix}}
461
+ ]
462
  }
463
  child_results = self.collection.query(
464
  query_texts=[""], # Empty query to get exact matches
 
470
  result['children_info'] = [{
471
  'content': doc,
472
  'metadata': meta
473
+ } for doc, meta in zip(child_results['documents'][0], child_results['metadatas'][0])
474
+ if meta['xml_path'] != result['source_info']['path']] # Exclude self from children
475
 
476
  hierarchical_results.append(result)
477
 
 
486
  'success': False,
487
  'error': str(e)
488
  }
489
+
490
  def get_summary_and_details(self, question: str, selected_files: List[str]) -> Dict:
491
  """Get both a summary answer and detailed supporting information"""
492
  try: