Spaces:
Sleeping
Sleeping
Update unified_document_processor.py
Browse files
unified_document_processor.py
CHANGED
@@ -435,8 +435,10 @@ class UnifiedDocumentProcessor:
|
|
435 |
parent_path = '/'.join(result['source_info']['path'].split('/')[:-1])
|
436 |
if parent_path:
|
437 |
parent_filter = {
|
438 |
-
'
|
439 |
-
|
|
|
|
|
440 |
}
|
441 |
parent_results = self.collection.query(
|
442 |
query_texts=[""], # Empty query to get exact match
|
@@ -449,11 +451,14 @@ class UnifiedDocumentProcessor:
|
|
449 |
'content': parent_results['documents'][0][0],
|
450 |
'metadata': parent_results['metadatas'][0][0]
|
451 |
}
|
|
|
452 |
# Get immediate children
|
453 |
-
child_path_prefix = result['source_info']['path']
|
454 |
child_filter = {
|
455 |
-
'
|
456 |
-
|
|
|
|
|
457 |
}
|
458 |
child_results = self.collection.query(
|
459 |
query_texts=[""], # Empty query to get exact matches
|
@@ -465,7 +470,8 @@ class UnifiedDocumentProcessor:
|
|
465 |
result['children_info'] = [{
|
466 |
'content': doc,
|
467 |
'metadata': meta
|
468 |
-
} for doc, meta in zip(child_results['documents'][0], child_results['metadatas'][0])
|
|
|
469 |
|
470 |
hierarchical_results.append(result)
|
471 |
|
@@ -480,6 +486,7 @@ class UnifiedDocumentProcessor:
|
|
480 |
'success': False,
|
481 |
'error': str(e)
|
482 |
}
|
|
|
483 |
def get_summary_and_details(self, question: str, selected_files: List[str]) -> Dict:
|
484 |
"""Get both a summary answer and detailed supporting information"""
|
485 |
try:
|
|
|
435 |
parent_path = '/'.join(result['source_info']['path'].split('/')[:-1])
|
436 |
if parent_path:
|
437 |
parent_filter = {
|
438 |
+
'$and': [
|
439 |
+
{'source_file': result['metadata']['source_file']},
|
440 |
+
{'xml_path': parent_path}
|
441 |
+
]
|
442 |
}
|
443 |
parent_results = self.collection.query(
|
444 |
query_texts=[""], # Empty query to get exact match
|
|
|
451 |
'content': parent_results['documents'][0][0],
|
452 |
'metadata': parent_results['metadatas'][0][0]
|
453 |
}
|
454 |
+
|
455 |
# Get immediate children
|
456 |
+
child_path_prefix = result['source_info']['path']
|
457 |
child_filter = {
|
458 |
+
'$and': [
|
459 |
+
{'source_file': result['metadata']['source_file']},
|
460 |
+
{'xml_path': {'$contains': child_path_prefix}}
|
461 |
+
]
|
462 |
}
|
463 |
child_results = self.collection.query(
|
464 |
query_texts=[""], # Empty query to get exact matches
|
|
|
470 |
result['children_info'] = [{
|
471 |
'content': doc,
|
472 |
'metadata': meta
|
473 |
+
} for doc, meta in zip(child_results['documents'][0], child_results['metadatas'][0])
|
474 |
+
if meta['xml_path'] != result['source_info']['path']] # Exclude self from children
|
475 |
|
476 |
hierarchical_results.append(result)
|
477 |
|
|
|
486 |
'success': False,
|
487 |
'error': str(e)
|
488 |
}
|
489 |
+
|
490 |
def get_summary_and_details(self, question: str, selected_files: List[str]) -> Dict:
|
491 |
"""Get both a summary answer and detailed supporting information"""
|
492 |
try:
|