File size: 1,418 Bytes
addf769
 
 
 
 
404a876
addf769
 
404a876
addf769
 
 
404a876
addf769
 
 
 
404a876
 
 
 
 
 
 
 
 
 
 
addf769
404a876
addf769
 
 
404a876
addf769
404a876
 
 
 
 
addf769
404a876
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# utils/fallback_suggester.py

import json
from sentence_transformers import SentenceTransformer, util

# Sentence-embedding model used to score clause similarity; loaded once, when
# this module is first imported.
model = SentenceTransformer("sentence-transformers/paraphrase-mpnet-base-v2")

# Fallback clause database (label -> clause text). The relative path is
# resolved against the current working directory.
with open("fallback_clauses.json", encoding="utf-8") as f:
    clause_bank = json.load(f)

# Parallel sequences: index i of the labels and of the texts refer to the same
# clause, and the embeddings are computed from the texts in that same order.
clause_labels = list(clause_bank)
clause_texts = list(clause_bank.values())
clause_embeddings = model.encode(clause_texts, convert_to_tensor=True)

def suggest_fallback(input_clause: str, top_k: int = 3) -> str:
    """
    Suggest top-k fallback clauses based on semantic similarity.

    The input clause is embedded with the module-level ``model`` and scored
    by cosine similarity against the pre-computed ``clause_embeddings``.

    Args:
        input_clause (str): The clause to analyze.
        top_k (int): Maximum number of fallback suggestions to return. It is
            capped at the number of clauses in the fallback database.

    Returns:
        str: One entry per suggestion, each made of a bullet, the clause
        label and a colon on the first line and the clause text on the next,
        with entries separated by a blank line. A warning message is
        returned when ``input_clause`` is empty or whitespace-only, and an
        empty string when ``top_k`` is not positive or no fallback clauses
        are loaded.
    """
    if not input_clause or not input_clause.strip():
        return "⚠️ No input clause provided."

    # Nothing to rank: bail out before encoding. This also keeps a negative
    # ``top_k`` from reaching ``topk``, which rejects it.
    if top_k <= 0 or not clause_labels:
        return ""

    input_embedding = model.encode(input_clause, convert_to_tensor=True)
    # A single query yields a one-row similarity matrix; row 0 holds the
    # score of the input against every stored clause.
    scores = util.cos_sim(input_embedding, clause_embeddings)[0]
    k = min(top_k, len(clause_labels))
    top_indices = scores.topk(k=k).indices.tolist()

    return "\n\n".join(
        f"🔹 {clause_labels[idx]}:\n{clause_texts[idx]}" for idx in top_indices
    )