File size: 897 Bytes
6f18aef
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import json
import re

# Load model JSON
# JSON text is read as UTF-8 explicitly so decoding does not depend on the
# platform's locale default.
# NOTE: model_data is not referenced by the code visible in this file.
with open("en.json", "r", encoding="utf-8") as f:
    model_data = json.load(f)

# Define regex patterns
# Maps an entity name to the regex source string used to detect it.
patterns = {
    # Ten digits grouped 3-3-4; each group may be separated by "-", "." or
    # a whitespace character.
    "phone": r"\b\d{3}[-.\s]?\d{3}[-.\s]?\d{4}\b",
    # Either an http:// / https:// prefix or a bare "www." prefix, followed
    # by everything up to the next whitespace.
    "url": r"https?://\S+|www\.\S+",
    # The TLD class is [A-Za-z]: inside a character class "|" is a literal
    # pipe, not alternation, so it must not appear there.
    "email": r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b",
}

# Simulate entity classification
def classify_text(text, entity_patterns=None):
    """Tag every regex match in *text* with the entity type that produced it.

    Args:
        text: The string to scan.
        entity_patterns: Optional mapping of entity name to regex pattern.
            When omitted, the module-level ``patterns`` table is used.

    Returns:
        A dict with the single key ``"annotations"``, holding one entry per
        match of the form ``{"token": <matched text>, "type": <entity name>,
        "confidence_score": 0.9}``. Entries are grouped by entity in mapping
        order and, within one entity, appear in left-to-right match order.
        The confidence score is a fixed value, not a computed one.
    """
    if entity_patterns is None:
        entity_patterns = patterns

    annotations = []
    for entity, pattern in entity_patterns.items():
        # finditer + group(0) always yields the full matched text, whereas
        # re.findall returns only the captured group(s) once a pattern
        # contains parentheses.
        annotations.extend(
            {"token": match.group(0), "type": entity, "confidence_score": 0.9}
            for match in re.finditer(pattern, text)
        )

    return {"annotations": annotations}

# Test classification
# The sample sentence carries one phone number, one URL and one email address
# so that each of the three entity patterns has something to match. The email
# is a placeholder address standing in for a redacted "[email protected]"
# marker, which no email regex can match.
test_text = "Hello world this is Call 123-456-7890 or visit www.example.com or email user@example.com soe other text."
result = classify_text(test_text)
print("Classification Result:", json.dumps(result, indent=2))