File size: 1,803 Bytes
d8b68f2 7a0f886 a828ab6 a37770c e2be02d a828ab6 56bc884 0270794 fb62543 e2be02d 56bc884 d8b68f2 a37770c d8b68f2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
import streamlit as st
import spacy
from spacy import displacy
import json
# Load the "en_core_web_md" spaCy pipeline with its "ner" component disabled.
nlp = spacy.load(
    "en_core_web_md",
    disable=["ner"],
)
# Sample concept data: each key is a concept name mapped to a list of example
# terms. This dict is handed to the "concise_concepts" component as its
# "data" setting.
data = {
    "fruit": ["apple", "pear", "orange"],
    "vegetable": ["broccoli", "spinach", "tomato"],
    "meat": ["beef", "pork", "turkey", "duck"],
    "mobile": ["8920136576"],
}
# Settings for the "concise_concepts" pipeline component, collected in a named
# dict so the add_pipe call itself stays short.
# NOTE(review): "topn" has four values and `data` has four keys; presumably
# one value per concept, in key order -- confirm against the component docs.
concise_concepts_config = {
    "data": data,
    "ent_score": True,  # Entity Scoring section
    "verbose": True,
    "exclude_pos": ["VERB", "AUX"],
    "exclude_dep": ["DOBJ", "PCOMP"],
    "include_compound_words": False,
    "json_path": "./fruitful_patterns.json",
    "topn": (100, 500, 300, 100),
}

# NOTE(review): this relies on the "concise_concepts" factory already being
# registered with spaCy when this line runs -- confirm for the deployment.
nlp.add_pipe("concise_concepts", config=concise_concepts_config)
# Streamlit app: read free text from the user, run it through the `nlp`
# pipeline, then show the recognised entities as highlighted text (displaCy)
# and as a JSON document.
st.title('Named Entity Recognition with spaCy')
user_input = st.text_area("Enter text:", "")

if st.button("Process"):
    # Treat whitespace-only input like empty input: there is nothing to tag.
    if user_input.strip():
        # Process the text
        doc = nlp(user_input)

        # Visualization options: highlight colour per label and the labels
        # that displaCy should render.
        options = {
            "colors": {"fruit": "darkorange", "vegetable": "limegreen", "meat": "salmon", "mobile": "blue"},
            "ents": ["fruit", "vegetable", "meat", "mobile"],
        }

        # JSON serialization with only entity and type
        result_dict = {
            'entities': [
                {'entity': ent.text, 'type': ent.label_}
                for ent in doc.ents
            ]
        }
        result_json = json.dumps(result_dict, indent=4)

        # Display results
        st.subheader("Named Entities")
        # `options` must be passed explicitly; previously the dict was built
        # but never handed to displaCy, so the custom colours and the label
        # filter had no effect on the rendered markup.
        html = displacy.render(
            doc,
            style="ent",
            page=True,
            minify=True,
            options=options,
        )
        st.write(html, unsafe_allow_html=True)

        st.subheader("Entities in JSON format")
        st.json(result_json)
    else:
        # Give feedback instead of silently doing nothing on empty input.
        st.warning("Please enter some text to process.")
|