sunilpuri committed on
Commit
65dc74a
1 Parent(s): 043003d

Upload 4 files

Browse files
Files changed (4) hide show
  1. README.txt +13 -0
  2. app.py +81 -0
  3. examples.json +18 -0
  4. requirements.txt +71 -0
README.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: ⚕️MedNER - Biomed Entity Recognizer
3
+ emoji: 👩‍⚕️🩺⚕️🙋
4
+ colorFrom: purple
5
+ colorTo: red
6
+ sdk: gradio
7
+ sdk_version: 3.8
8
+ app_file: app.py
9
+ pinned: false
10
+ license: apache-2.0
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import json
4
+ from collections import defaultdict
5
+
6
+ # Create tokenizer for biomed model
7
+ from transformers import pipeline, AutoTokenizer, AutoModelForTokenClassification
8
# A single checkpoint id feeds both the tokenizer and the token-classification
# model, which are then wrapped in one NER pipeline.
_CHECKPOINT = "d4data/biomedical-ner-all"
tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)
model = AutoModelForTokenClassification.from_pretrained(_CHECKPOINT)
pipe = pipeline(
    task="ner",
    model=model,
    tokenizer=tokenizer,
    aggregation_strategy="simple",
)
11
+
12
+ # Matplotlib for entity graph
13
+ import matplotlib.pyplot as plt
14
+ plt.switch_backend("Agg")
15
+
16
# Load examples from JSON: each entry's "text" becomes a key mapped to its
# "label". The file is decoded as UTF-8 explicitly so the result does not
# depend on the platform's default locale encoding.
with open("examples.json", "r", encoding="utf-8") as f:
    example_json = json.load(f)
EXAMPLES = {x["text"]: x["label"] for x in example_json}
21
+
22
def group_by_entity(raw):
    """Tally how many detected entities fall under each entity group.

    Args:
        raw: Iterable of mappings, each carrying an ``"entity_group"`` key.

    Returns:
        A ``defaultdict(int)`` mapping each group name to the number of
        items in ``raw`` that carry it, in order of first appearance.
    """
    tally = defaultdict(int)
    for group in (item["entity_group"] for item in raw):
        tally[group] += 1
    return tally
28
+
29
+
30
def plot_to_figure(grouped):
    """Draw a bar chart of entity counts and return the figure.

    Args:
        grouped: Mapping of entity-group name to count; the keys become the
            bar labels and the values the bar heights.

    Returns:
        The matplotlib figure holding the chart.
    """
    # Draw through explicit figure/axes handles rather than pyplot's implicit
    # "current figure", so overlapping calls cannot draw on each other's chart.
    fig, ax = plt.subplots()
    ax.bar(x=list(grouped.keys()), height=list(grouped.values()))
    ax.margins(0.2)
    # Leave room under the axes for the vertically rotated group labels.
    fig.subplots_adjust(bottom=0.4)
    ax.tick_params(axis="x", labelrotation=90)
    # pyplot keeps every figure it creates registered until it is closed;
    # release it here so repeated calls do not accumulate open figures. The
    # returned Figure object itself remains usable by the caller.
    plt.close(fig)
    return fig
37
+
38
+
39
def ner(text):
    """Run the NER pipeline on ``text`` and assemble the four UI outputs.

    Args:
        text: The note text to analyse.

    Returns:
        A 4-tuple of:
        - a dict with the input ``"text"`` and its ``"entities"`` (one dict
          per pipeline result with entity, word, score, start and end),
        - a dict with per-group counts, the number of distinct groups and
          the total number of entities,
        - the label stored for this exact text in ``EXAMPLES``, or
          ``"Unknown"`` when the text is not one of the examples,
        - a bar-chart figure of the per-group counts.
    """
    predictions = pipe(text)

    spans = []
    for pred in predictions:
        spans.append(
            {
                "entity": pred["entity_group"],
                "word": pred["word"],
                "score": pred["score"],
                "start": pred["start"],
                "end": pred["end"],
            }
        )
    highlighted = {"text": text, "entities": spans}

    counts = group_by_entity(predictions)
    chart = plot_to_figure(counts)
    rating = EXAMPLES.get(text, "Unknown")

    summary = {
        "entity_counts": counts,
        "entities": len(counts),
        "counts": sum(counts.values()),
    }

    return (highlighted, summary, rating, chart)
66
+
67
+
68
# Assemble the Gradio UI: one free-text input feeding `ner`, whose four return
# values map, in order, onto the four output components below.
_note_input = gr.Textbox(label="Note text", value="")
_result_panels = [
    gr.HighlightedText(label="NER", combine_adjacent=True),
    gr.JSON(label="Entity Counts"),
    gr.Label(label="Rating"),
    gr.Plot(label="Bar"),
]

interface = gr.Interface(
    fn=ner,
    inputs=_note_input,
    outputs=_result_panels,
    examples=list(EXAMPLES),
    allow_flagging="never",
)

interface.launch()
examples.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "text": "In March and April the patient had two falls. One was related to asthma, heart palpitations. The second was due to syncope and post covid vaccination dizziness during exercise. The patient is now getting an EKG. Former EKG had shown that there was a bundle branch block. Patient had some uncontrolled immune system reactions like anaphylaxis and shortness of breath.",
4
+ "label": 3
5
+ },
6
+ {
7
+ "text": "During the fertility engagement, there was alot of anxiety and we just felt depressed. It was difficult but finally after nutrition advice and changing habits such as cutting out smoking and improving daily activities we felt less agitated. Then our baby came. When he was born in December he had some minor jaundice but nothing serious, yet we had problems sleeping. Both of us felt so relieved to have had the help, and now that the baby doesn't cry all night, we feel less fatigue and can sleep through night.",
8
+ "label": 2
9
+ },
10
+ {
11
+ "text": "51 y/o male with history of skin cancer, asthma, allergic to NSAIDS, dogs, cats, mold. Daily long distance swimming helped breathing problems. COVID reactions were severe yet recovery was fastest when maintaining a healthy diet, low fat and chloresterol, as well as limiting sugar intake.",
12
+ "label": 0
13
+ },
14
+ {
15
+ "text": "SNOMEDCT 183452005 is an emergency hospital encounter. CPT 99378 is an intervention with supervision of a hospice patient. HCPCS T2046 Hospice long term care, room and board only; per diem. LOINC for Care Plan Document Type is code 80748-7 - Clinical genetics Plan of care note. LOINC Care Plan Document Type 80751-1 is a Dermatology Plan of care note. ICD10CM M84.371A is a Stress fracture, right ankle, initial encounter for fracture which is a Condition/Diagnosis/Problem Lower Body Fracture. The purpose of this value set is to represent concepts for a diagnosis of a fracture of a bone in the lower body.",
16
+ "label": 1
17
+ }
18
+ ]
requirements.txt ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiohttp==3.8.3
2
+ aiosignal==1.2.0
3
+ anyio==3.6.1
4
+ async-timeout==4.0.2
5
+ attrs==22.1.0
6
+ bcrypt==4.0.1
7
+ black==22.10.0
8
+ certifi==2022.9.24
9
+ cffi==1.15.1
10
+ charset-normalizer==2.1.1
11
+ click==8.1.3
12
+ contourpy==1.0.5
13
+ cryptography==38.0.1
14
+ cycler==0.11.0
15
+ fastapi==0.85.0
16
+ ffmpy==0.3.0
17
+ filelock==3.8.0
18
+ fonttools==4.37.4
19
+ frozenlist==1.3.1
20
+ fsspec==2022.8.2
21
+ gradio==3.4.1
22
+ h11==0.12.0
23
+ httpcore==0.15.0
24
+ httpx==0.23.0
25
+ huggingface-hub==0.10.0
26
+ idna==3.4
27
+ Jinja2==3.1.2
28
+ kiwisolver==1.4.4
29
+ linkify-it-py==1.0.3
30
+ markdown-it-py==2.1.0
31
+ MarkupSafe==2.1.1
32
+ matplotlib==3.6.1
33
+ mdit-py-plugins==0.3.1
34
+ mdurl==0.1.2
35
+ multidict==6.0.2
36
+ mypy-extensions==0.4.3
37
+ numpy==1.23.3
38
+ orjson==3.8.0
39
+ packaging==21.3
40
+ pandas==1.5.0
41
+ paramiko==2.11.0
42
+ pathspec==0.10.1
43
+ Pillow==9.2.0
44
+ platformdirs==2.5.2
45
+ pycparser==2.21
46
+ pycryptodome==3.15.0
47
+ pydantic==1.10.2
48
+ pydub==0.25.1
49
+ PyNaCl==1.5.0
50
+ pyparsing==3.0.9
51
+ python-dateutil==2.8.2
52
+ python-multipart==0.0.5
53
+ pytz==2022.4
54
+ PyYAML==6.0
55
+ regex==2022.9.13
56
+ requests==2.28.1
57
+ rfc3986==1.5.0
58
+ six==1.16.0
59
+ sniffio==1.3.0
60
+ starlette==0.20.4
61
+ tokenizers==0.12.1
62
+ tomli==2.0.1
63
+ torch==1.12.1
64
+ tqdm==4.64.1
65
+ transformers==4.22.2
66
+ typing_extensions==4.4.0
67
+ uc-micro-py==1.0.1
68
+ urllib3==1.26.12
69
+ uvicorn==0.18.3
70
+ websockets==10.3
71
+ yarl==1.8.1