nlpblogs committed on
Commit
c28e485
·
verified ·
1 Parent(s): d4a0019

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +164 -0
app.py ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Streamlit app: named-entity extraction over user-supplied text.
#
# Flow: sidebar glossary -> text input -> word-count guard -> token
# classification -> results table + downloadable zip (results + glossary).
#
# NOTE(review): the source this was recovered from had lost its indentation;
# nesting below is reconstructed from syntax. The glossary widgets are assumed
# to live inside the sidebar -- confirm against the deployed app.

import io
import re
import time
import zipfile

import nltk
import pandas as pd
import streamlit as st
from nltk.tokenize import word_tokenize
from streamlit_extras.stylable_container import stylable_container
from transformers import AutoModelForTokenClassification, AutoTokenizer
from transformers import pipeline

# Tokenizer data required by word_tokenize below.
nltk.download('punkt_tab')

# NOTE(review): the model id refers to an FDA nutrition NER model, while the
# UI text and glossary describe multilingual PER/ORG/LOC tags -- confirm that
# this is the intended checkpoint.
MODEL_NAME = "sgarbi/bert-fda-nutrition-ner"

# Upper bound on the number of word tokens accepted for analysis.
MAX_WORDS = 2000

# (checkbox label, explanation shown when the box is ticked), in display order.
SIDEBAR_GLOSSARY = (
    ("I", "Person's name"),
    ("ORG", "Organization"),
    ("LOC", "Location"),
    ("B-PER", "Beginning of a person’s name right after another person’s name"),
    ("B-ORG", "Beginning of an organisation right after another organization"),
    ("B-LOC", "Beginning of a location right after another location"),
    ("O", "Outside of a named entity"),
)


@st.cache_resource
def load_ner_pipeline():
    """Build the token-classification pipeline once and reuse it.

    Streamlit re-executes the whole script on every widget interaction;
    caching keeps the tokenizer and model from being reloaded each time.
    """
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForTokenClassification.from_pretrained(MODEL_NAME)
    return pipeline("token-classification", model=model, tokenizer=tokenizer)


def clear_text():
    """Button callback: reset the text input via its session-state key."""
    st.session_state["text"] = ""


# --- Sidebar: plan badge and tag glossary ----------------------------------
with st.sidebar:
    with stylable_container(
        key="test_button",
        css_styles="""
            button {
                background-color: #0000ff;
                border: none;
                color: white;
            }
            """,
    ):
        st.button("FREE PLAN")

    st.subheader("Glossary of tags", divider="red")

    # One checkbox per tag; ticking it reveals the tag's meaning.
    for tag_label, tag_meaning in SIDEBAR_GLOSSARY:
        if st.checkbox(tag_label):
            st.write(tag_meaning)


# --- Main page: input --------------------------------------------------------
st.subheader("Multilingual AI Entity Extractor with :blue[Transformers]")
st.write("Supported languages: **Arabic, German, English, Spanish, French, Italian, Latvian, Dutch, Portuguese, Chinese**")
st.divider()

text = st.text_input(
    f"Paste your text here and then press **enter**. The length of your text should not exceed {MAX_WORDS} words.",
    key="text",
)
st.button("Clear text", on_click=clear_text)
st.write(text)

# Count words on a punctuation-free copy so punctuation marks are not tokens.
text1 = re.sub(r'[^\w\s]', '', text)
tokens = word_tokenize(text1)
st.write("Length", len(tokens))
st.divider()

if len(tokens) > MAX_WORDS:
    st.warning(f'The length of your text should not exceed {MAX_WORDS} words.')
    st.stop()


# --- Entity extraction -------------------------------------------------------
# st.text_input yields a string (empty until the user types), so the original
# `text is not None` guard was always true and ran the model on empty input.
# Only invoke the model when there is something to analyse; otherwise fall
# back to an empty result set so the table and the zip are still produced.
if text and text.strip():
    ner_results = load_ner_pipeline()(text)
else:
    ner_results = []
df = pd.DataFrame(ner_results)


# --- Downloadable archive ----------------------------------------------------
# Glossary as shipped in the zip (strings kept exactly as originally exported).
dfa = pd.DataFrame(
    data={
        'I': ['Person'],
        'ORG': ['Organization'],
        'LOC': ['Location'],
        'B-PER': ['Beginning of a person’s name right after another person’s name'],
        'B-ORG': ['Beginning of an organisation right after another organization '],
        'B-LOC': ['Beginning of a location right after another location'],
        'O': ['Outside of a named entity '],
    }
)

buf = io.BytesIO()

# "w" is the conventional mode for a fresh in-memory archive ("x" is meant
# for exclusive creation of files on disk).
with zipfile.ZipFile(buf, "w") as myzip:
    myzip.writestr("Summary of the results.csv", df.to_csv())
    myzip.writestr("Glossary of tags.csv", dfa.to_csv())


# --- Output tabs -------------------------------------------------------------
tab1, tab2 = st.tabs(["Summarize", "Download"])

with tab1:
    st.dataframe(df, width=1000)

with tab2:
    st.download_button(
        label="Download zip file",
        data=buf.getvalue(),
        file_name="zip file.zip",
        mime="application/zip",
    )