Upload 22 files

- app.py +15 -0
- interfaces/__init__.py +7 -0
- interfaces/__pycache__/base_pipeline.cpython-310.pyc +0 -0
- interfaces/__pycache__/landing.cpython-310.pyc +0 -0
- interfaces/__pycache__/ner.cpython-310.pyc +0 -0
- interfaces/__pycache__/open_ie.cpython-310.pyc +0 -0
- interfaces/__pycache__/qa.cpython-310.pyc +0 -0
- interfaces/__pycache__/relation_e.cpython-310.pyc +0 -0
- interfaces/__pycache__/summ.cpython-310.pyc +0 -0
- interfaces/__pycache__/summarization.cpython-310.pyc +0 -0
- interfaces/__pycache__/text_cleaning.cpython-310.pyc +0 -0
- interfaces/__pycache__/universal.cpython-310.pyc +0 -0
- interfaces/base_pipeline.py +42 -0
- interfaces/landing.py +373 -0
- interfaces/ner.py +83 -0
- interfaces/open_ie.py +109 -0
- interfaces/qa.py +92 -0
- interfaces/relation_e.py +99 -0
- interfaces/summarization.py +104 -0
- interfaces/text_cleaning.py +97 -0
- materials/introduction.html +74 -0
- requirements.txt +2 -0
app.py
ADDED
@@ -0,0 +1,15 @@
import subprocess

subprocess.run(["pip", "uninstall", "-y", "gradio"])  # "-y" so the uninstall does not block waiting for confirmation
subprocess.run(["pip", "install", "utca>=0.1.0"])
subprocess.run(["pip", "install", "gradio==4.31.5"])

import gradio as gr
from interfaces import ner_interface, qa_interface, open_ie_interface, summarization_interface, landing_interface, relation_e_interface, text_cleaning_interface

demo = gr.TabbedInterface(
    [landing_interface, ner_interface, qa_interface, open_ie_interface, summarization_interface, relation_e_interface, text_cleaning_interface],
    ["Introduction", "NER", "Question Answering", "Open Information Extraction", "Summarization", "Relation Extraction", "Text Cleaning"],
    title="UTC (Universal Token Classification) HandyLab",
    theme=gr.themes.Base(),
)

demo.queue()
demo.launch(debug=True, share=True)
interfaces/__init__.py
ADDED
@@ -0,0 +1,7 @@
from .qa import qa_interface
from .ner import ner_interface
from .open_ie import open_ie_interface
from .summarization import summarization_interface
from .relation_e import relation_e_interface
from .landing import landing_interface
from .text_cleaning import text_cleaning_interface
interfaces/__pycache__/base_pipeline.cpython-310.pyc
ADDED
Binary file (1.95 kB)

interfaces/__pycache__/landing.cpython-310.pyc
ADDED
Binary file (16.5 kB)

interfaces/__pycache__/ner.cpython-310.pyc
ADDED
Binary file (4.89 kB)

interfaces/__pycache__/open_ie.cpython-310.pyc
ADDED
Binary file (5.01 kB)

interfaces/__pycache__/qa.cpython-310.pyc
ADDED
Binary file (3.84 kB)

interfaces/__pycache__/relation_e.cpython-310.pyc
ADDED
Binary file (3.78 kB)

interfaces/__pycache__/summ.cpython-310.pyc
ADDED
Binary file (5.52 kB)

interfaces/__pycache__/summarization.cpython-310.pyc
ADDED
Binary file (5.11 kB)

interfaces/__pycache__/text_cleaning.cpython-310.pyc
ADDED
Binary file (5.08 kB)

interfaces/__pycache__/universal.cpython-310.pyc
ADDED
Binary file (5.7 kB)
interfaces/base_pipeline.py
ADDED
@@ -0,0 +1,42 @@
from utca.core import RenameAttribute, Flush
from utca.implementation.predictors import TokenSearcherPredictor, TokenSearcherPredictorConfig
from utca.implementation.tasks import TokenSearcherNER, TokenSearcherNERPostprocessor
from utca.implementation.predictors.token_searcher.token_searcher_pipeline import TokenClassificationPipeline
from transformers import AutoTokenizer, AutoModelForTokenClassification

predictor = TokenSearcherPredictor(
    TokenSearcherPredictorConfig(
        model_name="knowledgator/UTC-DeBERTa-large-v2",
        device="cpu"
    )
)

def generate_pipeline(threshold: float = 0.5):
    ner_task = TokenSearcherNER(
        predictor=predictor,
        postprocess=TokenSearcherNERPostprocessor(
            threshold=threshold
        )
    )
    pipeline = (
        ner_task
        | Flush(keys=["labels"])
        | RenameAttribute("output", "entities")
    )

    return pipeline

tokenizer = AutoTokenizer.from_pretrained("knowledgator/UTC-DeBERTa-large-v2")
model = AutoModelForTokenClassification.from_pretrained("knowledgator/UTC-DeBERTa-large-v2")

transformers_pipeline = TokenClassificationPipeline(model=model, tokenizer=tokenizer, aggregation_strategy='first')

if __name__ == "__main__":
    pipeline = generate_pipeline()
    res = pipeline.run({
        "text": """Dr. Paul Hammond, a renowned neurologist at Johns Hopkins University, has recently published a paper in the prestigious journal "Nature Neuroscience".
His research focuses on a rare genetic mutation, found in less than 0.01% of the population, that appears to prevent the development of Alzheimer's disease. Collaborating with researchers at the University of California, San Francisco, the team is now working to understand the mechanism by which this mutation confers its protective effect.
Funded by the National Institutes of Health, their research could potentially open new avenues for Alzheimer's treatment."""
    })

    print(res)
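
This module is what every interface imports, so it doubles as a smoke test for the whole Space. A minimal sketch of driving `generate_pipeline` directly (the text and label set here are illustrative, not from the repo):

from interfaces.base_pipeline import generate_pipeline

pipeline = generate_pipeline(threshold=0.3)
res = pipeline.run({
    "text": "Albert Einstein worked at the Institute for Advanced Study in Princeton.",
    "labels": ["scientist", "organization", "city"],  # consumed by TokenSearcherNER, then dropped by Flush
})
print(res["entities"])  # spans live under "entities" after RenameAttribute("output", "entities")
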
interfaces/landing.py
ADDED
@@ -0,0 +1,373 @@
import gradio as gr


with open('materials/introduction.html', 'r', encoding='utf-8') as file:
    html_description = file.read()

with gr.Blocks() as landing_interface:
    gr.HTML(html_description)

    with gr.Accordion("How to run this model locally", open=False):
        with gr.Accordion("Simple pipelines", open=False):
            gr.Markdown(
                """
                ## Installation
                To use this model with the UTCA framework or with transformers, first install the UTCA Python library (it includes transformers):

                With UTCA:
                ```
                pip install utca -U
                ```
                With transformers:
                ```
                pip install transformers
                ```
                ## Usage
                Once the libraries are installed, load the tokenizer and model with `AutoTokenizer`/`AutoModelForTokenClassification`, wrap them in a token-classification pipeline, and post-process the predicted spans as shown below.
                """
            )
            gr.Code(
                '''
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
from utca.implementation.predictors.token_searcher.token_searcher_pipeline import TokenClassificationPipeline

def process(text, prompt, threshold=0.5):
    """
    Processes text by preparing the prompt and adjusting indices.

    Args:
        text (str): The text to process
        prompt (str): The prompt to prepend to the text

    Returns:
        list: A list of dicts with adjusted spans and scores
    """
    # Concatenate prompt and text for the full input
    input_ = f"{prompt}\n{text}"
    results = nlp(input_)  # Run the pipeline on the full input
    processed_results = []
    prompt_length = len(prompt)  # Get prompt length
    for result in results:
        # Skip spans whose score is below the threshold
        if result['score'] < threshold:
            continue
        # Adjust indices by subtracting the prompt length
        start = result['start'] - prompt_length
        # If the span belongs to the prompt, skip it
        if start < 0:
            continue
        end = result['end'] - prompt_length
        # Extract the span from the original text using the adjusted indices
        span = text[start:end]
        # Create the processed result dict
        processed_result = {
            'span': span,
            'start': start,
            'end': end,
            'score': result['score']
        }
        processed_results.append(processed_result)
    return processed_results

tokenizer = AutoTokenizer.from_pretrained("knowledgator/UTC-DeBERTa-large-v2")
model = AutoModelForTokenClassification.from_pretrained("knowledgator/UTC-DeBERTa-large-v2")

nlp = TokenClassificationPipeline(model=model, tokenizer=tokenizer, aggregation_strategy='first')
# or with transformers
nlp = pipeline("ner", model=model, tokenizer=tokenizer, aggregation_strategy='first')
                ''',
                language="python",
            )
            gr.Markdown(
                """
                To use the model for zero-shot named entity recognition, we recommend the following prompt:
                """
            )
            gr.Code(
                '''
prompt = """Identify the following entity classes in the text:
computer

Text:
"""
text = """Apple was founded as Apple Computer Company on April 1, 1976, by Steve Wozniak, Steve Jobs (1955–2011) and Ronald Wayne to develop and sell Wozniak's Apple I personal computer.
It was incorporated by Jobs and Wozniak as Apple Computer, Inc. in 1977. The company's second computer, the Apple II, became a best seller and one of the first mass-produced microcomputers.
Apple went public in 1980 to instant financial success."""

results = process(text, prompt)

print(results)
                ''',
                language="python",
            )
            gr.Markdown(
                """
                To use the model for open information extraction, provide any prompt you want:
                """
            )
            gr.Code(
                '''
prompt = """Extract all positive aspects about the product
"""
text = """I recently purchased the Sony WH-1000XM4 Wireless Noise-Canceling Headphones from Amazon and I must say, I'm thoroughly impressed. The package arrived in New York within 2 days, thanks to Amazon Prime's expedited shipping.

The headphones themselves are remarkable. The noise-canceling feature works like a charm in the bustling city environment, and the 30-hour battery life means I don't have to charge them every day. Connecting them to my Samsung Galaxy S21 was a breeze, and the sound quality is second to none.

I also appreciated the customer service from Amazon when I had a question about the warranty. They responded within an hour and provided all the information I needed.

However, the headphones did not come with a hard case, which was listed in the product description. I contacted Amazon, and they offered a 10% discount on my next purchase as an apology.

Overall, I'd give these headphones a 4.5/5 rating and highly recommend them to anyone looking for top-notch quality in both product and service."""

results = process(text, prompt)

print(results)
                ''',
                language="python",
            )
            gr.Markdown(
                """
                To try the model on question answering, just specify a question and a text passage:
                """
            )
            gr.Code(
                '''
question = """Who are the founders of Microsoft?"""

text = """Microsoft was founded by Bill Gates and Paul Allen on April 4, 1975 to develop and sell BASIC interpreters for the Altair 8800.
During his career at Microsoft, Gates held the positions of chairman, chief executive officer, president and chief software architect, while also being the largest individual shareholder until May 2014."""

results = process(text, question)

print(results)
                ''',
                language="python",
            )
            gr.Markdown(
                """
                For text cleaning, specify the following prompt; it will recognize the parts of the text that should be erased:
                """
            )
            gr.Code(
                '''
prompt = """Clean the following text extracted from the web matching not relevant parts:"""

text = """The mechanism of action was characterized using native mass spectrometry, the thermal shift-binding assay, and enzymatic kinetic studies (Figure ). In the native mass spectrometry binding assay, compound 23R showed dose-dependent binding to SARS-CoV-2 Mpro, similar to the positive control GC376, with a binding stoichiometry of one drug per monomer (Figure A).
Similarly, compound 23R showed dose-dependent stabilization of the SARS-CoV-2 Mpro in the thermal shift binding assay with an apparent Kd value of 9.43 μM, a 9.3-fold decrease compared to ML188 (1) (Figure B). In the enzymatic kinetic studies, 23R was shown to be a noncovalent inhibitor with a Ki value of 0.07 μM (Figure C, D top and middle panels). In comparison, the Ki for the parent compound ML188 (1) is 2.29 μM.
The Lineweaver–Burk or double-reciprocal plot with different compound concentrations yielded an intercept at the Y-axis, suggesting that 23R is a competitive inhibitor similar to ML188 (1) (Figure C, D bottom panel). Buy our T-shirts for the lowerst prices you can find!!! Overall, the enzymatic kinetic studies confirmed that compound 23R is a noncovalent inhibitor of SARS-CoV-2 Mpro."""

results = process(text, prompt)

print(results)
                ''',
                language="python",
            )
            gr.Markdown(
                """
                It's possible to use the model for relation extraction; it extracts all relations between entities in N*C operations, where N is the number of entities and C is the number of relation classes:
                """
            )
            gr.Code(
                '''
rex_prompt = """
Identify target entity given the following relation: "{}" and the following source entity: "{}"

Text:
"""

text = """Dr. Paul Hammond, a renowned neurologist at Johns Hopkins University, has recently published a paper in the prestigious journal "Nature Neuroscience". """

entity = "Paul Hammond"

relation = "worked at"

prompt = rex_prompt.format(relation, entity)

results = process(text, prompt)

print(results)
                ''',
                language="python",
            )
            gr.Markdown(
                """
                To find all mentions of an entity in the text, consider the following example:
                """
            )
            gr.Code(
                '''
ent_prompt = "Find all '{}' mentions in the text:"

text = """Several studies have reported its pharmacological activities, including anti-inflammatory, antimicrobial, and antitumoral effects. The effect of E-anethole was studied in the osteosarcoma MG-63 cell line, and the antiproliferative activity was evaluated by an MTT assay. It showed a GI50 value of 60.25 μM with apoptosis induction through the mitochondrial-mediated pathway. Additionally, it induced cell cycle arrest at the G0/G1 phase, up-regulated the expression of p53, caspase-3, and caspase-9, and down-regulated Bcl-xL expression. Moreover, the antitumoral activity of anethole was assessed against oral tumor Ca9-22 cells, and the cytotoxic effects were evaluated by MTT and LDH assays. It demonstrated a LD50 value of 8 μM, and cellular proliferation was 42.7% and 5.2% at anethole concentrations of 3 μM and 30 μM, respectively. It was reported that it could selectively and in a dose-dependent manner decrease cell proliferation and induce apoptosis, as well as induce autophagy, decrease ROS production, and increase glutathione activity. The cytotoxic effect was mediated through NF-kB, MAP kinases, Wnt, caspase-3 and -9, and PARP1 pathways. Additionally, treatment with anethole inhibited cyclin D1 oncogene expression, increased cyclin-dependent kinase inhibitor p21WAF1, up-regulated p53 expression, and inhibited the EMT markers."""

entity = "anethole"

prompt = ent_prompt.format(entity)

results = process(text, prompt)

print(results)
                ''',
                language="python",
            )
            gr.Markdown(
                """
                We significantly improved the model's summarization abilities in comparison to the first version; below is an example:
                """
            )
            gr.Code(
                '''
prompt = "Summarize the following text, highlighting the most important sentences:"

text = """Apple was founded as Apple Computer Company on April 1, 1976, by Steve Wozniak, Steve Jobs (1955–2011) and Ronald Wayne to develop and sell Wozniak's Apple I personal computer. It was incorporated by Jobs and Wozniak as Apple Computer, Inc. in 1977. The company's second computer, the Apple II, became a best seller and one of the first mass-produced microcomputers. Apple went public in 1980 to instant financial success. The company developed computers featuring innovative graphical user interfaces, including the 1984 original Macintosh, announced that year in a critically acclaimed advertisement called "1984". By 1985, the high cost of its products, and power struggles between executives, caused problems. Wozniak stepped back from Apple and pursued other ventures, while Jobs resigned and founded NeXT, taking some Apple employees with him.
Apple Inc. is an American multinational technology company headquartered in Cupertino, California. Apple is the world's largest technology company by revenue, with US$394.3 billion in 2022 revenue. As of March 2023, Apple is the world's biggest company by market capitalization. As of June 2022, Apple is the fourth-largest personal computer vendor by unit sales and the second-largest mobile phone manufacturer in the world. It is considered one of the Big Five American information technology companies, alongside Alphabet (parent company of Google), Amazon, Meta Platforms, and Microsoft.
As the market for personal computers expanded and evolved throughout the 1990s, Apple lost considerable market share to the lower-priced duopoly of the Microsoft Windows operating system on Intel-powered PC clones (also known as "Wintel"). In 1997, weeks away from bankruptcy, the company bought NeXT to resolve Apple's unsuccessful operating system strategy and entice Jobs back to the company. Over the next decade, Jobs guided Apple back to profitability through a number of tactics including introducing the iMac, iPod, iPhone and iPad to critical acclaim, launching the "Think different" campaign and other memorable advertising campaigns, opening the Apple Store retail chain, and acquiring numerous companies to broaden the company's product portfolio. When Jobs resigned in 2011 for health reasons, and died two months later, he was succeeded as CEO by Tim Cook"""

results = process(text, prompt)

print(results)
                ''',
                language="python",
            )
        with gr.Accordion("Advanced pipelines with UTCA", open=False):
            gr.Markdown(
                """
                First, you need to create a predictor that will run the UTC model:
                """
            )
            gr.Code(
                '''
from utca.core import (
    AddData,
    RenameAttribute,
    Flush
)
from utca.implementation.predictors import (
    TokenSearcherPredictor, TokenSearcherPredictorConfig
)
from utca.implementation.tasks import (
    TokenSearcherNER,
    TokenSearcherNERPostprocessor,
)

predictor = TokenSearcherPredictor(
    TokenSearcherPredictorConfig(
        device="cuda:0",
        model="knowledgator/UTC-DeBERTa-large-v2"
    )
)
                ''',
                language="python",
            )
            gr.Markdown(
                """
                For the NER task you should create the following pipeline:
                """
            )
            gr.Code(
                '''
ner_task = TokenSearcherNER(
    predictor=predictor,
    postprocess=[TokenSearcherNERPostprocessor(
        threshold=0.5
    )]
)

# or, with the default predictor and postprocessing:
ner_task = TokenSearcherNER()

pipeline = (
    AddData({"labels": ["scientist", "university", "city"]})
    | ner_task
    | Flush(keys=["labels"])
    | RenameAttribute("output", "entities")
)
                ''',
                language="python",
            )
            gr.Markdown(
                """
                After that, you can pass your text and run the pipeline:
                """
            )
            gr.Code(
                '''
res = pipeline.run({
    "text": """Dr. Paul Hammond, a renowned neurologist at Johns Hopkins University, has recently published a paper in the prestigious journal "Nature Neuroscience".
His research focuses on a rare genetic mutation, found in less than 0.01% of the population, that appears to prevent the development of Alzheimer's disease. Collaborating with researchers at the University of California, San Francisco, the team is now working to understand the mechanism by which this mutation confers its protective effect.
Funded by the National Institutes of Health, their research could potentially open new avenues for Alzheimer's treatment."""
})
                ''',
                language="python",
            )
            gr.Markdown(
                """
                To use UTCA for relation extraction, construct the following pipeline:
                """
            )
            gr.Code(
                '''
from utca.implementation.tasks import (
    TokenSearcherNER,
    TokenSearcherNERPostprocessor,
    TokenSearcherRelationExtraction,
    TokenSearcherRelationExtractionPostprocessor,
)

pipe = (
    TokenSearcherNER(  # TokenSearcherNER produces classified entities under the "output" key.
        predictor=predictor,
        postprocess=TokenSearcherNERPostprocessor(
            threshold=0.5  # Entity threshold
        )
    )
    | RenameAttribute("output", "entities")  # Rename the TokenSearcherNER output so it can be used as input to TokenSearcherRelationExtraction
    | TokenSearcherRelationExtraction(  # TokenSearcherRelationExtraction is used for relation extraction.
        predictor=predictor,
        postprocess=TokenSearcherRelationExtractionPostprocessor(
            threshold=0.5  # Relation threshold
        )
    )
)
                ''',
                language="python",
            )
            gr.Markdown(
                """
                To run the pipeline, you need to specify parameters for entities and relations:
                """
            )
            gr.Code(
                '''
r = pipe.run({
    "text": text,  # Text to process
    "labels": [  # Labels used by TokenSearcherNER for entity extraction
        "scientist",
        "university",
        "city",
        "research",
        "journal",
    ],
    "relations": [{  # Relation parameters
        "relation": "published at",  # Relation label. Required parameter.
        "pairs_filter": [("scientist", "journal")],  # Optional parameter. It specifies possible members of relations by their entity labels.
        # Here, "scientist" is the entity label of the source, and "journal" is the target's entity label.
        # If provided, only the specified pairs will be returned.
    }, {
        "relation": "worked at",
        "pairs_filter": [("scientist", "university"), ("scientist", "other")],
        "distance_threshold": 100,  # Optional parameter. It specifies the max distance between spans in the text (i.e., between the end of the span closer to the start of the text and the start of the next one).
    }]
})

print(r["output"])
                ''',
                language="python",
            )
interfaces/ner.py
ADDED
@@ -0,0 +1,83 @@
from typing import Dict, Union, List
from .base_pipeline import generate_pipeline
import gradio as gr


text1 = """
"I recently purchased the Sony WH-1000XM4 Wireless Noise-Canceling Headphones from Amazon and I must say, I'm thoroughly impressed. The package arrived in New York within 2 days, thanks to Amazon Prime's expedited shipping.

The headphones themselves are remarkable. The noise-canceling feature works like a charm in the bustling city environment, and the 30-hour battery life means I don't have to charge them every day. Connecting them to my Samsung Galaxy S21 was a breeze, and the sound quality is second to none.

I also appreciated the customer service from Amazon when I had a question about the warranty. They responded within an hour and provided all the information I needed.

However, the headphones did not come with a hard case, which was listed in the product description. I contacted Amazon, and they offered a 10% discount on my next purchase as an apology.

Overall, I'd give these headphones a 4.5/5 rating and highly recommend them to anyone looking for top-notch quality in both product and service."""


text3 = """
Several studies have reported its pharmacological activities, including anti-inflammatory, antimicrobial, and antitumoral effects.
The effect of E-anethole was studied in the osteosarcoma MG-63 cell line, and the antiproliferative activity was evaluated by an MTT assay.
It showed a GI50 value of 60.25 μM with apoptosis induction through the mitochondrial-mediated pathway. Additionally, it induced cell cycle arrest at the G0/G1 phase, up-regulated the expression of p53, caspase-3, and caspase-9, and down-regulated Bcl-xL expression.
Moreover, the antitumoral activity of anethole was assessed against oral tumor Ca9-22 cells, and the cytotoxic effects were evaluated by MTT and LDH assays.
It demonstrated a LD50 value of 8 μM, and cellular proliferation was 42.7% and 5.2% at anethole concentrations of 3 μM and 30 μM, respectively.
It was reported that it could selectively and in a dose-dependent manner decrease cell proliferation and induce apoptosis, as well as induce autophagy, decrease ROS production, and increase glutathione activity. The cytotoxic effect was mediated through NF-kB, MAP kinases, Wnt, caspase-3 and -9, and PARP1 pathways. Additionally, treatment with anethole inhibited cyclin D1 oncogene expression, increased cyclin-dependent kinase inhibitor p21WAF1, up-regulated p53 expression, and inhibited the EMT markers.
"""

text5 = """
Dr. Paul Hammond, a renowned neurologist at Johns Hopkins University, has recently published a paper in the prestigious journal "Nature Neuroscience". His research focuses on a rare genetic mutation, found in less than 0.01% of the population, that appears to prevent the development of Alzheimer's disease. Collaborating with researchers at the University of California, San Francisco, the team is now working to understand the mechanism by which this mutation confers its protective effect. Funded by the National Institutes of Health, their research could potentially open new avenues for Alzheimer's treatment.
"""

# Each example row matches the three inputs below: text, comma-separated labels, threshold
ner_examples = [
    [
        text5,
        "neurologist, scientist, gene, disease, biological process, city, journal, university",
        0.5,
    ],
    [
        text1,
        "product, brand, location, features, rating",
        0.5,
    ],
    [
        text3,
        "cell line, protein, metric, substance",
        0.5,
    ]]

def process(
    text, labels: str, threshold: float
) -> Dict[str, Union[str, int, float]]:
    labels = [label.strip() for label in labels.split(",")]
    pipeline = generate_pipeline(threshold)
    r = pipeline.run({"text": text, "labels": labels})

    return r

with gr.Blocks(title="NER Task") as ner_interface:
    input_text = gr.Textbox(label="Text input", placeholder="Enter your text here")
    labels = gr.Textbox(label="Labels", placeholder="Enter your labels here (comma separated)", scale=2)
    threshold = gr.Slider(0, 1, value=0.3, step=0.01, label="Threshold", info="Lower the threshold to increase how many entities get predicted.")
    output = gr.HighlightedText(label="Predicted Entities")
    submit_btn = gr.Button("Submit")
    examples = gr.Examples(
        ner_examples,
        fn=process,
        inputs=[input_text, labels, threshold],
        outputs=output,
        cache_examples=True
    )
    theme = gr.themes.Base()

    input_text.submit(fn=process, inputs=[input_text, labels, threshold], outputs=output)
    labels.submit(fn=process, inputs=[input_text, labels, threshold], outputs=output)
    threshold.release(fn=process, inputs=[input_text, labels, threshold], outputs=output)
    submit_btn.click(fn=process, inputs=[input_text, labels, threshold], outputs=output)


if __name__ == "__main__":
    ner_interface.launch()
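
The NER tab's `process` helper also works outside Gradio, since labels arrive as one comma-separated string. A minimal sketch (the text and labels here are illustrative):

from interfaces.ner import process

r = process(
    "Dr. Paul Hammond is a neurologist at Johns Hopkins University.",
    "scientist, university",  # parsed with labels.split(",") plus strip()
    0.4,
)
print(r["entities"])
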
interfaces/open_ie.py
ADDED
@@ -0,0 +1,109 @@
from typing import Dict, Union, List
from .base_pipeline import transformers_pipeline
import gradio as gr


text1 = """
"I recently purchased the Sony WH-1000XM4 Wireless Noise-Canceling Headphones from Amazon and I must say, I'm thoroughly impressed. The package arrived in New York within 2 days, thanks to Amazon Prime's expedited shipping.

The headphones themselves are remarkable. The noise-canceling feature works like a charm in the bustling city environment, and the 30-hour battery life means I don't have to charge them every day. Connecting them to my Samsung Galaxy S21 was a breeze, and the sound quality is second to none.

I also appreciated the customer service from Amazon when I had a question about the warranty. They responded within an hour and provided all the information I needed.

However, the headphones did not come with a hard case, which was listed in the product description. I contacted Amazon, and they offered a 10% discount on my next purchase as an apology.

Overall, I'd give these headphones a 4.5/5 rating and highly recommend them to anyone looking for top-notch quality in both product and service."""

text2 = """
Apple Inc. is an American multinational technology company headquartered in Cupertino, California. Apple is the world's largest technology company by revenue, with US$394.3 billion in 2022 revenue. As of March 2023, Apple is the world's biggest company by market capitalization. As of June 2022, Apple is the fourth-largest personal computer vendor by unit sales and the second-largest mobile phone manufacturer in the world. It is considered one of the Big Five American information technology companies, alongside Alphabet (parent company of Google), Amazon, Meta Platforms, and Microsoft.
Microsoft was founded by Bill Gates and Paul Allen on April 4, 1975 to develop and sell BASIC interpreters for the Altair 8800. During his career at Microsoft, Gates held the positions of chairman, chief executive officer, president and chief software architect, while also being the largest individual shareholder until May 2014.
Apple was founded as Apple Computer Company on April 1, 1976, by Steve Wozniak, Steve Jobs (1955–2011) and Ronald Wayne to develop and sell Wozniak's Apple I personal computer. It was incorporated by Jobs and Wozniak as Apple Computer, Inc. in 1977. The company's second computer, the Apple II, became a best seller and one of the first mass-produced microcomputers. Apple went public in 1980 to instant financial success. The company developed computers featuring innovative graphical user interfaces, including the 1984 original Macintosh, announced that year in a critically acclaimed advertisement called "1984". By 1985, the high cost of its products, and power struggles between executives, caused problems. Wozniak stepped back from Apple and pursued other ventures, while Jobs resigned and founded NeXT, taking some Apple employees with him.
"""

# Each example row matches the three inputs below: prompt, text, threshold
open_ie_examples = [
    [
        "Extract all positive aspects of the product:",
        text1,
        0.5,
    ],
    [
        "Extract all negative aspects of the product:",
        text1,
        0.5,
    ],
    [
        "Find the name of the computer for which the first Microsoft product was developed:",
        text2,
        0.5,
    ]]


def process(prompt, text, threshold=0.5, label="match"):
    """
    Processes text by preparing the prompt and adjusting indices.

    Args:
        text (str): The text to process
        prompt (str): The prompt to prepend to the text

    Returns:
        dict: The original text plus a list of dicts with adjusted spans and scores
    """
    # Concatenate prompt and text for the full input
    input_ = f"{prompt}\n{text}"
    results = transformers_pipeline(input_)  # Run the pipeline on the full input
    processed_results = []
    prompt_length = len(prompt)  # Get prompt length
    for result in results:
        # Skip spans whose score is below the threshold
        if result['score'] < threshold:
            continue
        # Adjust indices by subtracting the prompt length
        start = result['start'] - prompt_length
        # If the span belongs to the prompt, skip it
        if start < 0:
            continue
        end = result['end'] - prompt_length
        # Extract the span from the original text using the adjusted indices
        span = text[start:end]
        # Create the processed result dict
        processed_result = {
            'span': span,
            'start': start,
            'end': end,
            'score': result['score'],
            'entity': label
        }
        processed_results.append(processed_result)

    output = {"text": text, "entities": processed_results}
    return output


with gr.Blocks(title="Open Information Extraction") as open_ie_interface:
    prompt = gr.Textbox(label="Prompt", placeholder="Enter your prompt here")
    input_text = gr.Textbox(label="Text input", placeholder="Enter your text here")
    threshold = gr.Slider(0, 1, value=0.3, step=0.01, label="Threshold", info="Lower the threshold to increase how many entities get predicted.")
    output = gr.HighlightedText(label="Predicted Entities")
    submit_btn = gr.Button("Submit")
    examples = gr.Examples(
        open_ie_examples,
        fn=process,
        inputs=[prompt, input_text, threshold],
        outputs=output,
        cache_examples=True
    )
    theme = gr.themes.Base()

    input_text.submit(fn=process, inputs=[prompt, input_text, threshold], outputs=output)
    prompt.submit(fn=process, inputs=[prompt, input_text, threshold], outputs=output)
    threshold.release(fn=process, inputs=[prompt, input_text, threshold], outputs=output)
    submit_btn.click(fn=process, inputs=[prompt, input_text, threshold], outputs=output)


if __name__ == "__main__":
    open_ie_interface.launch()
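
The same prompt-then-text convention drives this tab as well as the QA, summarization, and text cleaning tabs; only the prompt and the entity label differ. A minimal sketch of calling the helper directly (prompt and text are illustrative):

from interfaces.open_ie import process

out = process(
    "Extract all positive aspects of the product:",
    "The battery life is excellent, but the case feels cheap.",
    threshold=0.3,
)
print(out["entities"])  # [{'span': ..., 'start': ..., 'end': ..., 'score': ..., 'entity': 'match'}, ...]
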
interfaces/qa.py
ADDED
@@ -0,0 +1,92 @@
from typing import Dict, Union, List
from .base_pipeline import transformers_pipeline
import gradio as gr


text2 = """
Apple Inc. is an American multinational technology company headquartered in Cupertino, California. Apple is the world's largest technology company by revenue, with US$394.3 billion in 2022 revenue. As of March 2023, Apple is the world's biggest company by market capitalization. As of June 2022, Apple is the fourth-largest personal computer vendor by unit sales and the second-largest mobile phone manufacturer in the world. It is considered one of the Big Five American information technology companies, alongside Alphabet (parent company of Google), Amazon, Meta Platforms, and Microsoft.
Microsoft was founded by Bill Gates and Paul Allen on April 4, 1975 to develop and sell BASIC interpreters for the Altair 8800. During his career at Microsoft, Gates held the positions of chairman, chief executive officer, president and chief software architect, while also being the largest individual shareholder until May 2014.
Apple was founded as Apple Computer Company on April 1, 1976, by Steve Wozniak, Steve Jobs (1955–2011) and Ronald Wayne to develop and sell Wozniak's Apple I personal computer. It was incorporated by Jobs and Wozniak as Apple Computer, Inc. in 1977. The company's second computer, the Apple II, became a best seller and one of the first mass-produced microcomputers. Apple went public in 1980 to instant financial success. The company developed computers featuring innovative graphical user interfaces, including the 1984 original Macintosh, announced that year in a critically acclaimed advertisement called "1984". By 1985, the high cost of its products, and power struggles between executives, caused problems. Wozniak stepped back from Apple and pursued other ventures, while Jobs resigned and founded NeXT, taking some Apple employees with him.
"""

# Each example row matches the three inputs below: question, text, threshold
qa_examples = [
    [
        "Who are the founders of Microsoft?",
        text2,
        0.1,
    ],
    [
        "Who are the founders of Apple?",
        text2,
        0.5,
    ]]


def process(prompt, text, threshold=0.5, label="answer"):
    """
    Processes text by preparing the prompt and adjusting indices.

    Args:
        text (str): The text to process
        prompt (str): The prompt to prepend to the text

    Returns:
        dict: The original text plus a list of dicts with adjusted spans and scores
    """
    # Concatenate prompt and text for the full input
    input_ = f"{prompt}\n{text}"
    results = transformers_pipeline(input_)  # Run the pipeline on the full input
    processed_results = []
    prompt_length = len(prompt)  # Get prompt length
    for result in results:
        # Skip spans whose score is below the threshold
        if result['score'] < threshold:
            continue
        # Adjust indices by subtracting the prompt length
        start = result['start'] - prompt_length
        # If the span belongs to the prompt, skip it
        if start < 0:
            continue
        end = result['end'] - prompt_length
        # Extract the span from the original text using the adjusted indices
        span = text[start:end]
        # Create the processed result dict
        processed_result = {
            'span': span,
            'start': start,
            'end': end,
            'score': result['score'],
            'entity': label
        }
        processed_results.append(processed_result)

    output = {"text": text, "entities": processed_results}
    return output


with gr.Blocks(title="Question Answering Task") as qa_interface:
    question = gr.Textbox(label="Question", placeholder="Enter your question here")
    input_text = gr.Textbox(label="Text input", placeholder="Enter your text here")
    threshold = gr.Slider(0, 1, value=0.3, step=0.01, label="Threshold", info="Lower the threshold to increase how many entities get predicted.")
    output = gr.HighlightedText(label="Predicted Entities")
    submit_btn = gr.Button("Submit")
    examples = gr.Examples(
        qa_examples,
        fn=process,
        inputs=[question, input_text, threshold],
        outputs=output,
        cache_examples=True
    )
    theme = gr.themes.Base()

    input_text.submit(fn=process, inputs=[question, input_text, threshold], outputs=output)
    question.submit(fn=process, inputs=[question, input_text, threshold], outputs=output)
    threshold.release(fn=process, inputs=[question, input_text, threshold], outputs=output)
    submit_btn.click(fn=process, inputs=[question, input_text, threshold], outputs=output)


if __name__ == "__main__":
    qa_interface.launch()
interfaces/relation_e.py
ADDED
@@ -0,0 +1,99 @@
from utca.core import RenameAttribute
from utca.implementation.predictors import TokenSearcherPredictor, TokenSearcherPredictorConfig
from utca.implementation.tasks import TokenSearcherNER, TokenSearcherNERPostprocessor, TokenSearcherRelationExtraction, TokenSearcherRelationExtractionPostprocessor
from typing import Dict, Union
import gradio as gr

text = """
Dr. Paul Hammond, a renowned neurologist at Johns Hopkins University, has recently published a paper in the prestigious journal "Nature Neuroscience".
His research focuses on a rare genetic mutation, found in less than 0.01% of the population, that appears to prevent the development of Alzheimer's disease.
Collaborating with researchers at the University of California, San Francisco, the team is now working to understand the mechanism by which this mutation confers its protective effect.
Funded by the National Institutes of Health, their research could potentially open new avenues for Alzheimer's treatment.
"""
predictor = TokenSearcherPredictor(
    TokenSearcherPredictorConfig(
        device="cuda:0",
        model="knowledgator/UTC-DeBERTa-large-v2"
    )
)

pipe = (
    TokenSearcherNER(  # TokenSearcherNER produces classified entities under the "output" key.
        predictor=predictor,
        postprocess=TokenSearcherNERPostprocessor(
            threshold=0.5  # Entity threshold
        )
    )
    | RenameAttribute("output", "entities")  # Rename the TokenSearcherNER output so it can be used as input to TokenSearcherRelationExtraction
    | TokenSearcherRelationExtraction(  # TokenSearcherRelationExtraction is used for relation extraction.
        predictor=predictor,
        postprocess=TokenSearcherRelationExtractionPostprocessor(
            threshold=0.5  # Relation threshold
        )
    )
)


def process(
    relation: str, text, distance_threshold: str, pairs_filter: str, labels: str
) -> Dict[str, Union[str, int, float]]:
    # Pairs arrive as "source -> target, source -> target": split on "," then "->"
    pairs_filter = [tuple(pair.strip() for pair in pair.split("->")) for pair in pairs_filter.split(",")]

    relation_params = {
        "relation": relation,
        "pairs_filter": pairs_filter
    }
    # Only apply the distance threshold when the textbox holds a valid integer
    if distance_threshold and distance_threshold.strip().isdigit():
        relation_params["distance_threshold"] = int(distance_threshold.strip())

    r = pipe.run({
        "text": text,
        "labels": [label.strip() for label in labels.split(",")],
        "relations": [relation_params]
    })

    return r["output"]

relation_e_examples = [
    [
        "worked at",
        text,
        "None",
        "scientist -> university, scientist -> other",
        "scientist, university, city, research, journal"]
]

with gr.Blocks(title="Relation Extraction") as relation_e_interface:
    relation = gr.Textbox(label="Relation", placeholder="Enter the relation you want to extract here")
    input_text = gr.Textbox(label="Text input", placeholder="Enter your text here")
    labels = gr.Textbox(label="Labels", placeholder="Enter your labels here (comma separated)", scale=2)
    pairs_filter = gr.Textbox(label="Pairs Filter", placeholder="It specifies possible members of relations by their entity labels. Write as: source -> target, ...")
    distance_threshold = gr.Textbox(label="Distance Threshold", placeholder="It specifies the max distance in characters between spans in the text")
    output = gr.Textbox(label="Predicted Relation")
    submit_btn = gr.Button("Submit")
    examples = gr.Examples(
        relation_e_examples,
        fn=process,
        inputs=[relation, input_text, distance_threshold, pairs_filter, labels],
        outputs=output,
        cache_examples=True
    )
    theme = gr.themes.Base()

    input_text.submit(fn=process, inputs=[relation, input_text, distance_threshold, pairs_filter, labels], outputs=output)
    labels.submit(fn=process, inputs=[relation, input_text, distance_threshold, pairs_filter, labels], outputs=output)
    pairs_filter.submit(fn=process, inputs=[relation, input_text, distance_threshold, pairs_filter, labels], outputs=output)
    submit_btn.click(fn=process, inputs=[relation, input_text, distance_threshold, pairs_filter, labels], outputs=output)
    distance_threshold.submit(fn=process, inputs=[relation, input_text, distance_threshold, pairs_filter, labels], outputs=output)


if __name__ == "__main__":
    relation_e_interface.launch()
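
Driving the relation pipeline through this module's helper shows how the textbox strings are parsed. A minimal sketch (it assumes a CUDA device is available, since the predictor above is configured with device="cuda:0"; the inputs mirror the example row):

from interfaces.relation_e import process, text

out = process(
    "worked at",                                      # relation label
    text,                                             # module-level example text
    "100",                                            # distance threshold; non-numeric strings like "None" are ignored
    "scientist -> university, scientist -> other",    # pairs_filter, split on "," then "->"
    "scientist, university, city, research, journal"  # entity labels for the NER stage
)
print(out)
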
interfaces/summarization.py
ADDED
@@ -0,0 +1,104 @@
from typing import Dict, Union, List
from .base_pipeline import transformers_pipeline
import gradio as gr


text1 = """
"I recently purchased the Sony WH-1000XM4 Wireless Noise-Canceling Headphones from Amazon and I must say, I'm thoroughly impressed. The package arrived in New York within 2 days, thanks to Amazon Prime's expedited shipping.

The headphones themselves are remarkable. The noise-canceling feature works like a charm in the bustling city environment, and the 30-hour battery life means I don't have to charge them every day. Connecting them to my Samsung Galaxy S21 was a breeze, and the sound quality is second to none.

I also appreciated the customer service from Amazon when I had a question about the warranty. They responded within an hour and provided all the information I needed.

However, the headphones did not come with a hard case, which was listed in the product description. I contacted Amazon, and they offered a 10% discount on my next purchase as an apology.

Overall, I'd give these headphones a 4.5/5 rating and highly recommend them to anyone looking for top-notch quality in both product and service."""

text2 = """
Apple Inc. is an American multinational technology company headquartered in Cupertino, California. Apple is the world's largest technology company by revenue, with US$394.3 billion in 2022 revenue. As of March 2023, Apple is the world's biggest company by market capitalization. As of June 2022, Apple is the fourth-largest personal computer vendor by unit sales and the second-largest mobile phone manufacturer in the world. It is considered one of the Big Five American information technology companies, alongside Alphabet (parent company of Google), Amazon, Meta Platforms, and Microsoft.
Microsoft was founded by Bill Gates and Paul Allen on April 4, 1975 to develop and sell BASIC interpreters for the Altair 8800. During his career at Microsoft, Gates held the positions of chairman, chief executive officer, president and chief software architect, while also being the largest individual shareholder until May 2014.
Apple was founded as Apple Computer Company on April 1, 1976, by Steve Wozniak, Steve Jobs (1955–2011) and Ronald Wayne to develop and sell Wozniak's Apple I personal computer. It was incorporated by Jobs and Wozniak as Apple Computer, Inc. in 1977. The company's second computer, the Apple II, became a best seller and one of the first mass-produced microcomputers. Apple went public in 1980 to instant financial success. The company developed computers featuring innovative graphical user interfaces, including the 1984 original Macintosh, announced that year in a critically acclaimed advertisement called "1984". By 1985, the high cost of its products, and power struggles between executives, caused problems. Wozniak stepped back from Apple and pursued other ventures, while Jobs resigned and founded NeXT, taking some Apple employees with him.
"""

# Each example row matches the three inputs below: prompt, text, threshold
summarization_examples = [
    [
        "Summarize the given text, highlighting the most important information:",
        text1,
        0.5,
    ],
    [
        "Summarize the given text, highlighting the most important information:",
        text2,
        0.5,
    ]]


def process(prompt, text, threshold=0.5, label="summary"):
    """
    Processes text by preparing the prompt and adjusting indices.

    Args:
        text (str): The text to process
        prompt (str): The prompt to prepend to the text

    Returns:
        tuple: The summary string and a dict of highlighted spans
    """
    # Concatenate prompt and text for the full input
    input_ = f"{prompt}\n{text}"
    results = transformers_pipeline(input_)  # Run the pipeline on the full input
    processed_results = []
    prompt_length = len(prompt)  # Get prompt length
    for result in results:
        # Skip spans whose score is below the threshold
        if result['score'] < threshold:
            continue
        # Adjust indices by subtracting the prompt length
        start = result['start'] - prompt_length
        # If the span belongs to the prompt, skip it
        if start < 0:
            continue
        end = result['end'] - prompt_length
        # Extract the span from the original text using the adjusted indices
        span = text[start:end]
        # Create the processed result dict
        processed_result = {
            'span': span,
            'start': start,
            'end': end,
            'score': result['score'],
            'entity': label
        }
        processed_results.append(processed_result)

    output = {"text": text, "entities": processed_results}
    summary = " ".join(entity["span"] for entity in output["entities"])

    return summary, output

with gr.Blocks(title="Summarization") as summarization_interface:
    prompt = gr.Textbox(label="Prompt", placeholder="Enter your prompt here")
    input_text = gr.Textbox(label="Text input", placeholder="Enter your text here")
    threshold = gr.Slider(0, 1, value=0.3, step=0.01, label="Threshold", info="Lower the threshold to increase how many entities get predicted.")
    output = [gr.Textbox(label="Summary"), gr.HighlightedText(label="Predicted Entities")]
    submit_btn = gr.Button("Submit")
    examples = gr.Examples(
        summarization_examples,
        fn=process,
        inputs=[prompt, input_text, threshold],
        outputs=output,
        cache_examples=True
    )
    theme = gr.themes.Base()

    input_text.submit(fn=process, inputs=[prompt, input_text, threshold], outputs=output)
    prompt.submit(fn=process, inputs=[prompt, input_text, threshold], outputs=output)
    threshold.release(fn=process, inputs=[prompt, input_text, threshold], outputs=output)
    submit_btn.click(fn=process, inputs=[prompt, input_text, threshold], outputs=output)


if __name__ == "__main__":
    summarization_interface.launch()
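
Since the summary is just the above-threshold spans joined with spaces, the threshold directly controls how terse the output is. A minimal sketch (prompt and text are illustrative):

from interfaces.summarization import process

summary, highlighted = process(
    "Summarize the given text, highlighting the most important information:",
    "Apple went public in 1980 to instant financial success. The weather was mild that year.",
    threshold=0.3,
)
print(summary)                  # surviving spans joined with spaces
print(highlighted["entities"])  # the same spans with offsets, for gr.HighlightedText
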
interfaces/text_cleaning.py
ADDED
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from typing import Dict, Union, List
from .base_pipeline import transformers_pipeline
import gradio as gr


text1 = """
The mechanism of action was characterized using native mass spectrometry, the thermal shift-binding assay, and enzymatic kinetic studies (Figure ). In the native mass spectrometry binding assay, compound 23R showed dose-dependent binding to SARS-CoV-2 Mpro, similar to the positive control GC376, with a binding stoichiometry of one drug per monomer (Figure A).
Similarly, compound 23R showed dose-dependent stabilization of the SARS-CoV-2 Mpro in the thermal shift binding assay with an apparent Kd value of 9.43 μM, a 9.3-fold decrease compared to ML188 (1) (Figure B). In the enzymatic kinetic studies, 23R was shown to be a noncovalent inhibitor with a Ki value of 0.07 μM (Figure C, D top and middle panels). In comparison, the Ki for the parent compound ML188 (1) is 2.29 μM.
The Lineweaver–Burk or double-reciprocal plot with different compound concentrations yielded an intercept at the Y-axis, suggesting that 23R is a competitive inhibitor similar to ML188 (1) (Figure C, D bottom panel). Buy our T-shirts for the lowerst prices you can find!!! Overall, the enzymatic kinetic studies confirmed that compound 23R is a noncovalent inhibitor of SARS-CoV-2 Mpro."""

text2 = """
Apple Inc. is an American multinational technology company headquartered in Cupertino, California. Apple is the world's largest technology company by revenue, with US$394.3 billion in 2022 revenue. As of March 2023, Apple is the world's biggest company by market capitalization. As of June 2022, Apple is the fourth-largest personal computer vendor by unit sales and the second-largest mobile phone manufacturer in the world. It is considered one of the Big Five American information technology companies, alongside Alphabet (parent company of Google), Amazon, Meta Platforms, and Microsoft.
Microsoft was founded by Bill Gates and Paul Allen on April 4, 1975 to develop and sell BASIC interpreters for the Altair 8800. During his career at Microsoft, Gates held the positions of chairman, chief executive officer, president and chief software architect, while also being the largest individual shareholder until May 2014.
Apple was founded as Apple Computer Company on April 1, 1976, by Steve Wozniak, Steve Jobs (1955–2011) and Ronald Wayne to develop and sell Wozniak's Apple I personal computer. It was incorporated by Jobs and Wozniak as Apple Computer, Inc. in 1977. The company's second computer, the Apple II, became a best seller and one of the first mass-produced microcomputers. Apple went public in 1980 to instant financial success. The company developed computers featuring innovative graphical user interfaces, including the 1984 original Macintosh, announced that year in a critically acclaimed advertisement called "1984". By 1985, the high cost of its products, and power struggles between executives, caused problems. Wozniak stepped back from Apple and pursued other ventures, while Jobs resigned and founded NeXT, taking some Apple employees with him. Order today and get 30% discount!
"""

text_cleaning_examples = [
    [
        "Clean the following text extracted from the web matching not relevant parts:",
        text1,
        0.5
    ],
    [
        "Clean the following text extracted from the web matching not relevant parts:",
        text2,
        0.5
    ]]


def process(prompt, text, threshold=0.5, label="match"):
    """
    Processes text by prepending the prompt and mapping predicted span
    indices back into the coordinates of the original text.

    Args:
        prompt (str): The prompt to prepend to the text
        text (str): The text to process
        threshold (float): Minimum score a span must reach to be kept
        label (str): Entity label assigned to every kept span

    Returns:
        dict: The original text and the kept entity spans (the format
        gr.HighlightedText expects)
    """
    # Concatenate prompt and text into the full model input
    input_ = f"{prompt}\n{text}"
    results = transformers_pipeline(input_)  # Run the pipeline on the full input
    processed_results = []
    prompt_length = len(prompt)
    for result in results:
        # Skip predictions that score below the threshold
        if result['score'] < threshold:
            continue
        # Shift indices back by the prompt length
        start = result['start'] - prompt_length
        # Skip spans that fall inside the prompt itself
        if start < 0:
            continue
        end = result['end'] - prompt_length
        # Extract the span from the original text using the adjusted indices
        span = text[start:end]
        # Create the processed result dict
        processed_result = {
            'span': span,
            'start': start,
            'end': end,
            'score': result['score'],
            'entity': label
        }
        processed_results.append(processed_result)

    output = {"text": text, "entities": processed_results}

    return output


with gr.Blocks(title="Text Cleaning", theme=gr.themes.Base()) as text_cleaning_interface:
    prompt = gr.Textbox(label="Prompt", placeholder="Enter your prompt here")
    input_text = gr.Textbox(label="Text input", placeholder="Enter your text here")
    threshold = gr.Slider(0, 1, value=0.3, step=0.01, label="Threshold", info="Lower the threshold to increase how many entities get predicted.")
    output = [gr.HighlightedText(label="Predicted Entities")]
    submit_btn = gr.Button("Submit")
    examples = gr.Examples(
        text_cleaning_examples,
        fn=process,
        inputs=[prompt, input_text, threshold],
        outputs=output,
        cache_examples=True
    )

    input_text.submit(fn=process, inputs=[prompt, input_text, threshold], outputs=output)
    prompt.submit(fn=process, inputs=[prompt, input_text, threshold], outputs=output)
    threshold.release(fn=process, inputs=[prompt, input_text, threshold], outputs=output)
    submit_btn.click(fn=process, inputs=[prompt, input_text, threshold], outputs=output)


if __name__ == "__main__":
    text_cleaning_interface.launch()
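The Text Cleaning tab only highlights the matched noise; to actually strip it out, the returned offsets can be applied from right to left. A minimal sketch (remove_spans is a hypothetical helper, not part of this repo):

# Hypothetical helper: delete the matched spans instead of highlighting them.
def remove_spans(text, entities):
    # Work right-to-left so earlier offsets stay valid after each removal.
    for ent in sorted(entities, key=lambda e: e["start"], reverse=True):
        text = text[:ent["start"]] + text[ent["end"]:]
    return text

result = process("Clean the following text extracted from the web matching not relevant parts:", text2)
cleaned = remove_spans(result["text"], result["entities"])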
materials/introduction.html
ADDED
@@ -0,0 +1,74 @@
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>UTC (Universal Token Classification) HandyLab App Description</title>
    <style>
        body {
            font-family: Arial, sans-serif;
            margin: 10px;
            line-height: 1.6;
        }
        .link-button {
            display: inline-flex;
            align-items: center; /* Center content vertically */
            justify-content: center; /* Center content horizontally */
            margin: 5px; /* Consistent margin around buttons */
            padding: 8px 10px; /* Slightly reduced vertical padding */
            background-color: white;
            border: 1px solid grey;
            color: #007bff;
            text-decoration: none;
            border-radius: 10px;
            box-sizing: border-box;
            height: 40px; /* Fixed height for uniformity */
            flex: 0 0 auto; /* No grow, no shrink, automatic basis */
        }
        .link-button img {
            max-height: 30px; /* Maximum height for images */
            margin: 0; /* Remove any default margin */
            padding: 0; /* Remove any default padding */
        }
        .links-container {
            display: flex;
            justify-content: center;
            flex-wrap: nowrap; /* Prevent wrapping */
            align-items: center; /* Keep items vertically aligned */
            width: 100%; /* Full width to accommodate all items */
            margin-top: 10px; /* Some space above the container */
        }
        .discord-adjust {
            transform: translateY(-2px); /* Moves the button slightly upwards */
        }
    </style>
</head>
<body>
    <h2>Welcome to UTC (Universal Token Classification) HandyLab!</h2>
    <h3>With UTC (Universal Token Classification) HandyLab, you can effortlessly handle the following tasks:</h3>
    <ol>
        <li><b>Named Entity Recognition (NER):</b> Identifies and categorizes entities such as names, organizations, dates, and other specific items in the text.</li>
        <li><b>Relation Extraction:</b> Detects and classifies relationships between entities within the text.</li>
        <li><b>Summarization:</b> Extracts the most important sentences from the input text, capturing the essential information.</li>
        <li><b>Sentiment Extraction:</b> Identifies parts of the text that signal a positive, negative, or neutral sentiment.</li>
        <li><b>Key-Phrase Extraction:</b> Identifies and extracts important phrases and keywords from the text.</li>
        <li><b>Question Answering:</b> Finds the answer to a given question within the text.</li>
        <li><b>Open Information Extraction:</b> Extracts pieces of text given an open prompt from a user, for example, product description extraction.</li>
        <li><b>Text Cleaning:</b> Clears the text of unnecessary parts according to the prompt.</li>
    </ol>
    <h3>What is UTC (Universal Token Classification) HandyLab?</h3>
    <p>UTC (Universal Token Classification) HandyLab serves as a foundational showcase of our technological capabilities in universal information extraction. It employs the model <a href="https://huggingface.co/knowledgator/UTC-DeBERTa-large-v2">"knowledgator/UTC-DeBERTa-large-v2"</a>. UTC-DeBERTa-large-v2 is the second version of our UTC model, designed to extract various pieces of information from plain text based on a user-provided custom prompt.</p>
    <h3>Remember, information extraction is not just about data; it's about insights. Let's uncover those insights together!💫</h3>
    <!-- Links Section -->
    <div class="links-container">
        <a href="https://www.knowledgator.com/" class="link-button" target="_blank"><img src="https://assets-global.website-files.com/65902be8ba48a05dfdb73331/6590476fcc8e8f35b2332781_Group%201000002504%20(1).png" alt="Visit our website"></a>
        <a href="https://www.linkedin.com/company/knowledgator/" class="link-button" target="_blank"><img src="https://upload.wikimedia.org/wikipedia/commons/thumb/0/01/LinkedIn_Logo.svg/1280px-LinkedIn_Logo.svg.png" alt="Follow on LinkedIn"></a>
        <a href="https://huggingface.co/knowledgator/" class="link-button" target="_blank"><img src="https://vectorseek.com/wp-content/uploads/2023/12/Hugging-Face-Logo-Vector.svg-.png" alt="Hugging Face Profile"></a>
        <a href="https://twitter.com/knowledgator" class="link-button" target="_blank"><img src="https://freepnglogo.com/images/all_img/1691832278twitter-x-logo-png.png" alt="Follow on X"></a>
        <a href="https://blog.knowledgator.com/" class="link-button" target="_blank"><img src="https://upload.wikimedia.org/wikipedia/commons/thumb/0/0d/Medium_%28website%29_logo.svg/2560px-Medium_%28website%29_logo.svg.png" alt="Follow on Medium"></a>
        <a href="https://discord.com/invite/dkyeAgs9DG" class="link-button discord-adjust" target="_blank">
            <img src="https://assets-global.website-files.com/6257adef93867e50d84d30e2/636e0b52aa9e99b832574a53_full_logo_blurple_RGB.png" alt="Join our Discord">
        </a>
    </div>
</body>
</html>
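To experiment with UTC-DeBERTa-large-v2 outside this Space, one reasonable setup is the standard Hugging Face token-classification pipeline. This is a sketch under that assumption; the Space builds its own pipeline in interfaces/base_pipeline.py, which may be configured differently:

from transformers import pipeline

# Token-classification pipeline over the UTC model; "first" aggregation
# merges sub-word tokens into word-level spans with character offsets.
utc = pipeline(
    "ner",
    model="knowledgator/UTC-DeBERTa-large-v2",
    aggregation_strategy="first",
)
prompt = "Summarize the given text, highlighting the most important information:"
text = "Apple went public in 1980 to instant financial success."
results = utc(f"{prompt}\n{text}")  # list of dicts with 'start', 'end', 'score'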
requirements.txt
ADDED
@@ -0,0 +1,2 @@
utca>=0.1.0
gradio==4.31.5