yangheng committed on
Commit
28c5195
1 Parent(s): 310e8a7
app.py CHANGED
@@ -8,23 +8,20 @@
8
  # Copyright (C) 2023. All Rights Reserved.
9
 
10
  import random
 
 
11
  import gradio as gr
12
  import pandas as pd
13
  from pyabsa import (
14
  download_all_available_datasets,
15
- AspectTermExtraction as ATEPC,
16
  TaskCodeOption,
17
  available_checkpoints,
18
  )
19
- from pyabsa import AspectSentimentTripletExtraction as ASTE
20
  from pyabsa.utils.data_utils.dataset_manager import detect_infer_dataset
21
 
22
  download_all_available_datasets()
23
 
24
- atepc_dataset_items = {dataset.name: dataset for dataset in ATEPC.ATEPCDatasetList()}
25
- aste_dataset_items = {dataset.name: dataset for dataset in ASTE.ASTEDatasetList()}
26
-
27
-
28
  def get_atepc_example(dataset):
29
  task = TaskCodeOption.Aspect_Polarity_Classification
30
  dataset_file = detect_infer_dataset(atepc_dataset_items[dataset], task)
@@ -66,18 +63,65 @@ def get_aste_example(dataset):
66
  return sorted(set(lines), key=lines.index)
67
 
68
 
69
- available_checkpoints("ASTE", True)
 
 
70
 
71
- atepc_dataset_dict = {
72
- dataset.name: get_atepc_example(dataset.name)
73
- for dataset in ATEPC.ATEPCDatasetList()
74
- }
75
- aspect_extractor = ATEPC.AspectExtractor(checkpoint="multilingual")
76
 
77
- aste_dataset_dict = {
78
- dataset.name: get_aste_example(dataset.name) for dataset in ASTE.ASTEDatasetList()
79
- }
80
- triplet_extractor = ASTE.AspectSentimentTripletExtractor(checkpoint="multilingual")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
 
82
 
83
  def perform_atepc_inference(text, dataset):
@@ -113,67 +157,121 @@ def perform_aste_inference(text, dataset):
113
  return pred_triplets, true_triplets, "{}".format(text)
114
 
115
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
  demo = gr.Blocks()
117
 
118
  with demo:
119
- with gr.Row():
120
 
121
- with gr.Column():
122
- gr.Markdown("# <p align='center'>Aspect Sentiment Triplet Extraction !</p>")
123
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
  with gr.Row():
125
  with gr.Column():
126
- aste_input_sentence = gr.Textbox(
127
- placeholder="Leave this box blank and choose a dataset will give you a random example...",
128
- label="Example:",
129
- )
130
- gr.Markdown(
131
- "You can find code and dataset at [ASTE examples](https://github.com/yangheng95/PyABSA/tree/v2/examples-v2/aspect_sentiment_triplet_extration)"
132
- )
133
- aste_dataset_ids = gr.Radio(
134
- choices=[dataset.name for dataset in ASTE.ASTEDatasetList()[:-1]],
135
- value="Restaurant14",
136
- label="Datasets",
137
- )
138
- aste_inference_button = gr.Button("Let's go!")
139
-
140
- aste_output_text = gr.TextArea(label="Example:")
141
- aste_output_pred_df = gr.DataFrame(label="Predicted Triplets:")
142
- aste_output_true_df = gr.DataFrame(label="Original Triplets:")
143
-
144
- aste_inference_button.click(
145
- fn=perform_aste_inference,
146
- inputs=[aste_input_sentence, aste_dataset_ids],
147
- outputs=[aste_output_pred_df, aste_output_true_df, aste_output_text],
148
- )
149
 
150
- with gr.Column():
151
- gr.Markdown(
152
- "# <p align='center'>Multilingual Aspect-based Sentiment Analysis !</p>"
153
- )
154
- with gr.Row():
155
- with gr.Column():
156
- atepc_input_sentence = gr.Textbox(
157
  placeholder="Leave this box blank and choose a dataset will give you a random example...",
158
  label="Example:",
159
  )
160
- gr.Markdown(
161
- "You can find the datasets at [github.com/yangheng95/ABSADatasets](https://github.com/yangheng95/ABSADatasets/tree/v1.2/datasets/text_classification)"
162
- )
163
- atepc_dataset_ids = gr.Radio(
164
- choices=[dataset.name for dataset in ATEPC.ATEPCDatasetList()[:-1]],
165
- value="Laptop14",
166
  label="Datasets",
167
  )
168
- atepc_inference_button = gr.Button("Let's go!")
169
 
170
- atepc_output_text = gr.TextArea(label="Example:")
171
- atepc_output_df = gr.DataFrame(label="Prediction Results:")
172
 
173
- atepc_inference_button.click(
174
- fn=perform_atepc_inference,
175
- inputs=[atepc_input_sentence, atepc_dataset_ids],
176
- outputs=[atepc_output_df, atepc_output_text],
177
  )
178
  gr.Markdown(
179
  """### GitHub Repo: [PyABSA V2](https://github.com/yangheng95/PyABSA)
 
8
  # Copyright (C) 2023. All Rights Reserved.
9
 
10
  import random
11
+
12
+ import autocuda
13
  import gradio as gr
14
  import pandas as pd
15
  from pyabsa import (
16
  download_all_available_datasets,
 
17
  TaskCodeOption,
18
  available_checkpoints,
19
  )
20
+ from pyabsa import ABSAInstruction
21
  from pyabsa.utils.data_utils.dataset_manager import detect_infer_dataset
22
 
23
  download_all_available_datasets()
24
 
 
 
 
 
25
  def get_atepc_example(dataset):
26
  task = TaskCodeOption.Aspect_Polarity_Classification
27
  dataset_file = detect_infer_dataset(atepc_dataset_items[dataset], task)
 
63
  return sorted(set(lines), key=lines.index)
64
 
65
 
66
def get_acos_example(dataset):
    """Load the de-duplicated example sentences of one ACOS dataset.

    Args:
        dataset: Dataset name; used as a key into the module-level
            ``acos_dataset_items`` mapping.

    Returns:
        A list of unique example texts in first-seen order. Each text is
        the part of a dataset line that precedes the ``####`` separator.
        An empty list is returned when ``detect_infer_dataset`` yields
        nothing.
    """
    task = 'ACOS'
    dataset_file = detect_infer_dataset(acos_dataset_items[dataset], task)

    # NOTE(review): only the first entry yielded by detect_infer_dataset is
    # read, matching the previous behaviour -- confirm this is intended.
    for fname in dataset_file:
        # An entry may be a single path or a collection of paths.
        paths = [fname] if isinstance(fname, str) else fname

        lines = []
        for path in paths:
            print("loading: {}".format(path))
            # The context manager closes the file even if reading fails.
            with open(path, "r", encoding="utf-8") as fin:
                lines.extend(line.split('####')[0] for line in fin)

        # dict.fromkeys keeps the first occurrence of every text in order,
        # without the quadratic cost of sorting a set by list.index.
        return list(dict.fromkeys(lines))

    return []
82
+
83
+
84
# Every task is initialised on its own: if its import, dataset loading or
# checkpoint loading raises, the error is printed and the task's globals fall
# back to empty mappings and a ``None`` extractor.
try:
    from pyabsa import AspectTermExtraction as ATEPC

    atepc_dataset_items = {item.name: item for item in ATEPC.ATEPCDatasetList()}
    atepc_dataset_dict = {
        item.name: get_atepc_example(item.name)
        for item in ATEPC.ATEPCDatasetList()
    }
    aspect_extractor = ATEPC.AspectExtractor(checkpoint="multilingual")
except Exception as err:
    print(err)
    atepc_dataset_items, atepc_dataset_dict, aspect_extractor = {}, {}, None

try:
    from pyabsa import AspectSentimentTripletExtraction as ASTE

    aste_dataset_items = {item.name: item for item in ASTE.ASTEDatasetList()}
    aste_dataset_dict = {
        item.name: get_aste_example(item.name)
        for item in ASTE.ASTEDatasetList()
    }
    triplet_extractor = ASTE.AspectSentimentTripletExtractor(checkpoint="multilingual")
except Exception as err:
    print(err)
    aste_dataset_items, aste_dataset_dict, triplet_extractor = {}, {}, None

try:
    from pyabsa import ABSAInstruction

    # The last entry of the ACOS dataset list is left out of both mappings.
    acos_dataset_items = {
        item.name: item for item in ABSAInstruction.ACOSDatasetList()[:-1]
    }
    acos_dataset_dict = {
        item.name: get_acos_example(item.name)
        for item in ABSAInstruction.ACOSDatasetList()[:-1]
    }
    quadruple_extractor = ABSAInstruction.ABSAGenerator(
        checkpoint="multilingual", device=autocuda.auto_cuda()
    )
except Exception as err:
    print(err)
    acos_dataset_items, acos_dataset_dict, quadruple_extractor = {}, {}, None
125
 
126
 
127
  def perform_atepc_inference(text, dataset):
 
157
  return pred_triplets, true_triplets, "{}".format(text)
158
 
159
 
160
def perform_acos_inference(text, dataset):
    """Run quadruple extraction on ``text`` and tabulate the prediction.

    Args:
        text: Sentence to analyse. When empty, a random example is drawn
            from ``acos_dataset_dict[dataset]``.
        dataset: Dataset name used to pick the random example.

    Returns:
        A ``(result, text)`` tuple. ``result`` is a DataFrame with one row
        per predicted quadruple, and ``text`` is the sentence analysed.
    """
    if not text:
        text = random.choice(acos_dataset_dict[dataset])

    raw_output = quadruple_extractor.predict(text)

    # Checked in the same priority order as before.
    fields = ('aspect', 'opinion', 'sentiment', 'polarity', 'category')

    # NOTE(review): presumably raw_output[0] is a string of ', '-separated
    # quadruples whose parts are '|'-separated "label:value" pairs -- verify
    # against the ABSAGenerator output format.
    rows = []
    for output in raw_output[0].strip().split(', '):
        row = {}
        for sub_output in output.split('|'):
            # partition keeps everything after the first ':' and yields an
            # empty value (instead of raising) when no ':' is present.
            label, _, value = sub_output.partition(':')
            for field in fields:
                if field in label:
                    row[field] = value
                    break
        # One row per quadruple, so earlier quadruples are no longer
        # overwritten by later ones.
        if row:
            rows.append(row)

    result = pd.DataFrame(rows)
    return result, text
187
+
188
  demo = gr.Blocks()
189
 
190
  with demo:
 
191
 
 
 
192
 
193
+ with gr.Row():
194
+ if triplet_extractor:
195
+ with gr.Column():
196
+ gr.Markdown("# <p align='center'>Aspect Sentiment Triplet Extraction !</p>")
197
+
198
+ with gr.Row():
199
+ with gr.Column():
200
+ aste_input_sentence = gr.Textbox(
201
+ placeholder="Leave this box blank and choose a dataset will give you a random example...",
202
+ label="Example:",
203
+ )
204
+ gr.Markdown(
205
+ "You can find code and dataset at [ASTE examples](https://github.com/yangheng95/PyABSA/tree/v2/examples-v2/aspect_sentiment_triplet_extration)"
206
+ )
207
+ aste_dataset_ids = gr.Radio(
208
+ choices=[dataset.name for dataset in ASTE.ASTEDatasetList()[:-1]],
209
+ value="Restaurant14",
210
+ label="Datasets",
211
+ )
212
+ aste_inference_button = gr.Button("Let's go!")
213
+
214
+ aste_output_text = gr.TextArea(label="Example:")
215
+ aste_output_pred_df = gr.DataFrame(label="Predicted Triplets:")
216
+ aste_output_true_df = gr.DataFrame(label="Original Triplets:")
217
+
218
+ aste_inference_button.click(
219
+ fn=perform_aste_inference,
220
+ inputs=[aste_input_sentence, aste_dataset_ids],
221
+ outputs=[aste_output_pred_df, aste_output_true_df, aste_output_text],
222
+ )
223
+ if aspect_extractor:
224
+ with gr.Column():
225
+ gr.Markdown(
226
+ "# <p align='center'>Multilingual Aspect-based Sentiment Analysis !</p>"
227
+ )
228
+ with gr.Row():
229
+ with gr.Column():
230
+ atepc_input_sentence = gr.Textbox(
231
+ placeholder="Leave this box blank and choose a dataset will give you a random example...",
232
+ label="Example:",
233
+ )
234
+ gr.Markdown(
235
+ "You can find the datasets at [github.com/yangheng95/ABSADatasets](https://github.com/yangheng95/ABSADatasets/tree/v1.2/datasets/text_classification)"
236
+ )
237
+ atepc_dataset_ids = gr.Radio(
238
+ choices=[dataset.name for dataset in ATEPC.ATEPCDatasetList()[:-1]],
239
+ value="Laptop14",
240
+ label="Datasets",
241
+ )
242
+ atepc_inference_button = gr.Button("Let's go!")
243
+
244
+ atepc_output_text = gr.TextArea(label="Example:")
245
+ atepc_output_df = gr.DataFrame(label="Prediction Results:")
246
+
247
+ atepc_inference_button.click(
248
+ fn=perform_atepc_inference,
249
+ inputs=[atepc_input_sentence, atepc_dataset_ids],
250
+ outputs=[atepc_output_df, atepc_output_text],
251
+ )
252
+ if quadruple_extractor:
253
  with gr.Row():
254
  with gr.Column():
255
+ gr.Markdown("# <p align='center'>Aspect Category Opinion Sentiment Extraction !</p>")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
256
 
257
+ acos_input_sentence = gr.Textbox(
 
 
 
 
 
 
258
  placeholder="Leave this box blank and choose a dataset will give you a random example...",
259
  label="Example:",
260
  )
261
+ acos_dataset_ids = gr.Radio(
262
+ choices=[dataset.name for dataset in ABSAInstruction.ACOSDatasetList()],
263
+ value="Restaurant16",
 
 
 
264
  label="Datasets",
265
  )
266
+ acos_inference_button = gr.Button("Let's go!")
267
 
268
+ acos_output_text = gr.TextArea(label="Example:")
269
+ acos_output_pred_df = gr.DataFrame(label="Predicted Triplets:")
270
 
271
+ acos_inference_button.click(
272
+ fn=perform_acos_inference,
273
+ inputs=[acos_input_sentence, acos_dataset_ids],
274
+ outputs=[acos_output_pred_df, acos_output_text],
275
  )
276
  gr.Markdown(
277
  """### GitHub Repo: [PyABSA V2](https://github.com/yangheng95/PyABSA)
checkpoints-v2.0.json CHANGED
@@ -192,6 +192,18 @@
192
  "Author": "H, Yang ([email protected])"
193
  }
194
  },
 
 
 
 
 
 
 
 
 
 
 
 
195
  "UPPERTASKCODE": {
196
  "promise": {
197
  "id": "",
 
192
  "Author": "H, Yang ([email protected])"
193
  }
194
  },
195
+ "ACOS": {
196
+ "multilingual": {
197
+ "id": "",
198
+ "Training Model": "DeBERTa-v3-Base",
199
+ "Training Dataset": "SemEval + Synthetic + Chinese_Zhang datasets",
200
+ "Language": "Multilingual",
201
+ "Description": "Trained on RTX3090",
202
+ "Available Version": "2.1.8+",
203
+ "Checkpoint File": "multilingual-acos.zip",
204
+ "Author": "H, Yang ([email protected])"
205
+ }
206
+ },
207
  "UPPERTASKCODE": {
208
  "promise": {
209
  "id": "",
checkpoints.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"2.0.0": {"APC": {"multilingual": {"id": "", "Training Model": "FAST-LSA-T-V2-Deberta", "Training Dataset": "APCDatasetList.Multilingual", "Language": "Multilingual", "Description": "Trained on RTX3090", "Available Version": "1.10.5+", "Checkpoint File": "fast_lcf_bert_Multilingual_acc_87.18_f1_83.11.zip", "Author": "H, Yang ([email protected])"}, "multilingual2": {"id": "", "Training Model": "FAST-LSA-T-V2-Deberta", "Training Dataset": "APCDatasetList.Multilingual", "Language": "Multilingual", "Description": "Trained on RTX3090", "Available Version": "1.10.5+", "Checkpoint File": "fast_lcf_bert_Multilingual_acc_82.66_f1_82.06.zip", "Author": "H, Yang ([email protected])"}, "english": {"id": "", "Training Model": "FAST-LSA-T-V2-Deberta", "Training Dataset": "APCDatasetList.English", "Language": "English", "Description": "Trained on RTX3090", "Available Version": "1.10.5+", "Checkpoint File": "fast_lsa_t_v2_English_acc_82.21_f1_81.81.zip", "Author": "H, Yang ([email protected])"}, "chinese": {"id": "", "Training Model": "FAST-LSA-T-V2-Deberta", "Training Dataset": "APCDatasetList.Chinese", "Language": "Chinese", "Description": "Trained on RTX3090", "Available Version": "1.10.5+", "Checkpoint File": "fast_lsa_t_v2_Chinese_acc_96.0_f1_95.1.zip", "Author": "H, Yang ([email protected])"}}, "ATEPC": {"multilingual": {"id": "", "Training Model": "FAST-LCF-ATEPC", "Training Dataset": "ABSADatasets.Multilingual", "Language": "Multilingual", "Description": "Trained on RTX3090", "Available Version": "1.16.0+", "Checkpoint File": "fast_lcf_atepc_Multilingual_cdw_apcacc_85.1_apcf1_80.2_atef1_76.45.zip", "Author": "H, Yang ([email protected])"}, "multilingual-original": {"id": "", "Training Model": "FAST-LCF-ATEPC", "Training Dataset": "ABSADatasets.Multilingual", "Language": "Multilingual", "Description": "Trained on RTX3090", "Available Version": "1.16.0+", "Checkpoint File": "fast_lcf_atepc_Multilingual_cdw_apcacc_80.81_apcf1_73.75_atef1_76.01.zip", "Author": "H, Yang 
([email protected])"}, "multilingual2": {"id": "", "Training Model": "FAST-LCF-ATEPC", "Training Dataset": "ABSADatasets.Multilingual", "Language": "Multilingual", "Description": "Trained on RTX3090", "Available Version": "1.16.0+", "Checkpoint File": "fast_lcf_atepc_Multilingual_cdw_apcacc_78.08_apcf1_77.81_atef1_75.41.zip", "Author": "H, Yang ([email protected])"}, "english": {"id": "", "Training Model": "FAST-LCF-ATEPC", "Training Dataset": "ATEPCDatasetList.English", "Language": "English", "Description": "Trained on RTX3090", "Available Version": "1.10.5+", "Checkpoint File": "fast_lcf_atepc_English_cdw_apcacc_82.36_apcf1_81.89_atef1_75.43.zip", "Author": "H, Yang ([email protected])"}, "chinese": {"id": "", "Training Model": "FAST-LCF-ATEPC", "Training Dataset": "ATEPCDatasetList.Chinese", "Language": "Chinese", "Description": "Trained on RTX3090", "Available Version": "1.10.5+", "Checkpoint File": "fast_lcf_atepc_Chinese_cdw_apcacc_96.22_apcf1_95.32_atef1_78.73.zip", "Author": "H, Yang ([email protected])"}}, "RNAC": {"degrad_lstm": {"id": "", "Training Model": "LSTM", "Training Dataset": "ABSADatasets.Multilingual", "Language": "RNA", "Description": "Trained on RTX3090", "Available Version": "1.16.0+", "Checkpoint File": "lstm_degrad_acc_85.26_f1_84.62.zip", "Author": "H, Yang ([email protected])"}, "degrad_bert": {"id": "", "Training Model": "MLP", "Training Dataset": "Degrad", "Language": "RNA", "Description": "Trained on RTX3090", "Available Version": "1.16.0+", "Checkpoint File": "bert_mlp_degrad_acc_87.44_f1_86.99.zip", "Author": "H, Yang ([email protected])"}}, "TAD": {"tad-sst2": {"id": "", "Training Model": "TAD", "Training Dataset": "SST2", "Language": "English", "Description": "Trained on RTX3090", "Available Version": "1.15+", "Checkpoint File": "TAD-SST2.zip", "Author": "H, Yang ([email protected])"}, "tad-agnews10k": {"id": "", "Training Model": "TAD", "Training Dataset": "AGNews", "Language": "English", "Description": "Trained on RTX3090", 
"Available Version": "1.15+", "Checkpoint File": "TAD-AGNews10K.zip", "Author": "H, Yang ([email protected])"}, "tad-amazon": {"id": "", "Training Model": "TAD", "Training Dataset": "AGNews", "Language": "English", "Description": "Trained on RTX3090", "Available Version": "1.15+", "Checkpoint File": "TAD-Amazon.zip", "Author": "H, Yang ([email protected])"}}, "CDD": {"promise": {"id": "", "Training Model": "CodeT5-small", "Training Dataset": "Promise", "Language": "Code", "Description": "Trained on RTX3090", "Available Version": "1.16.0+", "Checkpoint File": "bert_mlp_all_cpdp_acc_75.33_f1_73.52.zip", "Author": "H, Yang ([email protected])"}}, "ASTE": {"english1": {"id": "", "Training Model": "DeBERTa-v3-Base", "Training Dataset": "SemEval", "Language": "English", "Description": "Trained on RTX3090", "Available Version": "2.1.1+", "Checkpoint File": "EMCGCN_SemEval_f1_74.01.zip", "Author": "H, Yang ([email protected])"}, "english": {"id": "", "Training Model": "DeBERTa-v3-Base", "Training Dataset": "SemEval", "Language": "English", "Description": "Trained on RTX3090", "Available Version": "2.1.1+", "Checkpoint File": "ASTE-EMCGCN_SemEval_f1_74.71.zip", "Author": "H, Yang ([email protected])"}, "multilingual": {"id": "", "Training Model": "DeBERTa-v3-Base", "Training Dataset": "SemEval + Synthetic + Chinese_Zhang datasets", "Language": "Multilingual", "Description": "Trained on RTX3090", "Available Version": "2.1.1+", "Checkpoint File": "EMCGCN-Multilingual-f1_51.95.zip", "Author": "H, Yang ([email protected])"}}, "ACOS": {"multilingual": {"id": "", "Training Model": "DeBERTa-v3-Base", "Training Dataset": "SemEval + Synthetic + Chinese_Zhang datasets", "Language": "Multilingual", "Description": "Trained on RTX3090", "Available Version": "2.1.8+", "Checkpoint File": "ACOS.zip", "Author": "H, Yang ([email protected])"}}, "UPPERTASKCODE": {"promise": {"id": "", "Training Model": "CodeT5-small", "Training Dataset": "DatasetName", "Language": "", "Description": "Trained 
on RTX3090", "Available Version": "1.16.0+", "Checkpoint File": "lstm_degrad_acc_85.26_f1_84.62.zip", "Author": "H, Yang ([email protected])"}}}}
checkpoints/Multilingual/ACOS/multilingual-acos.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b7e6f53b721579e10fab9d82ff085caf051a6917dcd7d2ec9a4d00a8c44c8d0
3
+ size 882150443