nickil committed on
Commit
4d50603
1 Parent(s): 38c2a22

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +72 -55
app.py CHANGED
@@ -1,9 +1,16 @@
 
 
 
 
 
 
 
1
  import gradio
2
  import benepar
3
  import spacy
4
  import nltk
5
  from nltk.tree import Tree
6
- nltk.download('stopwords')
7
 
8
  from huggingface_hub import hf_hub_url, cached_download
9
 
@@ -13,57 +20,67 @@ from weakly_supervised_parser.model.trainer import InsideOutsideStringClassifier
13
 
14
  from weakly_supervised_parser.model.span_classifier import LightningModel
15
 
16
- benepar.download('benepar_en3')
17
-
18
- nlp = spacy.load("en_core_web_md")
19
- nlp.add_pipe("benepar", config={"model": "benepar_en3"})
20
-
21
- inside_model = InsideOutsideStringClassifier(model_name_or_path="roberta-base", max_seq_length=256)
22
- fetch_url_inside_model = hf_hub_url(repo_id="nickil/weakly-supervised-parsing", filename="inside_model.onnx", revision="main")
23
- # inside_model = LightningModel.load_from_checkpoint(checkpoint_path=cached_download(fetch_url_inside_model))
24
- inside_model.load_model(pre_trained_model_path=cached_download(fetch_url_inside_model))
25
-
26
- # outside_model = InsideOutsideStringClassifier(model_name_or_path="roberta-base", max_seq_length=64)
27
- # outside_model.load_model(pre_trained_model_path=TRAINED_MODEL_PATH + "outside_model.onnx")
28
-
29
- # inside_outside_model = InsideOutsideStringClassifier(model_name_or_path="roberta-base", max_seq_length=256)
30
- # inside_outside_model.load_model(pre_trained_model_path=TRAINED_MODEL_PATH + "inside_outside_model.onnx")
31
-
32
-
33
- def predict(sentence, model):
34
- gold_standard = list(nlp(sentence).sents)[0]._.parse_string
35
- if model == "inside":
36
- best_parse = Predictor(sentence=sentence).obtain_best_parse(predict_type="inside", model=inside_model, scale_axis=1, predict_batch_size=128)
37
- elif model == "outside":
38
- best_parse = Predictor(sentence=sentence).obtain_best_parse(predict_type="outside", model=outside_model, scale_axis=1, predict_batch_size=128)
39
- elif model == "inside-outside":
40
- best_parse = Predictor(sentence=sentence).obtain_best_parse(predict_type="inside_outside", model=inside_outside_model, scale_axis=1, predict_batch_size=128)
41
- sentence_f1 = calculate_F1_for_spans(tree_to_spans(gold_standard), tree_to_spans(best_parse))
42
- return gold_standard, best_parse, f"{sentence_f1:.2f}"
43
-
44
-
45
- iface = gradio.Interface(
46
- title="Co-training an Unsupervised Constituency Parser with Weak Supervision",
47
- description="Demo for the repository - [weakly-supervised-parsing](https://github.com/Nickil21/weakly-supervised-parsing) (ACL Findings 2022)",
48
- theme="default",
49
- article="""<h4 class='text-lg font-semibold my-2'>Note</h4>
50
- - We use a strong supervised parsing model `benepar_en3` which is based on T5-small to compute the gold parse.<br>
51
- - Sentence F1 score corresponds to the macro F1 score.
52
- """,
53
- allow_flagging="never",
54
- fn=predict,
55
- inputs=[
56
- gradio.inputs.Textbox(label="Sentence", placeholder="Enter a sentence in English"),
57
- gradio.inputs.Radio(["inside", "outside", "inside-outside"], default="inside", label="Choose Model"),
58
- ],
59
- outputs=[
60
- gradio.outputs.Textbox(label="Gold Parse Tree"),
61
- gradio.outputs.Textbox(label="Predicted Parse Tree"),
62
- gradio.outputs.Textbox(label="F1 score"),
63
- ],
64
- examples=[
65
- ["Russia 's war on Ukraine unsettles investors expecting carve-out deal uptick for 2022 .", "inside-outside"],
66
- ["Bitcoin community under pressure to cut energy use .", "inside"],
67
- ],
68
- )
69
- iface.launch()
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import matplotlib
3
+ matplotlib.use('agg')
4
+
5
+ from PIL import Image
6
+
7
+
8
  import gradio
9
  import benepar
10
  import spacy
11
  import nltk
12
  from nltk.tree import Tree
13
+ from nltk.draw.tree import TreeView
14
 
15
  from huggingface_hub import hf_hub_url, cached_download
16
 
 
20
 
21
  from weakly_supervised_parser.model.span_classifier import LightningModel
22
 
23
+
24
+ if __name__ == "__main__":
25
+ nltk.download('stopwords')
26
+ benepar.download('benepar_en3')
27
+
28
+ nlp = spacy.load("en_core_web_md")
29
+ nlp.add_pipe("benepar", config={"model": "benepar_en3"})
30
+
31
+ # inside_model = InsideOutsideStringClassifier(model_name_or_path="roberta-base", max_seq_length=256)
32
+ fetch_url_inside_model = hf_hub_url(repo_id="nickil/weakly-supervised-parsing", filename="inside_model.ckpt", revision="main")
33
+ inside_model = LightningModel.load_from_checkpoint(checkpoint_path=cached_download(fetch_url_inside_model))
34
+ # inside_model.load_model(pre_trained_model_path=cached_download(fetch_url_inside_model))
35
+
36
+ # outside_model = InsideOutsideStringClassifier(model_name_or_path="roberta-base", max_seq_length=64)
37
+ # outside_model.load_model(pre_trained_model_path=TRAINED_MODEL_PATH + "outside_model.onnx")
38
+
39
+ # inside_outside_model = InsideOutsideStringClassifier(model_name_or_path="roberta-base", max_seq_length=256)
40
+ # inside_outside_model.load_model(pre_trained_model_path=TRAINED_MODEL_PATH + "inside_outside_model.onnx")
41
+
42
+
43
+ def predict(sentence, model):
44
+ gold_standard = list(nlp(sentence).sents)[0]._.parse_string
45
+ if model == "inside":
46
+ best_parse = Predictor(sentence=sentence).obtain_best_parse(predict_type="inside", model=inside_model, scale_axis=1, predict_batch_size=128)
47
+ elif model == "outside":
48
+ best_parse = Predictor(sentence=sentence).obtain_best_parse(predict_type="outside", model=outside_model, scale_axis=1, predict_batch_size=128)
49
+ elif model == "inside-outside":
50
+ best_parse = Predictor(sentence=sentence).obtain_best_parse(predict_type="inside_outside", model=inside_outside_model, scale_axis=1, predict_batch_size=128)
51
+ sentence_f1 = calculate_F1_for_spans(tree_to_spans(gold_standard), tree_to_spans(best_parse))
52
+ TreeView(Tree.fromstring(gold_standard))._cframe.print_to_file('gold_standard.ps')
53
+ TreeView(Tree.fromstring(best_parse))._cframe.print_to_file('best_parse.ps')
54
+ os.system('convert gold_standard.ps gold_standard.png')
55
+ os.system('convert best_parse.ps best_parse.png')
56
+ gold_standard_img = Image.open("gold_standard.png")
57
+ best_parse_img = Image.open("best_parse.png")
58
+ return gold_standard_img, best_parse_img, f"{sentence_f1:.2f}"
59
+
60
+
61
+ iface = gradio.Interface(
62
+ title="Co-training an Unsupervised Constituency Parser with Weak Supervision",
63
+ description="Demo for the repository - [weakly-supervised-parsing](https://github.com/Nickil21/weakly-supervised-parsing) (ACL Findings 2022)",
64
+ theme="default",
65
+ article="""<h4 class='text-lg font-semibold my-2'>Note</h4>
66
+ - We use a strong supervised parsing model `benepar_en3` which is based on T5-small to compute the gold parse.<br>
67
+ - Sentence F1 score corresponds to the macro F1 score.
68
+ """,
69
+ allow_flagging="never",
70
+ fn=predict,
71
+ inputs=[
72
+ gradio.inputs.Textbox(label="Sentence", placeholder="Enter a sentence in English", lines=2),
73
+ gradio.inputs.Radio(["inside", "outside", "inside-outside"], default="inside", label="Choose Model"),
74
+ ],
75
+ outputs=[
76
+ gradio.outputs.Image(label="Gold Parse Tree"),
77
+ gradio.outputs.Image(label="Predicted Parse Tree"),
78
+ gradio.outputs.Textbox(label="F1 score"),
79
+ ],
80
+ examples=[
81
+ ["Russia 's war on Ukraine unsettles investors expecting carve-out deal uptick for 2022 .", "inside-outside"],
82
+ ["Bitcoin community under pressure to cut energy use .", "inside"],
83
+ ],
84
+ )
85
+
86
+ iface.launch(share=True)