Add multilingual to the language tag
#6
by
lbourdois
- opened
README.md
CHANGED
@@ -4,22 +4,24 @@ language:
|
|
4 |
- de
|
5 |
- fr
|
6 |
- it
|
|
|
|
|
7 |
tags:
|
8 |
- punctuation prediction
|
9 |
- punctuation
|
10 |
datasets: wmt/europarl
|
11 |
-
license: mit
|
12 |
-
widget:
|
13 |
-
- text: "Ho sentito che ti sei laureata il che mi fa molto piacere"
|
14 |
-
example_title: "Italian"
|
15 |
-
- text: "Tous les matins vers quatre heures mon père ouvrait la porte de ma chambre"
|
16 |
-
example_title: "French"
|
17 |
-
- text: "Ist das eine Frage Frau Müller"
|
18 |
-
example_title: "German"
|
19 |
-
- text: "Yet she blushed as if with guilt when Cynthia reading her thoughts said to her one day Molly you're very glad to get rid of us are not you"
|
20 |
-
example_title: "English"
|
21 |
metrics:
|
22 |
- f1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
---
|
24 |
|
25 |
This model predicts the punctuation of English, Italian, French and German texts. We developed it to restore the punctuation of transcribed spoken language.
|
@@ -42,13 +44,13 @@ pip install deepmultilingualpunctuation
|
|
42 |
from deepmultilingualpunctuation import PunctuationModel
|
43 |
|
44 |
model = PunctuationModel()
|
45 |
-
text = "My name is Clara and I live in Berkeley California Ist das eine Frage Frau Müller"
|
46 |
result = model.restore_punctuation(text)
|
47 |
print(result)
|
48 |
```
|
49 |
|
50 |
**output**
|
51 |
-
> My name is Clara and I live in Berkeley, California. Ist das eine Frage, Frau Müller?
|
52 |
|
53 |
|
54 |
### Predict Labels
|
@@ -56,7 +58,7 @@ print(result)
|
|
56 |
from deepmultilingualpunctuation import PunctuationModel
|
57 |
|
58 |
model = PunctuationModel()
|
59 |
-
text = "My name is Clara and I live in Berkeley California Ist das eine Frage Frau Müller"
|
60 |
clean_text = model.preprocess(text)
|
61 |
labled_words = model.predict(clean_text)
|
62 |
print(labled_words)
|
@@ -64,7 +66,7 @@ print(labled_words)
|
|
64 |
|
65 |
**output**
|
66 |
|
67 |
-
> [['My', '0', 0.9999887], ['name', '0', 0.99998665], ['is', '0', 0.9998579], ['Clara', '0', 0.6752215], ['and', '0', 0.99990904], ['I', '0', 0.9999877], ['live', '0', 0.9999839], ['in', '0', 0.9999515], ['Berkeley', ',', 0.99800044], ['California', '.', 0.99534047], ['Ist', '0', 0.99998784], ['das', '0', 0.99999154], ['eine', '0', 0.9999918], ['Frage', ',', 0.99622655], ['Frau', '0', 0.9999889], ['Müller', '?', 0.99863917]]
|
68 |
|
69 |
|
70 |
|
@@ -112,7 +114,7 @@ model = PunctuationModel(model = "oliverguhr/fullstop-dutch-punctuation-predicti
|
|
112 |
```
|
113 |
@article{guhr-EtAl:2021:fullstop,
|
114 |
title={FullStop: Multilingual Deep Models for Punctuation Prediction},
|
115 |
-
author = {Guhr, Oliver and Schumann, Anne-Kathrin and Bahrmann, Frank and Böhme, Hans Joachim},
|
116 |
booktitle = {Proceedings of the Swiss Text Analytics Conference 2021},
|
117 |
month = {June},
|
118 |
year = {2021},
|
|
|
4 |
- de
|
5 |
- fr
|
6 |
- it
|
7 |
+
- multilingual
|
8 |
+
license: mit
|
9 |
tags:
|
10 |
- punctuation prediction
|
11 |
- punctuation
|
12 |
datasets: wmt/europarl
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
metrics:
|
14 |
- f1
|
15 |
+
widget:
|
16 |
+
- text: Ho sentito che ti sei laureata il che mi fa molto piacere
|
17 |
+
example_title: Italian
|
18 |
+
- text: Tous les matins vers quatre heures mon père ouvrait la porte de ma chambre
|
19 |
+
example_title: French
|
20 |
+
- text: Ist das eine Frage Frau Müller
|
21 |
+
example_title: German
|
22 |
+
- text: Yet she blushed as if with guilt when Cynthia reading her thoughts said to
|
23 |
+
her one day Molly you're very glad to get rid of us are not you
|
24 |
+
example_title: English
|
25 |
---
|
26 |
|
27 |
This model predicts the punctuation of English, Italian, French and German texts. We developed it to restore the punctuation of transcribed spoken language.
|
|
|
44 |
from deepmultilingualpunctuation import PunctuationModel
|
45 |
|
46 |
model = PunctuationModel()
|
47 |
+
text = "My name is Clara and I live in Berkeley California Ist das eine Frage Frau Müller"
|
48 |
result = model.restore_punctuation(text)
|
49 |
print(result)
|
50 |
```
|
51 |
|
52 |
**output**
|
53 |
+
> My name is Clara and I live in Berkeley, California. Ist das eine Frage, Frau Müller?
|
54 |
|
55 |
|
56 |
### Predict Labels
|
|
|
58 |
from deepmultilingualpunctuation import PunctuationModel
|
59 |
|
60 |
model = PunctuationModel()
|
61 |
+
text = "My name is Clara and I live in Berkeley California Ist das eine Frage Frau Müller"
|
62 |
clean_text = model.preprocess(text)
|
63 |
labled_words = model.predict(clean_text)
|
64 |
print(labled_words)
|
|
|
66 |
|
67 |
**output**
|
68 |
|
69 |
+
> [['My', '0', 0.9999887], ['name', '0', 0.99998665], ['is', '0', 0.9998579], ['Clara', '0', 0.6752215], ['and', '0', 0.99990904], ['I', '0', 0.9999877], ['live', '0', 0.9999839], ['in', '0', 0.9999515], ['Berkeley', ',', 0.99800044], ['California', '.', 0.99534047], ['Ist', '0', 0.99998784], ['das', '0', 0.99999154], ['eine', '0', 0.9999918], ['Frage', ',', 0.99622655], ['Frau', '0', 0.9999889], ['Müller', '?', 0.99863917]]
|
70 |
|
71 |
|
72 |
|
|
|
114 |
```
|
115 |
@article{guhr-EtAl:2021:fullstop,
|
116 |
title={FullStop: Multilingual Deep Models for Punctuation Prediction},
|
117 |
+
author = {Guhr, Oliver and Schumann, Anne-Kathrin and Bahrmann, Frank and Böhme, Hans Joachim},
|
118 |
booktitle = {Proceedings of the Swiss Text Analytics Conference 2021},
|
119 |
month = {June},
|
120 |
year = {2021},
|