Commit ff652d8
Parent(s):
Duplicate from Michau/t5-base-en-generate-headline
Co-authored-by: Michal Pleban <[email protected]>
- .gitattributes +9 -0
- README.md +50 -0
- config.json +52 -0
- flax_model.msgpack +3 -0
- pytorch_model.bin +3 -0
- special_tokens_map.json +1 -0
- spiece.model +0 -0
- tf_model.h5 +3 -0
- tokenizer_config.json +1 -0
.gitattributes
ADDED
@@ -0,0 +1,9 @@
+*.bin.* filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tar.gz filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
README.md
ADDED
@@ -0,0 +1,50 @@
+---
+duplicated_from: Michau/t5-base-en-generate-headline
+---
+## About the model
+
+The model has been trained on a collection of 500k articles with headings. Its purpose is to create a one-line heading suitable for the given article.
+
+Sample code with a WikiNews article:
+
+```python
+import torch
+from transformers import T5ForConditionalGeneration, T5Tokenizer
+
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+model = T5ForConditionalGeneration.from_pretrained("Michau/t5-base-en-generate-headline")
+tokenizer = T5Tokenizer.from_pretrained("Michau/t5-base-en-generate-headline")
+model = model.to(device)
+
+article = '''
+Very early yesterday morning, the United States President Donald Trump reported he and his wife First Lady Melania Trump tested positive for COVID-19. Officials said the Trumps' 14-year-old son Barron tested negative as did First Family and Senior Advisors Jared Kushner and Ivanka Trump.
+Trump took to social media, posting at 12:54 am local time (0454 UTC) on Twitter, "Tonight, [Melania] and I tested positive for COVID-19. We will begin our quarantine and recovery process immediately. We will get through this TOGETHER!" Yesterday afternoon Marine One landed on the White House's South Lawn flying Trump to Walter Reed National Military Medical Center (WRNMMC) in Bethesda, Maryland.
+Reports said both were showing "mild symptoms". Senior administration officials were tested as people were informed of the positive test. Senior advisor Hope Hicks had tested positive on Thursday.
+Presidential physician Sean Conley issued a statement saying Trump has been given zinc, vitamin D, Pepcid and a daily Aspirin. Conley also gave a single dose of the experimental polyclonal antibodies drug from Regeneron Pharmaceuticals.
+According to official statements, Trump, now operating from the WRNMMC, is to continue performing his duties as president during a 14-day quarantine. In the event of Trump becoming incapacitated, Vice President Mike Pence could take over the duties of president via the 25th Amendment of the US Constitution. The Pence family all tested negative as of yesterday and there were no changes regarding Pence's campaign events.
+'''
+
+text = "headline: " + article
+
+max_len = 256
+
+encoding = tokenizer.encode_plus(text, return_tensors="pt")
+input_ids = encoding["input_ids"].to(device)
+attention_masks = encoding["attention_mask"].to(device)
+
+beam_outputs = model.generate(
+    input_ids=input_ids,
+    attention_mask=attention_masks,
+    max_length=64,
+    num_beams=3,
+    early_stopping=True,
+)
+
+result = tokenizer.decode(beam_outputs[0])
+print(result)
+```
+
+Result:
+
+```Trump and First Lady Melania Test Positive for COVID-19```
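
Note: the committed README loads the model and tokenizer explicitly. The same headline generation can also be run through the transformers `text2text-generation` pipeline; the sketch below is not part of the committed files and assumes `transformers` and `torch` are installed.

```python
from transformers import pipeline

# Wrap the checkpoint committed above in a text2text-generation pipeline
# (the pipeline task used for T5-style encoder-decoder models).
headline = pipeline(
    "text2text-generation",
    model="Michau/t5-base-en-generate-headline",
)

article = (
    "Very early yesterday morning, the United States President Donald Trump "
    "reported he and his wife First Lady Melania Trump tested positive for COVID-19."
)

# The checkpoint expects the same "headline: " task prefix used in the README sample;
# the generation arguments mirror the README's model.generate call.
result = headline("headline: " + article, max_length=64, num_beams=3, early_stopping=True)
print(result[0]["generated_text"])
```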
config.json
ADDED
@@ -0,0 +1,52 @@
+{
+  "architectures": [
+    "T5ForConditionalGeneration"
+  ],
+  "d_ff": 3072,
+  "d_kv": 64,
+  "d_model": 768,
+  "decoder_start_token_id": 0,
+  "dropout_rate": 0.1,
+  "eos_token_id": 1,
+  "initializer_factor": 1.0,
+  "is_encoder_decoder": true,
+  "layer_norm_epsilon": 1e-06,
+  "model_type": "t5",
+  "n_positions": 512,
+  "num_decoder_layers": 12,
+  "num_heads": 12,
+  "num_layers": 12,
+  "output_past": true,
+  "pad_token_id": 0,
+  "relative_attention_num_buckets": 32,
+  "task_specific_params": {
+    "summarization": {
+      "early_stopping": true,
+      "length_penalty": 2.0,
+      "max_length": 200,
+      "min_length": 30,
+      "no_repeat_ngram_size": 3,
+      "num_beams": 4,
+      "prefix": "summarize: "
+    },
+    "translation_en_to_de": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to German: "
+    },
+    "translation_en_to_fr": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to French: "
+    },
+    "translation_en_to_ro": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to Romanian: "
+    }
+  },
+  "vocab_size": 32128
+}
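
For reference, the values in the config.json committed above can be read back through the transformers config API; a minimal sketch, not part of the commit, assuming `transformers` is installed:

```python
from transformers import AutoConfig

# Load the config.json committed above directly from the Hub.
config = AutoConfig.from_pretrained("Michau/t5-base-en-generate-headline")

print(config.model_type)                    # "t5"
print(config.num_layers, config.num_heads)  # 12 layers, 12 attention heads
print(config.d_model, config.d_ff)          # 768 hidden size, 3072 feed-forward size
print(config.task_specific_params["summarization"]["prefix"])  # "summarize: "
```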
flax_model.msgpack
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9a5333753924011bd712660e5f61f21a039232d8c2c9d9653117387f7d8dc7ed
+size 891625348
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0b6757098828488caa2a6a5dda131be8d4f2b1c3c2cd4c25429c31ffc5d4a098
+size 891695056
special_tokens_map.json
ADDED
@@ -0,0 +1 @@
+{"eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>", "additional_special_tokens": ["<extra_id_0>", "<extra_id_1>", "<extra_id_2>", "<extra_id_3>", "<extra_id_4>", "<extra_id_5>", "<extra_id_6>", "<extra_id_7>", "<extra_id_8>", "<extra_id_9>", "<extra_id_10>", "<extra_id_11>", "<extra_id_12>", "<extra_id_13>", "<extra_id_14>", "<extra_id_15>", "<extra_id_16>", "<extra_id_17>", "<extra_id_18>", "<extra_id_19>", "<extra_id_20>", "<extra_id_21>", "<extra_id_22>", "<extra_id_23>", "<extra_id_24>", "<extra_id_25>", "<extra_id_26>", "<extra_id_27>", "<extra_id_28>", "<extra_id_29>", "<extra_id_30>", "<extra_id_31>", "<extra_id_32>", "<extra_id_33>", "<extra_id_34>", "<extra_id_35>", "<extra_id_36>", "<extra_id_37>", "<extra_id_38>", "<extra_id_39>", "<extra_id_40>", "<extra_id_41>", "<extra_id_42>", "<extra_id_43>", "<extra_id_44>", "<extra_id_45>", "<extra_id_46>", "<extra_id_47>", "<extra_id_48>", "<extra_id_49>", "<extra_id_50>", "<extra_id_51>", "<extra_id_52>", "<extra_id_53>", "<extra_id_54>", "<extra_id_55>", "<extra_id_56>", "<extra_id_57>", "<extra_id_58>", "<extra_id_59>", "<extra_id_60>", "<extra_id_61>", "<extra_id_62>", "<extra_id_63>", "<extra_id_64>", "<extra_id_65>", "<extra_id_66>", "<extra_id_67>", "<extra_id_68>", "<extra_id_69>", "<extra_id_70>", "<extra_id_71>", "<extra_id_72>", "<extra_id_73>", "<extra_id_74>", "<extra_id_75>", "<extra_id_76>", "<extra_id_77>", "<extra_id_78>", "<extra_id_79>", "<extra_id_80>", "<extra_id_81>", "<extra_id_82>", "<extra_id_83>", "<extra_id_84>", "<extra_id_85>", "<extra_id_86>", "<extra_id_87>", "<extra_id_88>", "<extra_id_89>", "<extra_id_90>", "<extra_id_91>", "<extra_id_92>", "<extra_id_93>", "<extra_id_94>", "<extra_id_95>", "<extra_id_96>", "<extra_id_97>", "<extra_id_98>", "<extra_id_99>"]}
spiece.model
ADDED
Binary file (792 kB)
tf_model.h5
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e3ffd17d1142ab8136cb657deacefa5c44774fe90c851beec1e06a0e65fc4e0f
+size 892147952
tokenizer_config.json
ADDED
@@ -0,0 +1 @@
+{"truncate": true, "model_max_length": 512}
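
tokenizer_config.json pins `model_max_length` to 512, so longer inputs are clipped to 512 SentencePiece tokens when truncation is requested; a small sketch, not part of the commit, assuming `transformers`, `sentencepiece`, and `torch` are installed:

```python
from transformers import T5Tokenizer

tokenizer = T5Tokenizer.from_pretrained("Michau/t5-base-en-generate-headline")
print(tokenizer.model_max_length)  # 512, taken from tokenizer_config.json above

# A deliberately over-long input is cut to the 512-token limit.
enc = tokenizer("headline: " + "word " * 2000, truncation=True, return_tensors="pt")
print(enc["input_ids"].shape)      # torch.Size([1, 512])
```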