centaur31 committed on
Commit
68937fa
1 Parent(s): 9ea59dd

Upload 6 files

Browse files
README.md CHANGED
@@ -1,3 +1,74 @@
1
  ---
2
- license: apache-2.0
 
 
 
 
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ language: en
3
+ tags:
4
+ - exbert
5
+
6
+ license: mit
7
  ---
8
+
9
+
10
+ # GPT-2
11
+
12
+ Test the whole generation capabilities here: https://transformer.huggingface.co/doc/gpt2-large
13
+
14
+ Pretrained model on the English language using a causal language modeling (CLM) objective. It was introduced in
15
+ [this paper](https://d4mucfpksywv.cloudfront.net/better-language-models/language_models_are_unsupervised_multitask_learners.pdf)
16
+ and first released at [this page](https://openai.com/blog/better-language-models/).
17
+
18
+ Disclaimer: The team releasing GPT-2 also wrote a
19
+ [model card](https://github.com/openai/gpt-2/blob/master/model_card.md) for their model. Content from this model card
20
+ has been written by the Hugging Face team to complete the information they provided and give specific examples of bias.
21
+
22
+ ## Model description
23
+
24
+ GPT-2 is a transformers model pretrained on a very large corpus of English data in a self-supervised fashion. This
25
+ means it was pretrained on the raw texts only, with no humans labelling them in any way (which is why it can use lots
26
+ of publicly available data) with an automatic process to generate inputs and labels from those texts. More precisely,
27
+ it was trained to guess the next word in sentences.
28
+
29
+ Concretely, inputs are sequences of continuous text of a certain length and the targets are the same sequence,
30
+ shifted one token (word or piece of word) to the right. The model internally uses a masking mechanism to make sure the
31
+ prediction for the token `i` only uses the inputs from `1` to `i` and not the future tokens.
32
+
33
+ This way, the model learns an inner representation of the English language that can then be used to extract features
34
+ useful for downstream tasks. The model is best at what it was pretrained for, however, which is generating texts from a
35
+ prompt.
36
+
37
+ ## Intended uses & limitations
38
+
39
+ You can use the raw model for text generation or fine-tune it to a downstream task. See the
40
+ [model hub](https://huggingface.co/models?filter=gpt2) to look for fine-tuned versions on a task that interests you.
41
+
42
+ ### How to use
43
+
44
+ Here is how to use the ONNX models of gpt2 to generate text from a given prompt:
45
+
46
+ Example using transformers.pipelines:
47
+
48
+ ```python
49
+ from transformers import AutoTokenizer, pipeline
50
+ from optimum.onnxruntime import ORTModelForCausalLM
51
+
52
+ tokenizer = AutoTokenizer.from_pretrained("gpt2")
53
+ model = ORTModelForCausalLM.from_pretrained("gpt2", from_transformers=True)
54
+ onnx_gen = pipeline("text-generation", model=model, tokenizer=tokenizer)
55
+
56
+ text = "My name is Philipp and I live in Germany."
57
+ gen = onnx_gen(text)
58
+ ```
59
+
60
+ Example of text generation:
61
+
62
+ ```python
63
+ from transformers import AutoTokenizer
64
+ from optimum.onnxruntime import ORTModelForCausalLM
65
+ import torch
66
+
67
+ tokenizer = AutoTokenizer.from_pretrained("optimum/gpt2")
68
+ model = ORTModelForCausalLM.from_pretrained("optimum/gpt2")
69
+
70
+ inputs = tokenizer("My name is Arthur and I live in", return_tensors="pt")
71
+
72
+ gen_tokens = model.generate(**inputs,do_sample=True,temperature=0.9, min_length=20,max_length=20)
73
+ tokenizer.batch_decode(gen_tokens)
74
+ ```
config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "models2/gpt2",
3
+ "activation_function": "gelu_new",
4
+ "architectures": [
5
+ "GPT2Model"
6
+ ],
7
+ "attn_pdrop": 0.1,
8
+ "bos_token_id": 50256,
9
+ "embd_pdrop": 0.1,
10
+ "eos_token_id": 50256,
11
+ "initializer_range": 0.02,
12
+ "layer_norm_epsilon": 1e-05,
13
+ "model_type": "gpt2",
14
+ "n_ctx": 1024,
15
+ "n_embd": 768,
16
+ "n_head": 12,
17
+ "n_inner": null,
18
+ "n_layer": 12,
19
+ "n_positions": 1024,
20
+ "reorder_and_upcast_attn": false,
21
+ "resid_pdrop": 0.1,
22
+ "scale_attn_by_inverse_layer_idx": false,
23
+ "scale_attn_weights": true,
24
+ "summary_activation": null,
25
+ "summary_first_dropout": 0.1,
26
+ "summary_proj_to_labels": true,
27
+ "summary_type": "cls_index",
28
+ "summary_use_proj": true,
29
+ "task_specific_params": {
30
+ "text-generation": {
31
+ "do_sample": true,
32
+ "max_length": 50
33
+ }
34
+ },
35
+ "torch_dtype": "float32",
36
+ "transformers_version": "4.27.4",
37
+ "use_cache": true,
38
+ "vocab_size": 50257
39
+ }
decoder_model.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07605a87da6fd745f2358e8fea9b0cd3802a12e90deae2237f513f387581afd1
3
+ size 653666800
decoder_with_past_model.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:411f260a4f23aac189d77e4bbd2d172622c78ca89c94ca104e609e6e1e5492b1
3
+ size 653673608
gitattributes.txt ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff