Add pipeline example
Browse files
README.md
CHANGED
@@ -12,3 +12,48 @@ widget:
|
|
12 |
|
13 |
Title generator based on GPT-Neo 125M, fine-tuned on a dataset of 39k URL titles. All URLs were selected from the top 10 Google results for a list of keywords about "Electric car" - "Electric car for sale".
|
14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
|
13 |
Title generator based on GPT-Neo 125M, fine-tuned on a dataset of 39k URL titles. All URLs were selected from the top 10 Google results for a list of keywords about "Electric car" - "Electric car for sale".
|
14 |
|
15 |
+
# Pipeline example
|
16 |
+
|
17 |
+
```python
|
18 |
+
import pandas as pd
|
19 |
+
from transformers import AutoModelForMaskedLM
from transformers import GPT2Tokenizer, TrainingArguments, AutoModelForCausalLM, AutoConfig

# The model and its tokenizer are loaded from the same Hub repository.
model_name = 'Martian/Neo-GPT-Title-Generation-Electric-Car'

model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = GPT2Tokenizer.from_pretrained(
    model_name,
    bos_token='<|startoftext|>',
    eos_token='<|endoftext|>',
    pad_token='<|pad|>',
)

prompt = "<|startoftext|> Electric car"
input_ids = tokenizer(prompt, return_tensors="pt").input_ids

# Sample 20 candidate titles from the prompt.
# `skip_special_tokens` is an argument of `tokenizer.decode`, not of
# `generate`, so it is passed only when decoding below.
gen_tokens = model.generate(
    input_ids,
    do_sample=True,
    top_k=100,
    top_p=0.90,
    min_length=30,
    max_length=150,
    num_return_sequences=20,
)


def clean_title(title):
    """Return a generated title trimmed and normalised for display.

    The title is cut at the first ' | ', then ' - ', then ' — ' (each
    separator is applied in turn to the result of the previous one), so
    only the leading segment is kept. Replacement characters and control
    whitespace are then turned into spaces, non-breaking spaces are
    removed, and a title that is exactly the prompt becomes ''.
    """
    for separator in (' | ', ' - ', ' — '):
        # str.split never raises here: without the separator it returns
        # the whole string as the only element.
        title = title.split(separator)[0]
    for char in ('�', '\r', '\n', '\t'):
        title = title.replace(char, ' ')
    title = title.replace('\xa0', '')
    return '' if title == prompt else title


# Decode every sampled sequence, then clean it. Building a new list avoids
# mutating a list while iterating it and handles duplicate titles correctly.
list_title_gen = [
    clean_title(tokenizer.decode(sample_output, skip_special_tokens=True))
    for sample_output in gen_tokens
]

for title in list_title_gen:
    print(title)
|
58 |
+
|
59 |
+
```
|