Add Todo + Pipeline example
Browse files
README.md
CHANGED
@@ -14,8 +14,8 @@ Title generator based on Neo-GPT 125M fine-tuned on a dataset of 39k url's title
|
|
14 |
|
15 |
# Pipeline example
|
16 |
|
17 |
-
import pandas as pd
|
18 |
```python
|
|
|
19 |
from transformers import AutoModelForMaskedLM
|
20 |
from transformers import GPT2Tokenizer, TrainingArguments, AutoModelForCausalLM, AutoConfig
|
21 |
|
@@ -50,10 +50,16 @@ for i in list_title_gen:
|
|
50 |
except:
|
51 |
continue
|
52 |
|
53 |
-
list_title_gen = [sub.replace('�', ' ').replace('
|
|
|
54 |
list_title_gen = [sub if sub != '<|startoftext|> Electric car' else '' for sub in list_title_gen]
|
55 |
|
56 |
for i in list_title_gen:
|
57 |
print(i)
|
58 |
|
59 |
```
|
|
|
|
|
|
|
|
|
|
|
|
14 |
|
15 |
# Pipeline example
|
16 |
|
|
|
17 |
```python
|
18 |
+
import pandas as pd
|
19 |
from transformers import AutoModelForMaskedLM
|
20 |
from transformers import GPT2Tokenizer, TrainingArguments, AutoModelForCausalLM, AutoConfig
|
21 |
|
|
|
50 |
except:
|
51 |
continue
|
52 |
|
53 |
+
list_title_gen = [sub.replace('�', ' ').replace('\\r',' ').replace('\\n',' ').replace('\\t', ' ').replace('\\xa0', '') for sub in list_title_gen]
|
55 |
list_title_gen = [sub if sub != '<|startoftext|> Electric car' else '' for sub in list_title_gen]
|
56 |
|
57 |
for i in list_title_gen:
|
58 |
print(i)
|
59 |
|
60 |
```
|
61 |
+
|
62 |
+
# Todo
|
63 |
+
- Improve the quality of the training sample
|
64 |
+
- Add more data
|
65 |
+
|