JuanJoseMV committed on
Commit
8bff16c
·
1 Parent(s): 65145f1

testing locally

Browse files
NeuralTextGenerator.py CHANGED
@@ -20,7 +20,7 @@ DEFAULT_DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
20
 
21
 
22
  class BertTextGenerator:
23
- def __init__(self, model_version, tokenizer, device=DEFAULT_DEVICE, use_apex=APEX_AVAILABLE, use_fast=True,
24
  do_basic_tokenize=True):
25
  """
26
  Wrapper of a BERT model from AutoModelForMaskedLM from huggingfaces.
@@ -47,6 +47,9 @@ class BertTextGenerator:
47
  self.model, optimizer = amp.initialize(self.model, optimizer, opt_level="O2", keep_batchnorm_fp32=True,
48
  loss_scale="dynamic")
49
 
 
 
 
50
  self.tokenizer = AutoTokenizer.from_pretrained(tokenizer, do_lower_case="uncased" in model_version,
51
  use_fast=use_fast,
52
  do_basic_tokenize=do_basic_tokenize) # added to avoid splitting of unused tokens
 
20
 
21
 
22
  class BertTextGenerator:
23
+ def __init__(self, model_version, tokenizer=None, device=DEFAULT_DEVICE, use_apex=APEX_AVAILABLE, use_fast=True,
24
  do_basic_tokenize=True):
25
  """
26
  Wrapper of a BERT model from AutoModelForMaskedLM from huggingfaces.
 
47
  self.model, optimizer = amp.initialize(self.model, optimizer, opt_level="O2", keep_batchnorm_fp32=True,
48
  loss_scale="dynamic")
49
 
50
+ if tokenizer is None:
51
+ tokenizer = model_version
52
+
53
  self.tokenizer = AutoTokenizer.from_pretrained(tokenizer, do_lower_case="uncased" in model_version,
54
  use_fast=use_fast,
55
  do_basic_tokenize=do_basic_tokenize) # added to avoid splitting of unused tokens
__pycache__/NeuralTextGenerator.cpython-310.pyc CHANGED
Binary files a/__pycache__/NeuralTextGenerator.cpython-310.pyc and b/__pycache__/NeuralTextGenerator.cpython-310.pyc differ
 
__pycache__/app.cpython-310.pyc ADDED
Binary file (2.49 kB). View file
 
app.py CHANGED
@@ -1,25 +1,12 @@
 
 
 
 
1
  import gradio as gr
2
  from NeuralTextGenerator import BertTextGenerator
3
 
4
  # Load models
5
-
6
- ## BERT
7
- BERT_model_name = "Twitter/twhin-bert-large"
8
- BERT = BertTextGenerator(BERT_model_name, tokenizer=BERT_model_name)
9
-
10
- ## RoBERTa
11
- RoBERTa_model_name = "cardiffnlp/twitter-xlm-roberta-base"
12
- RoBERTa = BertTextGenerator(RoBERTa_model_name, tokenizer=RoBERTa_model_name)
13
-
14
- ## Finetuned BERT
15
- finetunned_BERT_model_name = "JuanJoseMV/BERT_text_gen"
16
- finetunned_BERT = BertTextGenerator(finetunned_BERT_model_name, tokenizer='bert-base-uncased')
17
-
18
- ## Finetuned RoBERTa
19
- finetunned_RoBERTa_model_name = "JuanJoseMV/XLM_RoBERTa_text_gen"
20
- finetunned_RoBERTa = BertTextGenerator(finetunned_RoBERTa_model_name, tokenizer=finetunned_RoBERTa_model_name)
21
-
22
- ## Add special tokens
23
  special_tokens = [
24
  '[POSITIVE-0]',
25
  '[POSITIVE-1]',
@@ -29,58 +16,95 @@ special_tokens = [
29
  '[NEGATIVE-2]'
30
  ]
31
 
32
- BERT.tokenizer.add_special_tokens({'additional_special_tokens': special_tokens})
33
- BERT.model.resize_token_embeddings(len(BERT.tokenizer))
 
 
 
 
34
 
35
- RoBERTa.tokenizer.add_special_tokens({'additional_special_tokens': special_tokens})
36
- RoBERTa.model.resize_token_embeddings(len(RoBERTa.tokenizer))
 
37
 
38
- finetunned_BERT.tokenizer.add_special_tokens({'additional_special_tokens': special_tokens})
39
- finetunned_BERT.model.resize_token_embeddings(len(finetunned_BERT.tokenizer))
40
 
41
- def sentence_builder(selected_model, n_sentences, max_iter, sentiment, seed_text):
 
 
 
 
 
 
 
 
 
42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  if selected_model == "Finetuned_RoBERTa":
44
  generator = finetunned_RoBERTa
45
- elif selected_model == "Finetuned_BERT":
46
- generator = finetunned_BERT
47
- elif selected_model == "RoBERTa":
 
48
  generator = RoBERTa
49
  else:
50
  generator = BERT
51
 
 
52
  parameters = {'n_sentences': n_sentences,
53
- 'batch_size': 2,
54
  'avg_len':30,
55
  'max_len':50,
56
- # 'std_len' : 3,
57
  'generation_method':'parallel',
58
  'sample': True,
59
  'burnin': 450,
60
  'max_iter': max_iter,
61
- 'top_k': 100,
62
  'seed_text': f"[{sentiment}-0] [{sentiment}-1] [{sentiment}-2] {seed_text}",
 
63
  'verbose': True
64
  }
65
  sents = generator.generate(**parameters)
66
- gen_text = ''
67
 
 
 
68
  for i, s in enumerate(sents):
69
- gen_text += f'- GENERATED TWEET #{i}: {s}\n'
 
70
 
71
  return gen_text
72
 
73
-
74
  demo = gr.Interface(
75
  sentence_builder,
76
  [
77
- gr.Radio(["BERT", "RoBERTa", "Finetuned_RoBERTa", "Finetunned_BERT"], value="BERT", label="Generator model"),
78
- gr.Slider(1, 15, value=2, label="Num. Tweets", step=1, info="Number of tweets to be generated."),
79
- gr.Slider(50, 500, value=100, label="Max. iter", info="Maximum number of iterations for the generation."),
80
- gr.Radio(["POSITIVE", "NEGATIVE"], value="POSITIVE", label="Sentiment to generate"),
81
- gr.Textbox('', label="Seed text", info="Seed text for the generation.")
 
 
 
82
  ],
83
  "text",
84
  )
85
 
 
86
  demo.launch()
 
1
+ import os
2
+ os.environ["CUDA_VISIBLE_DEVICES"] = "1"
3
+
4
+ import re
5
  import gradio as gr
6
  from NeuralTextGenerator import BertTextGenerator
7
 
8
  # Load models
9
+ ## Special tokens
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  special_tokens = [
11
  '[POSITIVE-0]',
12
  '[POSITIVE-1]',
 
16
  '[NEGATIVE-2]'
17
  ]
18
 
19
+ ## Finetuned RoBERTa
20
+ finetunned_RoBERTa_model_name = "JuanJoseMV/XLM_RoBERTa_text_gen"
21
+ finetunned_RoBERTa = BertTextGenerator(finetunned_RoBERTa_model_name)
22
+
23
+ finetunned_RoBERTa.tokenizer.add_special_tokens({'additional_special_tokens': special_tokens})
24
+ finetunned_RoBERTa.model.resize_token_embeddings(len(finetunned_RoBERTa.tokenizer))
25
 
26
+ ## Finetuned RoBERTa hate
27
+ finetunned_RoBERTa_Hate_model_name = "JuanJoseMV/XLM_RoBERTa_text_gen_FT_Hate"
28
+ finetunned_RoBERTa_Hate = BertTextGenerator(finetunned_RoBERTa_Hate_model_name)
29
 
30
+ # finetunned_RoBERTa_Hate.tokenizer.add_special_tokens({'additional_special_tokens': special_tokens})
31
+ # finetunned_RoBERTa_Hate.model.resize_token_embeddings(len(finetunned_RoBERTa_Hate.tokenizer))
32
 
33
+ # ## Finetuned BERT
34
+ # finetunned_BERT_model_name = "JuanJoseMV/BERT_text_gen"
35
+ # finetunned_BERT = BertTextGenerator(finetunned_BERT_model_name, tokenizer='Twitter/twhin-bert-large')
36
+
37
+ # finetunned_BERT.tokenizer.add_special_tokens({'additional_special_tokens': special_tokens})
38
+ # finetunned_BERT.model.resize_token_embeddings(len(finetunned_BERT.tokenizer))
39
+
40
+ ## RoBERTa
41
+ RoBERTa_model_name = "cardiffnlp/twitter-xlm-roberta-base"
42
+ RoBERTa = BertTextGenerator(RoBERTa_model_name)
43
 
44
+ ## BERT
45
+ BERT_model_name = "Twitter/twhin-bert-large"
46
+ BERT = BertTextGenerator(BERT_model_name)
47
+
48
def sentence_builder(
    selected_model,
    n_sentences,
    max_iter,
    temperature,
    top_k,
    sentiment,
    seed_text
):
    """Generate tweets with the selected model and format them for display.

    Parameters mirror, in order, the Gradio input widgets wired to this
    callback.

    Args:
        selected_model: Label of the generator to use. "Finetuned_RoBERTa",
            "Finetuned_RoBERTa_Hate" and "RoBERTa" select the matching
            module-level generator; any other value falls back to ``BERT``.
        n_sentences: Number of sentences to generate.
        max_iter: Forwarded to ``generator.generate`` as ``max_iter``.
        temperature: Forwarded to ``generator.generate`` as ``temperature``.
        top_k: Forwarded to ``generator.generate`` as ``top_k``.
        sentiment: Name used to build the three ``[<sentiment>-N]`` control
            tokens that prefix the seed text. Overridden with ``'HATE'``
            when the hate fine-tuned model is selected.
        seed_text: User text appended after the control tokens.

    Returns:
        A single string with one ``- GENERATED TWEET #k: ...`` entry per
        generated sentence, entries separated by a blank line.
    """
    # Select model.
    # This must be ONE if/elif chain: previously the "RoBERTa" test started a
    # new `if`, so its `else` branch replaced either fine-tuned selection
    # with BERT.
    if selected_model == "Finetuned_RoBERTa":
        generator = finetunned_RoBERTa
    elif selected_model == "Finetuned_RoBERTa_Hate":
        generator = finetunned_RoBERTa_Hate
        # The hate model is driven by [HATE-N] tokens regardless of the
        # sentiment chosen in the UI.
        sentiment = 'HATE'
    elif selected_model == "RoBERTa":
        generator = RoBERTa
    else:
        generator = BERT

    # Generate
    parameters = {
        'n_sentences': n_sentences,
        # Generate everything in one batch, capped at 10 sentences per batch.
        'batch_size': n_sentences if n_sentences < 10 else 10,
        'avg_len': 30,
        'max_len': 50,
        'std_len': 3,
        'generation_method': 'parallel',
        'sample': True,
        'burnin': 450,
        'max_iter': max_iter,
        'top_k': top_k,
        'seed_text': f"[{sentiment}-0] [{sentiment}-1] [{sentiment}-2] {seed_text}",
        'temperature': temperature,
        'verbose': True,
    }
    sents = generator.generate(**parameters)

    # Clean: strip every bracketed span (e.g. the control tokens) from each
    # sentence. NOTE(review): this also removes any "[...]" the user typed in
    # the seed text - confirm that is intended.
    gen_text = ''
    for i, s in enumerate(sents):
        clean_sent = re.sub(r'\[.*?\]', '', s)
        gen_text += f'- GENERATED TWEET #{i + 1}: {clean_sent}\n\n'

    return gen_text
92
 
93
+ # Set Demo
94
  demo = gr.Interface(
95
  sentence_builder,
96
  [
97
+ gr.Radio(["BERT", "RoBERTa", "Finetuned_RoBERTa", "Finetuned_RoBERTa_Hate"], value="RoBERTa", label="Generator model"),
98
+ # gr.Radio(["BERT", "RoBERTa"], value="BERT", label="Generator model"),
99
+ gr.Slider(1, 15, value=5, label="Num. Tweets", step=1, info="Number of tweets to be generated."),
100
+ gr.Slider(50, 500, value=300, label="Max. iter", info="Maximum number of iterations for the generation."),
101
+ gr.Slider(0, 1.0, value=0.8, step=0.05, label="Temperature", info="Temperature parameter for the generation."),
102
+ gr.Slider(1, 200, value=130, step=1, label="Top k", info="Top k parameter for the generation."),
103
+ gr.Radio(["POSITIVE", "NEGATIVE"], value="NEGATIVE", label="Sentiment to generate"),
104
+ gr.Textbox('ATP Finals in Turin', label="Seed text", info="Seed text for the generation.")
105
  ],
106
  "text",
107
  )
108
 
109
+ # Run Demo
110
  demo.launch()
flagged/log.csv ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ Generator model,Num. Tweets,Max. iter,Temperature,Top k,Sentiment to generate,Seed text,output,flag,username,timestamp
2
+ BERT,2,300,0.7,130,POSITIVE,Awesome ATP Finals in Turin,"'- GENERATED TWEET #1: Awesome ATP Finals in Turin,,,,,,,,,,,,,,,,,,,,, from Nikita Dancin ⚡️
3
+
4
+ - GENERATED TWEET #2: Awesome ATP Finals in Turin👏👏👏👏👏👏👏👏👏👏👏👏👏👏👏👏👏👏👏👏👏👏👏👏👏👏👏👏👏
5
+
6
+ ",,,2023-03-24 11:04:02.609689