Spaces:

joaogante
/

color-coded-text-generation

Running

App Files Files Community

joaogante HF staff committed on Feb 7, 2023

Commit

282bfce

•

1 Parent(s): e2ed84a

change demo from gpt2 to flan-t5

Browse files

Files changed (1) hide show

app.py +16 -13

app.py CHANGED Viewed

@@ -1,16 +1,16 @@
 import gradio as gr
-from transformers import GPT2Tokenizer, AutoModelForCausalLM
 import numpy as np
-MODEL_NAME = "gpt2"
 if __name__ == "__main__":
     # Define your model and your tokenizer
-    tokenizer = GPT2Tokenizer.from_pretrained(MODEL_NAME)
-    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
     if tokenizer.pad_token_id is None:
         tokenizer.pad_token_id = tokenizer.eos_token_id
         model.config.pad_token_id = model.config.eos_token_id
@@ -34,7 +34,7 @@ if __name__ == "__main__":
         """
         inputs = tokenizer([prompt], return_tensors="pt")
         outputs = model.generate(
-            **inputs, max_new_tokens=50, return_dict_in_generate=True, output_scores=True, do_sample=True
         )
         # Important: don't forget to set `normalize_logits=True` to obtain normalized probabilities (i.e. sum(p) = 1)
         transition_scores = model.compute_transition_scores(outputs.sequences, outputs.scores, normalize_logits=True)
@@ -43,8 +43,11 @@ if __name__ == "__main__":
         input_length = 1 if model.config.is_encoder_decoder else inputs.input_ids.shape[1]
         generated_tokens = outputs.sequences[:, input_length:]
-        # Initialize the highlighted output with the prompt, which will have no color label
-        highlighted_out = [(tokenizer.decode(token), None) for token in inputs.input_ids]
         # Get the (decoded_token, label) pairs for the generated tokens
         for token, proba in zip(generated_tokens[0], transition_proba[0]):
             this_label = None
@@ -64,18 +67,18 @@ if __name__ == "__main__":
             # 🌈 Color Coded Text Generation 🌈
             This is a demo of how you can obtain the probabilities of each generated token, and use them to
-            color code the model output.
-            Feel free to clone this demo and modify it to your needs 🤗
-            Internally, it relies on [`compute_transition_scores`](https://huggingface.co/docs/transformers/main/en/main_classes/text_generation#transformers.GenerationMixin.compute_transition_scores),
             which was added in `transformers` v4.26.0.
             """
         )
         with gr.Row():
             with gr.Column():
-                prompt = gr.Textbox(label="Prompt", lines=3, value="Today is")
-                button = gr.Button(f"Generate with {MODEL_NAME}, using sampling!")
             with gr.Column():
                 highlighted_text = gr.HighlightedText(
                     label="Highlighted generation",

 import gradio as gr
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 import numpy as np
+MODEL_NAME = "google/flan-t5-base"
 if __name__ == "__main__":
     # Define your model and your tokenizer
+    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+    model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)  # or AutoModelForCausalLM
     if tokenizer.pad_token_id is None:
         tokenizer.pad_token_id = tokenizer.eos_token_id
         model.config.pad_token_id = model.config.eos_token_id
         """
         inputs = tokenizer([prompt], return_tensors="pt")
         outputs = model.generate(
+            **inputs, max_new_tokens=50, return_dict_in_generate=True, output_scores=True
         )
         # Important: don't forget to set `normalize_logits=True` to obtain normalized probabilities (i.e. sum(p) = 1)
         transition_scores = model.compute_transition_scores(outputs.sequences, outputs.scores, normalize_logits=True)
         input_length = 1 if model.config.is_encoder_decoder else inputs.input_ids.shape[1]
         generated_tokens = outputs.sequences[:, input_length:]
+        # On decoder-only models, you might want to initialize the highlighted output with the prompt (without labels)
+        if model.config.is_encoder_decoder:
+            highlighted_out = []
+        else:
+            highlighted_out = [(tokenizer.decode(token), None) for token in inputs.input_ids]
         # Get the (decoded_token, label) pairs for the generated tokens
         for token, proba in zip(generated_tokens[0], transition_proba[0]):
             this_label = None
             # 🌈 Color Coded Text Generation 🌈
             This is a demo of how you can obtain the probabilities of each generated token, and use them to
+            color code the model output. Internally, it relies on
+            [`compute_transition_scores`](https://huggingface.co/docs/transformers/main/en/main_classes/text_generation#transformers.GenerationMixin.compute_transition_scores),
             which was added in `transformers` v4.26.0.
+            🤗 Feel free to clone this demo and modify it to your needs 🤗
             """
         )
         with gr.Row():
             with gr.Column():
+                prompt = gr.Textbox(label="Prompt", lines=3, value="Translate to English: omelette du fromage")
+                button = gr.Button(f"Generate with {MODEL_NAME}")
             with gr.Column():
                 highlighted_text = gr.HighlightedText(
                     label="Highlighted generation",