BigSalmon committed
Commit: 2473dde · 1 Parent(s): 5c8f4a3

Update app.py

Files changed (1):
  1. app.py  +13 -2
app.py CHANGED

@@ -16,7 +16,7 @@ def load_model(model_name):
     tokenizer = AutoTokenizer.from_pretrained(model_name)
     model = AutoModelForCausalLM.from_pretrained(model_name)
     return model, tokenizer
-def extend(input_text, num_return_sequences, max_size=20, top_k=50, top_p=0.95):
+def extend(input_text, num_return_sequences, max_size=20, top_k=50, top_p=0.95, bad_words):
     if len(input_text) == 0:
         input_text = ""
     encoded_prompt = tokenizer.encode(
@@ -27,10 +27,18 @@ def extend(input_text, num_return_sequences, max_size=20, top_k=50, top_p=0.95):
     else:
         input_ids = encoded_prompt

+    bad_words = bad_words.split()
+    bad_word_ids = []
+    for bad_word in bad_words:
+        bad_word = " " + bad_word
+        ids = tokenizer(bad_word).input_ids
+        bad_word_ids.append(ids)
+
     output_sequences = model.generate(
         input_ids=input_ids,
         max_length=max_size + len(encoded_prompt[0]),
         top_k=top_k,
+        bad_word_ids = bad_word_ids,
         top_p=top_p,
         do_sample=True,
         num_return_sequences=num_return_sequences)
@@ -73,6 +81,8 @@ if __name__ == "__main__":
     num_return_sequences = st.sidebar.slider("Outputs", 1, 50, 5, help="The number of outputs to be returned.")
     top_k = st.sidebar.slider("Top-K", 0, 100, 40, help="The number of highest probability vocabulary tokens to keep for top-k-filtering.")
     top_p = st.sidebar.slider("Top-P", 0.0, 1.0, 0.92, help="If set to float < 1, only the most probable tokens with probabilities that add up to top_p or higher are kept for generation.")
+    bad_words = st.text_input("Words You Do Not Want Generated", " core lemon height time ")
+
     if st.button("Generate Text"):
         with st.spinner(text="Generating results..."):
             st.subheader("Result")
@@ -83,7 +93,8 @@ if __name__ == "__main__":
             num_return_sequences=int(num_return_sequences),
             max_size=int(max_len),
             top_k=int(top_k),
-            top_p=float(top_p))
+            top_p=float(top_p),
+            bad_words = bad_words))
             print("Done length: " + str(len(result)) + " bytes")
             #<div class="rtl" dir="rtl" style="text-align:right;">
             st.markdown(f"{result}", unsafe_allow_html=True)
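Note that, as committed, this change does not run: in Python a parameter without a default (bad_words) cannot follow defaulted parameters, so the new extend signature is a SyntaxError; generate() in transformers expects bad_words_ids (plural), so the bad_word_ids keyword would not enable word banning; and the call site ends with two closing parentheses where the old code had one. A minimal corrected sketch of the function, assuming model and tokenizer are the globals returned by load_model as in the rest of app.py:

# Corrected sketch of extend(); model and tokenizer are assumed to be
# the globals produced by load_model(), as in the rest of app.py.
def extend(input_text, num_return_sequences, max_size=20, top_k=50,
           top_p=0.95, bad_words=""):
    # bad_words now has a default: a non-default parameter after
    # defaulted ones is a SyntaxError.
    if len(input_text) == 0:
        input_text = ""
    input_ids = tokenizer.encode(input_text, return_tensors="pt")

    # One id list per banned word; the leading space targets the form
    # a BPE tokenizer produces for the word mid-sentence.
    bad_words_ids = [tokenizer(" " + w, add_special_tokens=False).input_ids
                     for w in bad_words.split()]

    output_sequences = model.generate(
        input_ids=input_ids,
        max_length=max_size + input_ids.shape[1],
        top_k=top_k,
        top_p=top_p,
        # generate() takes bad_words_ids, not bad_word_ids; it rejects
        # an empty list, so pass None when no words were given.
        bad_words_ids=bad_words_ids or None,
        do_sample=True,
        num_return_sequences=num_return_sequences)
    return output_sequences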
 
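At the call site, the matching fix is a single closing parenthesis rather than two, with top_p and bad_words passed through. A sketch, where the result assignment and the prompt argument are assumed from context (the hunk begins below them, and the next line prints len(result)):

result = extend(input_text,  # prompt argument assumed from context
                num_return_sequences=int(num_return_sequences),
                max_size=int(max_len),
                top_k=int(top_k),
                top_p=float(top_p),
                bad_words=bad_words)  # one closing parenthesis, not two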
 
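The space prepended to each bad word is deliberate: with a GPT-2-style BPE tokenizer, a word maps to different token ids at the start of a string than after a space, and mid-sentence generation produces the space-prefixed form. A quick way to see this (gpt2 is a stand-in checkpoint; the app loads whichever model_name the user selects):

from transformers import AutoTokenizer

# gpt2 is a stand-in; app.py loads the user-selected model_name.
tok = AutoTokenizer.from_pretrained("gpt2")
print(tok("lemon", add_special_tokens=False).input_ids)   # ids without a leading space
print(tok(" lemon", add_special_tokens=False).input_ids)  # different ids with one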