File size: 3,896 Bytes
0ba900e ef1f66e 0ba900e 8384a73 0ba900e b252546 0ba900e 8a66e28 b252546 0ba900e 43b8437 619dca5 b252546 619dca5 b252546 0ba900e 8384a73 0ba900e 8384a73 619dca5 8384a73 b252546 0ba900e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 |
import copy
import logging
from typing import List
import torch
import streamlit as st
from transformers import BertTokenizer, TFAutoModelForMaskedLM
from transformers import CamembertModel, CamembertTokenizer
from rhyme_with_ai.utils import color_new_words, sanitize
from rhyme_with_ai.rhyme import query_rhyme_words
from rhyme_with_ai.rhyme_generator import RhymeGenerator
DEFAULT_QUERY = "Machines will take over the world soon"
N_RHYMES = 10
LANGUAGE = st.sidebar.radio("Language", ["english", "dutch", "french"],0)
if LANGUAGE == "english":
MODEL_PATH = "bert-large-cased-whole-word-masking"
ITER_FACTOR = 5
elif LANGUAGE == "dutch":
MODEL_PATH = "GroNLP/bert-base-dutch-cased"
ITER_FACTOR = 10 # Faster model
elif LANGUAGE == "french":
MODEL_PATH = "camembert-base"
ITER_FACTOR = 5
else:
raise NotImplementedError(f"Unsupported language ({LANGUAGE}) expected 'english','dutch' or 'french.")
"""LANGUAGE = "french"
MODEL_PATH = "camembert-base"
ITER_FACTOR = 5"""
def main():
st.markdown(
"<sup>Created with "
"[Datamuse](https://www.datamuse.com/api/), "
"[Mick's rijmwoordenboek](https://rijmwoordenboek.nl), "
"[Hugging Face](https://huggingface.co/), "
"[Streamlit](https://streamlit.io/) and "
"[App Engine](https://cloud.google.com/appengine/)."
" Read our [blog](https://blog.godatadriven.com/rhyme-with-ai) "
"or check the "
"[source](https://github.com/godatadriven/rhyme-with-ai).</sup>",
unsafe_allow_html=True,
)
st.title("Rhyme with AI")
query = get_query()
if not query:
query = DEFAULT_QUERY
rhyme_words_options = query_rhyme_words(query, n_rhymes=N_RHYMES,language=LANGUAGE)
if rhyme_words_options:
logging.getLogger(__name__).info("Got rhyme words: %s", rhyme_words_options)
start_rhyming(query, rhyme_words_options)
else:
st.write("No rhyme words found")
def get_query():
q = sanitize(
st.text_input("Write your first line and press ENTER to rhyme:", DEFAULT_QUERY)
)
if not q:
return DEFAULT_QUERY
return q
def start_rhyming(query, rhyme_words_options):
st.markdown("## My Suggestions:")
progress_bar = st.progress(0)
status_text = st.empty()
max_iter = len(query.split()) * ITER_FACTOR
rhyme_words = rhyme_words_options[:N_RHYMES]
model, tokenizer = load_model(MODEL_PATH, LANGUAGE)
sentence_generator = RhymeGenerator(model, tokenizer)
sentence_generator.start(query, rhyme_words)
current_sentences = [" " for _ in range(N_RHYMES)]
for i in range(max_iter):
previous_sentences = copy.deepcopy(current_sentences)
current_sentences = sentence_generator.mutate()
display_output(status_text, query, current_sentences, previous_sentences)
progress_bar.progress(i / (max_iter - 1))
st.balloons()
@st.cache(allow_output_mutation=True)
def load_model(model_path, language):
if language != "french":
return (
TFAutoModelForMaskedLM.from_pretrained(model_path),
BertTokenizer.from_pretrained(model_path),
)
else :
tokenizer = CamembertTokenizer(vocab_file='rhyme_with_ai/dict.txt')
return (
CamembertModel.from_pretrained(model_path),
tokenizer.from_pretrained(model_path),
)
def display_output(status_text, query, current_sentences, previous_sentences):
print_sentences = []
for new, old in zip(current_sentences, previous_sentences):
formatted = color_new_words(new, old)
after_comma = "<li>" + formatted.split(",")[1][:-2] + "</li>"
print_sentences.append(after_comma)
status_text.markdown(
query + ",<br>" + "".join(print_sentences), unsafe_allow_html=True
)
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO)
main() |