Spaces:
Runtime error
Runtime error
changing some styles
Browse files
app.css
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/* Theme variables: shared palette and sizing for the Streamlit card UI. */
:root {
    --color-primary: #65db9d;   /* light green accent (text / highlight) */
    --color-secondary: #023b1d; /* dark green surface (card background) */

    --class_1-maxWidth: 1024px; /* upper bound for the card grid width */
}

/* GLOBAL STYLING */
/* Card grid container: single column, centered, capped at the max width. */
.class_1 {
        background-color: var(--color-secondary);
        display: grid;
        grid-gap: 1rem;
        height: auto;
        width: 100%;
        max-width: var(--class_1-maxWidth);
        margin: 0 auto;
}

/* Two columns once there is tablet-width room. */
@media (min-width: 768px) {
    .class_1 {
        grid-template-columns: 1fr 1fr;
    }
}

/* Utility classes mirroring the theme variables. */
.bg-primary {
    background-color: var(--color-primary);
}

.text-primary {
    color: var(--color-primary);
}
|
app.py
CHANGED
@@ -1,20 +1,13 @@
|
|
1 |
import streamlit as st
|
|
|
2 |
|
3 |
from googletrans import Translator
|
4 |
-
from
|
5 |
# from huggingface_hub import snapshot_download
|
6 |
|
7 |
page = st.sidebar.selectbox("Model ", ["Finetuned on News data", "Pretrained GPT2"])
|
8 |
translator = Translator()
|
9 |
|
10 |
-
def load_model(model_name):
|
11 |
-
with st.spinner('Waiting for the model to load.....'):
|
12 |
-
# snapshot_download('flax-community/Sinhala-gpt2')
|
13 |
-
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
14 |
-
model = AutoModelForCausalLM.from_pretrained(model_name, pad_token_id=tokenizer.eos_token_id)
|
15 |
-
st.success('Model loaded!!')
|
16 |
-
return model, tokenizer
|
17 |
-
|
18 |
seed = st.sidebar.text_input('Starting text', 'ආයුබෝවන්')
|
19 |
seq_num = st.sidebar.number_input('Number of sequences to generate ', 1, 20, 5)
|
20 |
max_len = st.sidebar.number_input('Length of a sequence ', 5, 300, 100)
|
@@ -37,6 +30,53 @@ def generate(model, tokenizer, seed, seq_num, max_len):
|
|
37 |
for beam_out in beam_outputs:
|
38 |
sentences.append(tokenizer.decode(beam_out, skip_special_tokens=True))
|
39 |
return sentences
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
|
41 |
if page == 'Pretrained GPT2':
|
42 |
st.title('Sinhala Text generation with GPT2')
|
@@ -51,11 +91,13 @@ if page == 'Pretrained GPT2':
|
|
51 |
# generator = pipeline('text-generation', model=model, tokenizer=tokenizer)
|
52 |
# seqs = generator(seed, max_length=max_len, num_return_sequences=seq_num)
|
53 |
seqs = generate(model, tokenizer, seed, seq_num, max_len)
|
|
|
54 |
for i, seq in enumerate(seqs):
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
st.
|
|
|
59 |
except Exception as e:
|
60 |
st.exception(f'Exception: {e}')
|
61 |
else:
|
@@ -72,11 +114,17 @@ else:
|
|
72 |
# generator = pipeline('text-generation', model=model, tokenizer=tokenizer)
|
73 |
# seqs = generator(seed, max_length=max_len, num_return_sequences=seq_num)
|
74 |
seqs = generate(model, tokenizer, seed, seq_num, max_len)
|
|
|
75 |
for i, seq in enumerate(seqs):
|
76 |
-
st.info(f'Generated sequence {i+1}:')
|
77 |
-
st.write(seq)
|
78 |
-
st.info(f'English translation (by Google Translation):')
|
79 |
-
st.write(translator.translate(seq, src='si', dest='en').text)
|
|
|
|
|
|
|
|
|
|
|
80 |
except Exception as e:
|
81 |
st.exception(f'Exception: {e}')
|
82 |
st.markdown('____________')
|
|
|
1 |
import streamlit as st
|
2 |
+
import streamlit.components.v1 as component
|
3 |
|
4 |
from googletrans import Translator
|
5 |
+
from model import load_model
|
6 |
# from huggingface_hub import snapshot_download
|
7 |
|
8 |
page = st.sidebar.selectbox("Model ", ["Finetuned on News data", "Pretrained GPT2"])
|
9 |
translator = Translator()
|
10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
seed = st.sidebar.text_input('Starting text', 'ආයුබෝවන්')
|
12 |
seq_num = st.sidebar.number_input('Number of sequences to generate ', 1, 20, 5)
|
13 |
max_len = st.sidebar.number_input('Length of a sequence ', 5, 300, 100)
|
|
|
30 |
for beam_out in beam_outputs:
|
31 |
sentences.append(tokenizer.decode(beam_out, skip_special_tokens=True))
|
32 |
return sentences
|
33 |
+
|
34 |
+
|
35 |
+
def html(body):
    """Inject raw HTML into the Streamlit page.

    Args:
        body: HTML string; rendered with ``unsafe_allow_html=True`` so tags
            are interpreted rather than escaped.
    """
    st.markdown(body, unsafe_allow_html=True)
|
37 |
+
|
38 |
+
|
39 |
+
def card_begin_str(Sinhala_sentence):
    """Return the opening HTML of a styled card containing *Sinhala_sentence*.

    Emits an inline <style> block for the card look, then the opening
    ``div.card`` / ``div.container`` tags and the sentence in a <small> tag.
    The markup is left open; callers must append ``card_end_str()``.
    """
    # Inline stylesheet shipped with every card (self-contained markup).
    style = (
        "<style>div.card{background-color:#023b1d;border-radius: 5px;"
        "box-shadow: 0 4px 8px 0 rgba(0,0,0,0.2);transition: 0.3s;}"
        " small{ margin: 5px;}</style>"
    )
    opening_tags = '<div class="card">' + '<div class="container">'
    return style + opening_tags + f"<small>{Sinhala_sentence}</small>"
|
46 |
+
|
47 |
+
|
48 |
+
def card_end_str():
    """Return the closing tags matching the markup opened by card_begin_str()."""
    return "</div>" * 2
|
50 |
+
|
51 |
+
|
52 |
+
def card(sinhala_sentence, english_sentence):
    """Render one card: the Sinhala sentence with its English translation.

    Builds the full card markup (open tags + translation paragraph + close
    tags) and pushes it to the page via ``html``.
    """
    markup = card_begin_str(sinhala_sentence)
    markup += f"<p>{english_sentence}</p>"
    markup += card_end_str()
    html(markup)
|
55 |
+
|
56 |
+
|
57 |
+
def br(n):
    """Insert *n* consecutive line breaks into the page."""
    html("<br>" * n)
|
59 |
+
|
60 |
+
def card_html(sinhala_sentence, english_sentence):
    """Render the sentence pair as a styled card inside a Streamlit component.

    The component renders in its own iframe, which does not inherit the
    page's styles, so ``app.css`` is read and inlined into the fragment.

    Args:
        sinhala_sentence: generated Sinhala text to display.
        english_sentence: its English translation.

    Returns:
        The value returned by ``component.html`` for the rendered fragment.
    """
    # Read the stylesheet explicitly as UTF-8: relying on the locale default
    # encoding can mis-decode the file on non-UTF-8 platforms.
    with open('./app.css', encoding='utf-8') as f:
        css_file = f.read()
    return component.html(
        f"""
        <style>{css_file}</style>
        <article class="class_1 bg-white rounded-lg p-4 relative">
            <p class="font-bold items-center text-sm text-primary relative mb-1">{sinhala_sentence}</p>

            <div class="flex items-center text-white-400 mb-4">
                <i class="fab fa-google mx-2"></i>
                <small class="text-white-400">English Translations are by Google Translate</small>
            </div>

            <p class="not-italic items-center text-sm text-primary relative mb-4">
                {english_sentence}
            </p>
        </article>
        """
    )
|
80 |
|
81 |
if page == 'Pretrained GPT2':
|
82 |
st.title('Sinhala Text generation with GPT2')
|
|
|
91 |
# generator = pipeline('text-generation', model=model, tokenizer=tokenizer)
|
92 |
# seqs = generator(seed, max_length=max_len, num_return_sequences=seq_num)
|
93 |
seqs = generate(model, tokenizer, seed, seq_num, max_len)
|
94 |
+
st.warning("English sentences were translated by Google Translate.")
|
95 |
for i, seq in enumerate(seqs):
|
96 |
+
english_sentence = translator.translate(seq, src='si', dest='en').text
|
97 |
+
# card(seq, english_sentence)
|
98 |
+
html(card_begin_str(seq))
|
99 |
+
st.info(english_sentence)
|
100 |
+
html(card_end_str())
|
101 |
except Exception as e:
|
102 |
st.exception(f'Exception: {e}')
|
103 |
else:
|
|
|
114 |
# generator = pipeline('text-generation', model=model, tokenizer=tokenizer)
|
115 |
# seqs = generator(seed, max_length=max_len, num_return_sequences=seq_num)
|
116 |
seqs = generate(model, tokenizer, seed, seq_num, max_len)
|
117 |
+
st.warning("English sentences were translated by Google Translate.")
|
118 |
for i, seq in enumerate(seqs):
|
119 |
+
# st.info(f'Generated sequence {i+1}:')
|
120 |
+
# st.write(seq)
|
121 |
+
# st.info(f'English translation (by Google Translation):')
|
122 |
+
# st.write(translator.translate(seq, src='si', dest='en').text)
|
123 |
+
english_sentence = translator.translate(seq, src='si', dest='en').text
|
124 |
+
# card(seq, english_sentence)
|
125 |
+
html(card_begin_str(seq))
|
126 |
+
st.info(english_sentence)
|
127 |
+
html(card_end_str())
|
128 |
except Exception as e:
|
129 |
st.exception(f'Exception: {e}')
|
130 |
st.markdown('____________')
|
model.py
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
|
3 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM
|
4 |
+
|
5 |
+
def load_model(model_name):
    """Load a causal language model and its tokenizer, showing a Streamlit spinner.

    Args:
        model_name: Hugging Face model id or local checkpoint path passed to
            ``from_pretrained``.

    Returns:
        A ``(model, tokenizer)`` tuple ready for text generation.
    """
    with st.spinner('Waiting for the model to load.....'):
        # snapshot_download('flax-community/Sinhala-gpt2')
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        # GPT-2-style checkpoints have no pad token; reuse EOS so generation
        # with padding does not fail.
        model = AutoModelForCausalLM.from_pretrained(model_name, pad_token_id=tokenizer.eos_token_id)
        st.success('Model loaded!!')
        return model, tokenizer
|