Maslov-Artem committed
Commit: 3a905e4
Parent(s): b90441b

Add styles

- .gitattributes +4 -0
- app.py +109 -21
- main_background.png +3 -0
- model/funcs.py +20 -1
- model/model.py +16 -57
- pages/review_predictor.py +66 -7
- pages/text_generator.py +78 -11
- space_background.jpeg +3 -0
- space_main_background.avif +0 -0
- space_main_background.jpeg +3 -0
- text_generation.png +3 -0
.gitattributes CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+space_main_background.jpeg filter=lfs diff=lfs merge=lfs -text
+main_background.png filter=lfs diff=lfs merge=lfs -text
+text_generation.png filter=lfs diff=lfs merge=lfs -text
+space_background.jpeg filter=lfs diff=lfs merge=lfs -text
app.py CHANGED
@@ -1,5 +1,79 @@
+import base64
+
 import streamlit as st
 
+
+def get_base64(file_path):
+    with open(file_path, "rb") as file:
+        base64_bytes = base64.b64encode(file.read())
+        base64_string = base64_bytes.decode("utf-8")
+        return base64_string
+
+
+def set_background(png_file):
+    bin_str = get_base64(png_file)
+    page_bg_img = (
+        """
+    <style>
+    .stApp {
+    background-image: url("data:image/png;base64,%s");
+    background-size: cover;
+    }
+    </style>
+    """
+        % bin_str
+    )
+    st.markdown(page_bg_img, unsafe_allow_html=True)
+
+
+set_background("space_background.jpeg")
+
+# About section
+about = """
+<div class="text-shadow">
+    <h1>About</h1>
+    <p class="bigger">This is a multipage application created using the Streamlit library and hosted on HuggingFace Spaces.
+    Our application focuses on solving various natural language processing (NLP) tasks using modern machine learning models.</p>
+</div>
+"""
+
+# Page 1 content
+page_1 = """
+<div class="text-shadow">
+    <h1>Classification of Reviews on Clinics</h1>
+
+    <p class="bigger">You can input your review about a clinic here, and our application will classify it using three different models:</p>
+
+    <ol>
+        <li>Logistic Regression trained on TF-IDF representation.</li>
+        <li>LSTM model with attention mechanism.</li>
+        <li>ruBERTtiny2.</li>
+    </ol>
+</div>
+"""
+
+# Page 2 content
+page_2 = """
+<div class="text-shadow">
+    <h1>Text Generation with GPT Model</h1>
+
+    <p class="bigger">Ask about the mysteries of the universe</p>
+</div>
+"""
+
+# Project collaborators section
+project_colaborators = """
+<div class="text-shadow">
+    <h1>Project Collaborators</h1>
+    <ul>
+        <li>Артем</li>
+        <li>Валера</li>
+        <li>Иван</li>
+    </ul>
+</div>
+"""
+
+st.markdown(about, unsafe_allow_html=True)
 static_toxicity_path = "https://imagizer.imageshack.com/v2/480x360q70/r/924/L4Ditq.jpg"
 animated_toxicity_path = (
     "https://i.kym-cdn.com/photos/images/original/001/264/967/cdc.gif"
@@ -7,44 +81,58 @@
 animated_enlighten_path = "https://gifdb.com/images/high/zen-meditation-chakras-illustration-6lujnenasnfmn8dt.gif"
 static_enlighten_path = "https://imagizer.imageshack.com/v2/668x500q70/r/922/bpoy6G.jpg"
 
-#
-
-
+# Toxicity image HTML
 toxicity_html = f"""
-<div class="
-
-
-
-</div>
+<div class="text-shadow">
+    <a href="review_predictor" target="_self">
+        <img src="{static_toxicity_path}" width="400" class="toxicity-image" />
+    </a>
 <style>
-    /* Define the hover state for
-    .toxicity-image
+    /* Define the hover state for the image */
+    .toxicity-image:hover {{
         content: url("{animated_toxicity_path}");
         transform: scale(1.1); /* Enlarge the image by 10% */
         transition: transform 0.5s ease; /* Add smooth transition */
     }}
 </style>
+</div>
 """
 
+# Enlightenment image HTML
 enlighten_html = f"""
-<div class="
-
-
-
-</div>
+<div class="text-shadow">
+    <a href="text_generator" target="_self">
+        <img src="{static_enlighten_path}" width="400" class="enlighten-image" />
+    </a>
 <style>
-    /* Define the hover state for
-    .enlighten-image
+    /* Define the hover state for the image */
+    .enlighten-image:hover {{
         content: url("{animated_enlighten_path}");
         transform: scale(1.1); /* Enlarge the image by 10% */
         transition: transform 0.5s ease; /* Add smooth transition */
     }}
 </style>
+</div>
 """
 
-#
-
-
+# Add shadow to text content
+text_shadow_style = """
+<style>
+.text-shadow {
+    color: white;
+    text-shadow: 4px 4px 8px #000000;
+}
+.bigger {
+    font-size: 20px;
+}
+</style>
+"""
 
+st.markdown(text_shadow_style, unsafe_allow_html=True)
 
-# Display
+# Display the styled text with shadow
+st.markdown(page_1, unsafe_allow_html=True)
+st.markdown(toxicity_html, unsafe_allow_html=True)
+st.markdown(page_2, unsafe_allow_html=True)
+st.markdown(enlighten_html, unsafe_allow_html=True)
+st.markdown(project_colaborators, unsafe_allow_html=True)
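Note on the helper added above: set_background inlines the image into the page as a base64 data URI inside a <style> block injected via st.markdown. A minimal standalone sketch of the same pattern follows; the function name build_background_css and the file name local_image.png are illustrative, not part of the commit.

import base64


def build_background_css(image_path: str) -> str:
    # Read the image bytes and encode them as base64 text.
    with open(image_path, "rb") as f:
        encoded = base64.b64encode(f.read()).decode("utf-8")
    # Embed the encoded image as a CSS data URI targeting the Streamlit app container.
    return (
        "<style>\n"
        ".stApp {\n"
        f'    background-image: url("data:image/png;base64,{encoded}");\n'
        "    background-size: cover;\n"
        "}\n"
        "</style>"
    )


# Example (assumes local_image.png exists next to the script):
# print(build_background_css("local_image.png")[:120])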
main_background.png ADDED (Git LFS)
model/funcs.py CHANGED
@@ -12,11 +12,30 @@ from torch.utils.data import Dataset
 def execution_time(func):
     @wraps(func)
     def wrapper(*args, **kwargs):
+        # Define the styling for the execution time text
+        styled_text = """
+        <style>
+        .execution-time {
+            font-size: 20px;
+            color: #FFFFFF;
+            text-shadow: -2px -2px 4px #000000;
+        }
+        </style>
+        """
+
+        # Apply the styling directly before writing the execution time text
+        st.markdown(styled_text, unsafe_allow_html=True)
+
         start_time = time.time()
         result = func(*args, **kwargs)
         end_time = time.time()
         execution_seconds = end_time - start_time
-
+
+        # Write the styled text for the execution time
+        st.markdown(
+            f'<div class="execution-time">Model execution time = {execution_seconds:.5f} seconds</div>',
+            unsafe_allow_html=True,
+        )
         return result
 
     return wrapper
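For context, a hypothetical usage sketch of the execution_time decorator defined above; the predict function and its body are illustrative and not part of the repository.

from model.funcs import execution_time


@execution_time
def predict(text: str) -> int:
    # Placeholder for a real model call; returns a dummy class label.
    return len(text) % 2


# When called from a running Streamlit page, the decorator first injects the
# .execution-time CSS, then writes "Model execution time = ... seconds"
# before returning the wrapped function's result.
label = predict("The clinic was great")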
model/model.py CHANGED
@@ -1,18 +1,17 @@
-
 from typing import Tuple
+
 import torch
 import torch.nn as nn
 
-HIDDEN_SIZE =
-VOCAB_SIZE =196906
-EMBEDDING_DIM = 64
+HIDDEN_SIZE = 64
+VOCAB_SIZE = 196906
+EMBEDDING_DIM = 64  # embedding_dim
 SEQ_LEN = 100
-BATCH_SIZE =
+BATCH_SIZE = 16
 
 
 class BahdanauAttention(nn.Module):
     def __init__(self, hidden_size: int = HIDDEN_SIZE) -> None:
-
         super().__init__()
         self.hidden_size = hidden_size
         self.W_q = nn.Linear(hidden_size, hidden_size)
@@ -26,66 +25,25 @@ class BahdanauAttention(nn.Module):
         lstm_outputs: torch.Tensor,  # BATCH_SIZE x SEQ_LEN x HIDDEN_SIZE
         final_hidden: torch.Tensor,  # BATCH_SIZE x HIDDEN_SIZE
     ) -> Tuple[torch.Tensor, torch.Tensor]:
-
-        """Bahdanau Attention module
-
-        Args:
-            keys (torch.Tensor): lstm hidden states (BATCH_SIZE, SEQ_LEN, HIDDEN_SIZE)
-            query (torch.Tensor): lstm final hidden state (BATCH_SIZE, HIDDEN_SIZE)
-
-        Returns:
-            Tuple[torch.Tensor]:
-                context_matrix (BATCH_SIZE, HIDDEN_SIZE)
-                attention scores (BATCH_SIZE, SEQ_LEN)
-        """
-        # input:
-        # keys – lstm hidden states (BATCH_SIZE, SEQ_LEN, HIDDEN_SIZE)
-        # query - lstm final hidden state (BATCH_SIZE, HIDDEN_SIZE)
-
         keys = self.W_k(lstm_outputs)
-        # print(f'After linear keys: {keys.shape}')
-
         query = self.W_q(final_hidden)
-        # print(f"After linear query: {query.shape}")
-
-        # print(f"query.unsqueeze(1) {query.unsqueeze(1).shape}")
 
         sum = query.unsqueeze(1) + keys
-        # print(f"After sum: {sum.shape}")
 
         tanhed = self.tanh(sum)
-        # print(f"After tanhed: {tanhed.shape}")
 
         vector = self.W_v(tanhed).squeeze(-1)
-        # print(f"After linear vector: {vector.shape}")
 
         att_weights = torch.softmax(vector, -1)
-        # print(f"After softmax att_weights: {att_weights.shape}")
 
         context = torch.bmm(att_weights.unsqueeze(1), keys).squeeze()
-        # print(f"After bmm context: {context.shape}")
 
         return context, att_weights
 
-        # att_weights = self.linear(lstm_outputs)
-        # # print(f'After linear: {att_weights.shape, final_hidden.unsqueeze(2).shape}')
-
-        # att_weights = self.linear(lstm_outputs)
-        # # print(f'After linear: {att_weights.shape, final_hidden.unsqueeze(2).shape}')
-        # att_weights = torch.bmm(att_weights, final_hidden.unsqueeze(2))
-        # # print(f'After bmm: {att_weights.shape}')
-        # att_weights = F.softmax(att_weights.squeeze(2), dim=1)
-        # # print(f'After softmax: {att_weights.shape}')
-        # cntxt = torch.bmm(lstm_outputs.transpose(1, 2), att_weights.unsqueeze(2))
-        # # print(f'Context: {cntxt.shape}')
-        # concatted = torch.cat((cntxt, final_hidden.unsqueeze(2)), dim=1)
-        # # print(f'Concatted: {concatted.shape}')
-        # att_hidden = self.tanh(self.align(concatted.squeeze(-1)))
-        # # print(f'Att Hidden: {att_hidden.shape}')
-        # return att_hidden, att_weights
 
-
-
+
+BahdanauAttention()(
+    torch.randn(BATCH_SIZE, SEQ_LEN, HIDDEN_SIZE), torch.randn(BATCH_SIZE, HIDDEN_SIZE)
+)[1].shape
 
 
 class LSTMConcatAttentionEmbed(nn.Module):
@@ -97,17 +55,18 @@ class LSTMConcatAttentionEmbed(nn.Module):
         self.lstm = nn.LSTM(EMBEDDING_DIM, HIDDEN_SIZE, batch_first=True)
         self.attn = BahdanauAttention(HIDDEN_SIZE)
         self.clf = nn.Sequential(
-            nn.Linear(HIDDEN_SIZE, 128),
-            nn.Dropout(),
-            nn.Tanh(),
-            nn.Linear(128,
+            nn.Linear(HIDDEN_SIZE, 128),
+            nn.Dropout(),
+            nn.Tanh(),
+            nn.Linear(128, 64),
+            nn.Dropout(),
+            nn.Tanh(),
+            nn.Linear(64, 1),
         )
 
-    def forward(self, x):
+    def forward(self, x):
         embeddings = self.embedding(x)
         outputs, (h_n, _) = self.lstm(embeddings)
         att_hidden, att_weights = self.attn(outputs, h_n.squeeze(0))
         out = self.clf(att_hidden)
         return out, att_weights
-
-
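A quick shape walk-through of the model after this change, as a hypothetical snippet that is not part of the commit; it assumes LSTMConcatAttentionEmbed can be constructed without arguments, which the truncated __init__ in this diff does not confirm.

import torch

from model.model import BATCH_SIZE, SEQ_LEN, LSTMConcatAttentionEmbed

# Random batch of token ids shaped (BATCH_SIZE, SEQ_LEN) = (16, 100).
tokens = torch.randint(0, 1000, (BATCH_SIZE, SEQ_LEN))

model = LSTMConcatAttentionEmbed()
logits, att_weights = model(tokens)

print(logits.shape)       # expected: torch.Size([16, 1]), one score per review
print(att_weights.shape)  # expected: torch.Size([16, 100]), one weight per token position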
pages/review_predictor.py CHANGED
@@ -1,3 +1,4 @@
+import base64
 import json
 import pickle
 
@@ -14,6 +15,32 @@ from preprocessing.preprocessing import data_preprocessing
 from preprocessing.rnn_preprocessing import preprocess_single_string
 
 
+def get_base64(file_path):
+    with open(file_path, "rb") as file:
+        base64_bytes = base64.b64encode(file.read())
+        base64_string = base64_bytes.decode("utf-8")
+        return base64_string
+
+
+def set_background(png_file):
+    bin_str = get_base64(png_file)
+    page_bg_img = (
+        """
+    <style>
+    .stApp {
+    background-image: url("data:image/png;base64,%s");
+    background-size: auto;
+    }
+    </style>
+    """
+        % bin_str
+    )
+    st.markdown(page_bg_img, unsafe_allow_html=True)
+
+
+set_background("main_background.png")
+
+
 @st.cache_resource
 def load_logreg():
     with open("vectorizer.pkl", "rb") as f:
@@ -93,7 +120,6 @@ metrics = {
 }
 
 
-col1, col2 = st.columns([1, 3])
 df = pd.DataFrame(metrics)
 df.set_index("Models", inplace=True)
 df.index.name = "Model"
@@ -101,10 +127,40 @@ df.index.name = "Model"
 
 st.sidebar.title("Model Selection")
 model_type = st.sidebar.radio("Select Model Type", ["Classic ML", "LSTM", "BERT"])
-
+
+
+styled_text = """
+<style>
+.styled-title {
+    color: #FF00FF;
+    font-size: 40px;
+    text-shadow: -2px -2px 4px #000000;
+    -webkit-text-stroke-width: 1px;
+    -webkit-text-stroke-color: #000000;
+}
+.positive {
+    color: #00FF00;
+    font-size: 30px;
+    text-shadow: -2px -2px 4px #000000;
+    -webkit-text-stroke-width: 1px;
+    -webkit-text-stroke-color: #000000;
+
+}
+.negative {
+    color: #FF0000;
+    font-size: 30px;
+    text-shadow: -2px -2px 4px #000000;
+    -webkit-text-stroke-width: 1px;
+    -webkit-text-stroke-color: #000000;
+
+}
+</style>
+"""
+
+st.markdown(styled_text, unsafe_allow_html=True)
 
 # Streamlit app code
-st.
+st.markdown('<div class="styled-title">Review Prediction</div>', unsafe_allow_html=True)
 text_input = st.text_input("Enter your review:")
 if st.button("Predict"):
     if model_type == "Classic ML":
@@ -116,11 +172,14 @@ if st.button("Predict"):
     elif model_type == "BERT":
         prediction = predict_sentiment(text_input, model, tokenizer, "cpu")
 
+    # Apply different styles based on prediction result
     if prediction == 1:
-        st.
-
+        st.markdown(
+            f'<div class="positive">Отзыв положительный</div>', unsafe_allow_html=True
+        )
     elif prediction == 0:
-        st.
-
+        st.markdown(
+            f'<div class="negative">Отзыв отрицательный</div>', unsafe_allow_html=True
+        )
 
 st.write(df)
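For readers who do not read Russian: the two output strings above mean "The review is positive" (Отзыв положительный) and "The review is negative" (Отзыв отрицательный). A small hypothetical helper showing the same prediction-to-style mapping; the render_prediction name is illustrative and not part of the commit.

import streamlit as st


def render_prediction(prediction: int) -> None:
    # Map the binary model output to the CSS classes defined in the page's <style> block.
    if prediction == 1:
        # "Отзыв положительный" = "The review is positive"
        st.markdown('<div class="positive">Отзыв положительный</div>', unsafe_allow_html=True)
    elif prediction == 0:
        # "Отзыв отрицательный" = "The review is negative"
        st.markdown('<div class="negative">Отзыв отрицательный</div>', unsafe_allow_html=True)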
pages/text_generator.py CHANGED
@@ -1,3 +1,5 @@
+import base64
+
 import streamlit as st
 import torch
 from transformers import GPT2LMHeadModel, GPT2Tokenizer
@@ -5,6 +7,32 @@ from transformers import GPT2LMHeadModel, GPT2Tokenizer
 from model.funcs import execution_time
 
 
+def get_base64(file_path):
+    with open(file_path, "rb") as file:
+        base64_bytes = base64.b64encode(file.read())
+        base64_string = base64_bytes.decode("utf-8")
+        return base64_string
+
+
+def set_background(png_file):
+    bin_str = get_base64(png_file)
+    page_bg_img = (
+        """
+    <style>
+    .stApp {
+    background-image: url("data:image/png;base64,%s");
+    background-size: cover;
+    }
+    </style>
+    """
+        % bin_str
+    )
+    st.markdown(page_bg_img, unsafe_allow_html=True)
+
+
+set_background("text_generation.png")
+
+
 @st.cache_data
 def load_model():
     model_path = "17/"
@@ -18,26 +46,65 @@ tokenizer, model = load_model()
 
 
 @execution_time
-def generate_text(
-
+def generate_text(
+    prompt, num_beams=2, temperature=1.5, top_p=0.9, top_k=3, max_length=150
+):
+    prompt = tokenizer.encode(prompt, return_tensors="pt")
     model.eval()
     with torch.no_grad():
         out = model.generate(
-
+            prompt,
             do_sample=True,
-            num_beams=
-            temperature=
-            top_p=
-
+            num_beams=num_beams,
+            temperature=temperature,
+            top_p=top_p,
+            top_k=top_k,
+            max_length=max_length,
         )
     out = list(map(tokenizer.decode, out))[0]
     return out
 
 
-
-
+with st.sidebar:
+    num_beams = st.slider("Number of Beams", min_value=1, max_value=5, value=2)
+    temperature = st.slider("Temperature", min_value=0.1, max_value=2.0, value=1.5)
+    top_p = st.slider("Top-p", min_value=0.1, max_value=1.0, value=0.9)
+    top_k = st.slider("Top-k", min_value=1, max_value=10, value=3)
+    max_length = st.slider("Maximum Length", min_value=20, max_value=300, value=150)
+
+styled_text = """
+<style>
+.styled-text {
+    font-size: 30px;
+    text-shadow: -2px -2px 4px #000000;
+    color: #FFFFFF;
+    -webkit-text-stroke-width: 1px;
+    -webkit-text-stroke-color: #000000;
+}
+</style>
+"""
+
+st.markdown(styled_text, unsafe_allow_html=True)
+
+prompt = st.text_input(
+    "Ask a question",
+    key="question_input",
+    placeholder="Type here...",
+    type="default",
+    value="",
+)
+generate = st.button("Generate", key="generate_button")
+
 if generate:
-    if not
+    if not prompt:
         st.write("42")
     else:
-
+        generated_text = generate_text(
+            prompt, num_beams, temperature, top_p, top_k, max_length
+        )
+        paragraphs = generated_text.split("\n")
+        styled_paragraphs = [
+            f'<div class="styled-text">{paragraph}</div>' for paragraph in paragraphs
+        ]
+        styled_generated_text = " ".join(styled_paragraphs)
+        st.markdown(styled_generated_text, unsafe_allow_html=True)
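The generation call above uses the standard Hugging Face sampling parameters (do_sample, num_beams, temperature, top_p, top_k, max_length). A minimal standalone sketch of the same call against the public "gpt2" checkpoint, independent of this Space's local "17/" model directory; the prompt text is illustrative.

import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Illustrative checkpoint; the Space loads its own fine-tuned model from "17/".
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2")

prompt_ids = tokenizer.encode("What lies beyond the observable universe?", return_tensors="pt")

model.eval()
with torch.no_grad():
    out = model.generate(
        prompt_ids,
        do_sample=True,   # sample instead of greedy decoding
        num_beams=2,      # beam search width
        temperature=1.5,  # values above 1 flatten the distribution (more random)
        top_p=0.9,        # nucleus sampling cutoff
        top_k=3,          # restrict sampling to the 3 most likely tokens
        max_length=150,   # total length cap, prompt included
    )

print(tokenizer.decode(out[0]))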
space_background.jpeg ADDED (Git LFS)
space_main_background.avif ADDED
space_main_background.jpeg ADDED (Git LFS)
text_generation.png ADDED (Git LFS)