nlp_proj

Runtime error

App Files Files Community

Maslov-Artem committed on Mar 8

Commit

b90441b

•

1 Parent(s): c747562

New weights and streamlit features

Browse files

Files changed (9) hide show

.gitignore +13 -2
17/config.json +1 -1
17/model.safetensors +1 -1
app.py +48 -1
enlightened_static.jpg +0 -0
model/funcs.py +18 -0
model/model_weights.pt +2 -2
preprocessing.py +0 -30
static_toxic.jpg +0 -0

.gitignore CHANGED Viewed

@@ -1,6 +1,17 @@
 .venv
 healthcare_facilities_reviews.jsonl
 *.ipynb
-__pycache__/
 *.csv
-.ipynb_checkoints/

 .venv
 healthcare_facilities_reviews.jsonl
 *.ipynb
+/__pycache__/
 *.csv
+/.ipynb_checkoints/
+.DS_Store
+RNN/
+bert/
+cached_lm_GPT2Tokenizer_64_wiki_content.txt
+cached_lm_GPT2Tokenizer_64_wiki_content.txt.lock
+finetuned/
+.gitattributes
+*.txt
+model/.ipynb_checkpoints/
+model/__pycache__/
+preprocessing/__pycache__/

17/config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "sberbank-ai/rugpt3small_based_on_gpt2",
   "activation_function": "gelu_new",
   "architectures": [
     "GPT2LMHeadModel"

 {
+  "_name_or_path": "/content/drive/MyDrive/model__weights",
   "activation_function": "gelu_new",
   "architectures": [
     "GPT2LMHeadModel"

17/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4a99f27f7efc5a609d3bb2f30d15980d3384ecd47f4b0806c251523071a7648a
 size 500941440

 version https://git-lfs.github.com/spec/v1
+oid sha256:e39686188a07e05ea4860c12df5bb451c630233ccebeee26dc24a4c3219b3b53
 size 500941440

app.py CHANGED Viewed

@@ -1,3 +1,50 @@
 import streamlit as st
-st.title("Sentiment Analysis with Logistic Regression")

 import streamlit as st
+static_toxicity_path = "https://imagizer.imageshack.com/v2/480x360q70/r/924/L4Ditq.jpg"
+animated_toxicity_path = (
+    "https://i.kym-cdn.com/photos/images/original/001/264/967/cdc.gif"
+)
+animated_enlighten_path = "https://gifdb.com/images/high/zen-meditation-chakras-illustration-6lujnenasnfmn8dt.gif"
+static_enlighten_path = "https://imagizer.imageshack.com/v2/668x500q70/r/922/bpoy6G.jpg"
+# Calculate the column widths dynamically
+toxicity_html = f"""
+<div class="toxicity-image-container">
+    <a href="review_predictor" target="_self" class="toxicity-link">
+        <img src="{static_toxicity_path}" class="toxicity-image" />
+    </a>
+</div>
+<style>
+    /* Define the hover state for column 1 */
+    .toxicity-image-container:hover .toxicity-image {{
+        content: url("{animated_toxicity_path}");
+        transform: scale(1.1); /* Enlarge the image by 10% */
+        transition: transform 0.5s ease; /* Add smooth transition */
+    }}
+</style>
+"""
+enlighten_html = f"""
+<div class="enlighten-image-container">
+    <a href="text_generator" target="_self" class="enlighten-link">
+        <img src="{static_enlighten_path}" class="enlighten-image" />
+    </a>
+</div>
+<style>
+    /* Define the hover state for column 2 */
+    .enlighten-image-container:hover .enlighten-image {{
+        content: url("{animated_enlighten_path}");
+        transform: scale(1.1); /* Enlarge the image by 10% */
+        transition: transform 0.5s ease; /* Add smooth transition */
+    }}
+</style>
+"""
+# Display HTML code with Streamlit
+st.markdown(toxicity_html, unsafe_allow_html=True)
+st.markdown(enlighten_html, unsafe_allow_html=True)
+# Display JavaScript code with Streamlit

enlightened_static.jpg ADDED Viewed

model/funcs.py CHANGED Viewed

@@ -1,10 +1,27 @@
 import matplotlib.pyplot as plt
 import torch
 import torch.nn as nn
 from sklearn.metrics import f1_score
 from torch.utils.data import Dataset
 def create_model_and_tokenizer(model_class, tokenizer_class, pretrained_weights):
     # Создаем объекты для токенизатора и модели
     tokenizer = tokenizer_class.from_pretrained(pretrained_weights)
@@ -98,6 +115,7 @@ def train_model(
     return train_losses, train_accuracies, val_losses, val_accuracies, val_f1_scores
 def predict_sentiment(text, model, tokenizer, DEVICE):
     # Модель должна быть в режиме оценки
     model.eval()

+import time
+from functools import wraps
 import matplotlib.pyplot as plt
+import streamlit as st
 import torch
 import torch.nn as nn
 from sklearn.metrics import f1_score
 from torch.utils.data import Dataset
+def execution_time(func):
+    @wraps(func)
+    def wrapper(*args, **kwargs):
+        start_time = time.time()
+        result = func(*args, **kwargs)
+        end_time = time.time()
+        execution_seconds = end_time - start_time
+        st.write(f"Model calculating time = {execution_seconds:.5f} seconds")
+        return result
+    return wrapper
 def create_model_and_tokenizer(model_class, tokenizer_class, pretrained_weights):
     # Создаем объекты для токенизатора и модели
     tokenizer = tokenizer_class.from_pretrained(pretrained_weights)
     return train_losses, train_accuracies, val_losses, val_accuracies, val_f1_scores
+@execution_time
 def predict_sentiment(text, model, tokenizer, DEVICE):
     # Модель должна быть в режиме оценки
     model.eval()

model/model_weights.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:de960bfb6327e0509297628c3cec5bc456e6dc681b29aca9bead6330e941d44e
-size 50489371

 version https://git-lfs.github.com/spec/v1
+oid sha256:38d0d9dfdc648de05fb1bd62dab307a558d045305c5fd4700331a0967ea5e1b5
+size 50647220

preprocessing.py DELETED Viewed

@@ -1,30 +0,0 @@
-import re
-import string
-import nltk
-import pymorphy2
-from nltk.tokenize import word_tokenize
-nltk.download("punkt")
-def clean_text(text: str) -> str:
-    text = text.lower()
-    text = re.sub(r"\w*(\w)\1{2,}\w*", "", text)
-    text = re.sub(r"\d+\w*", "", text)
-    text = re.sub(r"\[.*?\]", "", text)
-    text = text.translate(str.maketrans("", "", string.punctuation))
-    return text
-def lemmize_and_tokenize_text(text: str) -> list[str]:
-    morph = pymorphy2.MorphAnalyzer()
-    tokens = word_tokenize(text)
-    lemmas = [morph.parse(token)[0].normal_form for token in tokens]
-    return lemmas
-def data_preprocessing(text: str) -> list[str]:
-    cleaned_text = clean_text(text)
-    lemmized_text = lemmize_and_tokenize_text(cleaned_text)
-    return lemmized_text

static_toxic.jpg ADDED Viewed