eaglelandsonce committed
Update pages/21_NLP.py
+67 -70
pages/21_NLP.py
CHANGED
@@ -1,85 +1,82 @@
 import streamlit as st
-import tensorflow as tf
-from tensorflow.keras.preprocessing.sequence import pad_sequences
 import numpy as np
+import pandas as pd
 import matplotlib.pyplot as plt
+from tensorflow.keras.preprocessing.text import Tokenizer
+from tensorflow.keras.preprocessing.sequence import pad_sequences
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.layers import Embedding, LSTM, Dense, SpatialDropout1D
+from tensorflow.keras.callbacks import EarlyStopping
 from sklearn.model_selection import train_test_split
+from sklearn.metrics import accuracy_score, confusion_matrix
+from tensorflow.keras.datasets import imdb
 
-# Load the
-
-
-# Load dataset
-dataset = load_dataset("imdb")
+# Load the dataset
+(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=5000)
 
-#
-
-
-# Tokenizer parameters
-vocab_size = 10000
-max_length = 128
+# Data Preprocessing
+max_words = 500
+max_len = 500
 embedding_dim = 128
 
-
-
-tokenizer.fit_on_texts(train_data['text'].values)
-word_index = tokenizer.word_index
-
-# Convert text to sequences
-X_train = tokenizer.texts_to_sequences(train_data['text'].values)
-X_test = tokenizer.texts_to_sequences(test_data['text'].values)
+X_train = pad_sequences(X_train, maxlen=max_len)
+X_test = pad_sequences(X_test, maxlen=max_len)
 
-#
-
-
+# Build the Model
+model = Sequential()
+model.add(Embedding(input_dim=5000, output_dim=embedding_dim, input_length=max_len))
+model.add(SpatialDropout1D(0.2))
+model.add(LSTM(100, dropout=0.2, recurrent_dropout=0.2))
+model.add(Dense(1, activation='sigmoid'))
 
-
-y_train = train_data['label'].values
-y_test = test_data['label'].values
+model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
 
-#
-
-    tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length),
-    tf.keras.layers.LSTM(64, return_sequences=True),
-    tf.keras.layers.LSTM(32),
-    tf.keras.layers.Dense(24, activation='relu'),
-    tf.keras.layers.Dense(1, activation='sigmoid')
-])
+# Train the Model
+X_train_partial, X_val, y_train_partial, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)
 
-
+early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
 
-
-model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
+history = model.fit(X_train_partial, y_train_partial, epochs=10, batch_size=64, validation_data=(X_val, y_val), callbacks=[early_stopping])
 
-#
-history = model.fit(X_train, y_train, epochs=3, validation_split=0.1, batch_size=32)
-
-# Evaluate the model
+# Evaluate the Model
 loss, accuracy = model.evaluate(X_test, y_test)
-st.write(f'Test Accuracy: {accuracy}')
-
-#
-
-
-
-
-
-
-
-st.pyplot(
-
-
-
-
-
-
-
-
-st.pyplot(
-
-#
-
-
-
-
-
-st.
+st.write(f'Test Accuracy: {accuracy:.4f}')
+
+# Plotting functions
+def plot_accuracy(history):
+    plt.plot(history.history['accuracy'])
+    plt.plot(history.history['val_accuracy'])
+    plt.title('Model accuracy')
+    plt.ylabel('Accuracy')
+    plt.xlabel('Epoch')
+    plt.legend(['Train', 'Validation'], loc='upper left')
+    st.pyplot(plt)
+
+def plot_loss(history):
+    plt.plot(history.history['loss'])
+    plt.plot(history.history['val_loss'])
+    plt.title('Model loss')
+    plt.ylabel('Loss')
+    plt.xlabel('Epoch')
+    plt.legend(['Train', 'Validation'], loc='upper left')
+    st.pyplot(plt)
+
+# Display plots
+plot_accuracy(history)
+plot_loss(history)
+
+# Text Input and Prediction
+st.header("Movie Review Sentiment Analysis")
+review_input = st.text_area("Enter your movie review:", "This movie was fantastic! I loved it.")
+
+# Tokenization and padding
+tokenizer = Tokenizer(num_words=5000)
+tokenizer.fit_on_texts(review_input)
+review_seq = tokenizer.texts_to_sequences([review_input])
+review_pad = pad_sequences(review_seq, maxlen=max_len)
+
+# Prediction
+if st.button("Classify Review"):
+    prediction = (model.predict(review_pad) > 0.5).astype("int32")
+    sentiment = "Positive" if prediction[0][0] == 1 else "Negative"
+    st.write(f'Sentiment: **{sentiment}**')
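One caveat worth flagging in the new version: `imdb.load_data` returns reviews that are already encoded as integer word indices, but the prediction path fits a fresh `Tokenizer` on the raw review, so the sequence handed to `model.predict` does not share a vocabulary with the training data. (Since `fit_on_texts` expects a list of texts, passing a single string also makes it fit on individual characters, and the new `max_words = 500` is never used; the Embedding layer hard-codes `input_dim=5000`.) Below is a minimal sketch of an encoder that reuses the Keras IMDB word index instead. It assumes the `imdb.load_data` defaults (`start_char=1`, `oov_char=2`, `index_from=3`) and the `num_words=5000` cap used above; `encode_review` is a hypothetical helper name, not part of the committed file.

# Sketch only: encode a raw review with the same word index used by imdb.load_data.
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences

word_index = imdb.get_word_index()  # maps word -> rank (1-based)

def encode_review(text, num_words=5000, maxlen=500, index_from=3):
    seq = [1]  # start_char marking the beginning of the review
    for word in text.lower().split():
        rank = word_index.get(word)
        # Keep a word only if its shifted index stays under the vocabulary cap;
        # load_data replaces everything else with the OOV index.
        if rank is not None and rank + index_from < num_words:
            seq.append(rank + index_from)
        else:
            seq.append(2)  # oov_char
    return pad_sequences([seq], maxlen=maxlen)

# Usage: review_pad = encode_review(review_input); model.predict(review_pad)

With an encoder like this in place, the `Tokenizer` block in the commit could be dropped entirely.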