eaglelandsonce committed on
Commit
cc6f9f3
·
verified ·
1 Parent(s): 08cf096

Update pages/21_NLP.py

Browse files
Files changed (1) hide show
  1. pages/21_NLP.py +67 -70
pages/21_NLP.py CHANGED
@@ -1,85 +1,82 @@
1
  import streamlit as st
2
- import tensorflow as tf
3
- from tensorflow.keras.preprocessing.sequence import pad_sequences
4
  import numpy as np
 
5
  import matplotlib.pyplot as plt
 
 
 
 
 
6
  from sklearn.model_selection import train_test_split
 
 
7
 
8
- # Load the IMDb dataset
9
- from datasets import load_dataset
10
-
11
- # Load dataset
12
- dataset = load_dataset("imdb")
13
 
14
- # Split dataset into training and testing
15
- train_data, test_data = train_test_split(dataset['train'].to_pandas(), test_size=0.2)
16
-
17
- # Tokenizer parameters
18
- vocab_size = 10000
19
- max_length = 128
20
  embedding_dim = 128
21
 
22
- # Tokenize the data
23
- tokenizer = tf.keras.preprocessing.text.Tokenizer(num_words=vocab_size, oov_token="<OOV>")
24
- tokenizer.fit_on_texts(train_data['text'].values)
25
- word_index = tokenizer.word_index
26
-
27
- # Convert text to sequences
28
- X_train = tokenizer.texts_to_sequences(train_data['text'].values)
29
- X_test = tokenizer.texts_to_sequences(test_data['text'].values)
30
 
31
- # Pad sequences
32
- X_train = pad_sequences(X_train, maxlen=max_length, padding='post', truncating='post')
33
- X_test = pad_sequences(X_test, maxlen=max_length, padding='post', truncating='post')
 
 
 
34
 
35
- # Labels
36
- y_train = train_data['label'].values
37
- y_test = test_data['label'].values
38
 
39
- # Build the LSTM model
40
- model = tf.keras.Sequential([
41
- tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length),
42
- tf.keras.layers.LSTM(64, return_sequences=True),
43
- tf.keras.layers.LSTM(32),
44
- tf.keras.layers.Dense(24, activation='relu'),
45
- tf.keras.layers.Dense(1, activation='sigmoid')
46
- ])
47
 
48
- model.summary()
49
 
50
- # Compile the model
51
- model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
52
 
53
- # Train the model
54
- history = model.fit(X_train, y_train, epochs=3, validation_split=0.1, batch_size=32)
55
-
56
- # Evaluate the model
57
  loss, accuracy = model.evaluate(X_test, y_test)
58
- st.write(f'Test Accuracy: {accuracy}')
59
-
60
- # Plot training & validation accuracy values
61
- st.subheader("Training and Validation Accuracy")
62
- fig, ax = plt.subplots()
63
- ax.plot(history.history['accuracy'], label='Training Accuracy')
64
- ax.plot(history.history['val_accuracy'], label='Validation Accuracy')
65
- ax.set_xlabel('Epoch')
66
- ax.set_ylabel('Accuracy')
67
- ax.legend()
68
- st.pyplot(fig)
69
-
70
- st.subheader("Training and Validation Loss")
71
- fig, ax = plt.subplots()
72
- ax.plot(history.history['loss'], label='Training Loss')
73
- ax.plot(history.history['val_loss'], label='Validation Loss')
74
- ax.set_xlabel('Epoch')
75
- ax.set_ylabel('Loss')
76
- ax.legend()
77
- st.pyplot(fig)
78
-
79
- # Convert the model to TensorFlow.js format
80
- import tensorflowjs as tfjs
81
-
82
- tfjs_target_dir = 'tfjs_model'
83
- model.save('model.h5')
84
- tfjs.converters.save_keras_model(model, tfjs_target_dir)
85
- st.write("Model saved and converted to TensorFlow.js format.")
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
 
 
2
  import numpy as np
3
+ import pandas as pd
4
  import matplotlib.pyplot as plt
5
+ from tensorflow.keras.preprocessing.text import Tokenizer
6
+ from tensorflow.keras.preprocessing.sequence import pad_sequences
7
+ from tensorflow.keras.models import Sequential
8
+ from tensorflow.keras.layers import Embedding, LSTM, Dense, SpatialDropout1D
9
+ from tensorflow.keras.callbacks import EarlyStopping
10
  from sklearn.model_selection import train_test_split
11
+ from sklearn.metrics import accuracy_score, confusion_matrix
12
+ from tensorflow.keras.datasets import imdb
13
 
14
# Hyperparameters shared by preprocessing and the model.
vocab_size = 5000    # vocabulary cap; must equal the Embedding input_dim
max_words = 500      # NOTE(review): not referenced by the code below; kept for compatibility
max_len = 500        # every review is padded/truncated to this many tokens
embedding_dim = 128


@st.cache_resource
def _load_data_and_train():
    """Load IMDb, train the LSTM sentiment classifier, and score it on the test set.

    Streamlit re-executes the whole script on every widget interaction, so
    without caching each button click would retrain the network from scratch.
    ``st.cache_resource`` keeps the fitted objects alive across reruns.

    Returns:
        Tuple ``(model, history, loss, accuracy)``: the fitted Keras model,
        the object returned by ``model.fit``, and the two values returned by
        ``model.evaluate`` on the test split.
    """
    # Load the dataset
    (X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=vocab_size)

    # Data Preprocessing
    X_train = pad_sequences(X_train, maxlen=max_len)
    X_test = pad_sequences(X_test, maxlen=max_len)

    # Build the Model
    model = Sequential()
    model.add(Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=max_len))
    model.add(SpatialDropout1D(0.2))
    model.add(LSTM(100, dropout=0.2, recurrent_dropout=0.2))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

    # Train the Model: hold out 20% of the training split for validation.
    X_train_partial, X_val, y_train_partial, y_val = train_test_split(
        X_train, y_train, test_size=0.2, random_state=42
    )

    # Stop once val_loss has not improved for 3 epochs and keep the best weights.
    early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

    history = model.fit(
        X_train_partial,
        y_train_partial,
        epochs=10,
        batch_size=64,
        validation_data=(X_val, y_val),
        callbacks=[early_stopping],
    )

    # Evaluate the Model
    loss, accuracy = model.evaluate(X_test, y_test)
    return model, history, loss, accuracy


model, history, loss, accuracy = _load_data_and_train()
st.write(f'Test Accuracy: {accuracy:.4f}')
44
+
45
+ # Plotting functions
46
def plot_accuracy(history):
    """Render the training and validation accuracy curves in the Streamlit page.

    Args:
        history: Object whose ``history`` attribute maps ``'accuracy'`` and
            ``'val_accuracy'`` to per-epoch sequences (the value returned by
            ``model.fit`` in this script).
    """
    # Draw on a dedicated figure. Handing the ``plt`` module to st.pyplot
    # leaves the implicit global figure uncleared, so whatever is plotted
    # next would be drawn on top of these curves.
    fig, ax = plt.subplots()
    ax.plot(history.history['accuracy'])
    ax.plot(history.history['val_accuracy'])
    ax.set_title('Model accuracy')
    ax.set_ylabel('Accuracy')
    ax.set_xlabel('Epoch')
    ax.legend(['Train', 'Validation'], loc='upper left')
    st.pyplot(fig)
    # The image has been rendered; close the figure so script reruns do not
    # accumulate open matplotlib figures.
    plt.close(fig)
54
+
55
def plot_loss(history):
    """Render the training and validation loss curves in the Streamlit page.

    Args:
        history: Object whose ``history`` attribute maps ``'loss'`` and
            ``'val_loss'`` to per-epoch sequences (the value returned by
            ``model.fit`` in this script).
    """
    # Use a fresh figure instead of the implicit global one; otherwise the
    # loss curves are drawn onto whatever axes an earlier plot left behind.
    fig, ax = plt.subplots()
    ax.plot(history.history['loss'])
    ax.plot(history.history['val_loss'])
    ax.set_title('Model loss')
    ax.set_ylabel('Loss')
    ax.set_xlabel('Epoch')
    ax.legend(['Train', 'Validation'], loc='upper left')
    st.pyplot(fig)
    # The image has been rendered; close the figure so script reruns do not
    # accumulate open matplotlib figures.
    plt.close(fig)
63
+
64
# Display plots
plot_accuracy(history)
plot_loss(history)

# Text Input and Prediction
st.header("Movie Review Sentiment Analysis")
review_input = st.text_area("Enter your movie review:", "This movie was fantastic! I loved it.")


@st.cache_resource
def _load_word_index():
    """Return the IMDb word -> index mapping, loaded once per server process."""
    return imdb.get_word_index()


def _encode_review(text, word_index, num_words=5000, start_char=1, oov_char=2, index_from=3):
    """Encode raw review text with the scheme ``imdb.load_data`` uses by default.

    The model was trained on IMDb's own integer encoding, so user text must be
    mapped through the same vocabulary. Fitting a new tokenizer on the input
    would assign unrelated indices and make the prediction meaningless.

    Args:
        text: Raw review text.
        word_index: Mapping from lowercase word to its IMDb index.
        num_words: Vocabulary cap used when the training data was loaded;
            encoded values at or above it become ``oov_char``.
        start_char: Marker placed at the start of the sequence.
        oov_char: Value used for unknown or out-of-vocabulary words.
        index_from: Offset added to every word index.

    Returns:
        List of ints beginning with ``start_char``.
    """
    # Same punctuation set Keras' text tokenizer strips by default
    # (apostrophes are kept, so words such as "it's" survive intact).
    punctuation = '!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n'
    cleaned = text.lower().translate(str.maketrans(punctuation, ' ' * len(punctuation)))

    encoded = [start_char]
    for word in cleaned.split():
        rank = word_index.get(word)
        token = oov_char if rank is None else rank + index_from
        encoded.append(token if token < num_words else oov_char)
    return encoded


# Tokenization and padding
review_seq = [_encode_review(review_input, _load_word_index())]
review_pad = pad_sequences(review_seq, maxlen=max_len)

# Prediction
if st.button("Classify Review"):
    if not review_input.strip():
        # Nothing to classify; avoid scoring a sequence that holds only the start marker.
        st.warning("Please enter a review to classify.")
    else:
        prediction = (model.predict(review_pad) > 0.5).astype("int32")
        sentiment = "Positive" if prediction[0][0] == 1 else "Negative"
        st.write(f'Sentiment: **{sentiment}**')