File size: 2,530 Bytes
cd72b8d 84b39ac cd72b8d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 |
# LSTM_model.py
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from data_preprocessing import preprocess_data, split_data
import joblib # To save the tokenizer and label encoder
# Define the LSTM model
def build_lstm_model(vocab_size, embedding_dim=64, max_len=10, lstm_units=128, dropout_rate=0.2, output_units=6):
    """Assemble and compile the LSTM sequence classifier.

    The network is the stack Embedding -> LSTM -> Dropout -> Dense(softmax),
    compiled with the Adam optimizer, sparse categorical cross-entropy loss
    and an accuracy metric.

    Args:
        vocab_size: Size of the embedding table (the layer's ``input_dim``).
        embedding_dim: Width of each embedding vector.
        max_len: Sequence length handed to the embedding layer as
            ``input_length``.
        lstm_units: Number of units in the LSTM layer.
        dropout_rate: Dropout fraction applied to the LSTM output.
        output_units: Number of softmax outputs.

    Returns:
        The compiled ``Sequential`` model.
    """
    layer_stack = [
        Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=max_len),
        # return_sequences=False: only the last LSTM output feeds the classifier head.
        LSTM(units=lstm_units, return_sequences=False),
        Dropout(dropout_rate),
        Dense(units=output_units, activation='softmax'),
    ]
    network = Sequential(layer_stack)
    network.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network
# Main function to execute the training process
def main(data_path=None):
    """Train, evaluate and persist the transaction classifier.

    Preprocesses the CSV, splits it into train/test sets, fits the LSTM
    model, prints the test loss and accuracy, then writes the model to
    ``transactify.h5`` and the tokenizer / label encoder to
    ``tokenizer.joblib`` / ``label_encoder.joblib`` (relative paths, so they
    land in the current working directory).

    Args:
        data_path: Path of the CSV file passed to ``preprocess_data``. When
            omitted, the original hard-coded location is used, so ``main()``
            behaves exactly as before.
    """
    if data_path is None:
        data_path = r"E:\transactify\transactify\transactify\transactify\transactify\data_set\transaction_data.csv"

    sequences, labels, tokenizer, label_encoder = preprocess_data(data_path)

    # A None `sequences` is treated as the preprocessing-failure signal.
    if sequences is None:
        print("Data preprocessing failed.")
        return

    print("Data preprocessing successful!")

    X_train, X_test, y_train, y_test = split_data(sequences, labels)
    print(f"Training data shape: {X_train.shape}, Training labels shape: {y_train.shape}")
    print(f"Testing data shape: {X_test.shape}, Testing labels shape: {y_test.shape}")

    # The embedding table needs one extra row for the padding token.
    # `num_words` is None when the tokenizer was created without a vocabulary
    # cap; adding 1 to it would raise TypeError, so fall back to the size of
    # the fitted vocabulary (assumes a Keras-style tokenizer exposing
    # `word_index` -- confirm against data_preprocessing).
    num_words = tokenizer.num_words
    if num_words is None:
        num_words = len(tokenizer.word_index)
    vocab_size = num_words + 1

    model = build_lstm_model(vocab_size, max_len=10, output_units=len(label_encoder.classes_))

    # NOTE(review): the test split doubles as validation data here. That is
    # harmless while no callback selects on validation metrics, but a separate
    # validation split would be needed if early stopping/checkpointing is added.
    model.fit(X_train, y_train, epochs=50, batch_size=8, validation_data=(X_test, y_test))

    loss, accuracy = model.evaluate(X_test, y_test)
    print(f"Test Loss: {loss:.4f}, Test Accuracy: {accuracy:.4f}")

    model.save('transactify.h5')
    print("Model saved as 'transactify.h5'")

    # The tokenizer and label encoder are saved with the model so inference
    # can reproduce the same text -> index and index -> label mappings.
    joblib.dump(tokenizer, 'tokenizer.joblib')
    joblib.dump(label_encoder, 'label_encoder.joblib')
    print("Tokenizer and LabelEncoder saved as 'tokenizer.joblib' and 'label_encoder.joblib'")
# Execute the main function
if __name__ == "__main__":
    # Train only when run as a script; importing this module must not start training.
    main()
|