Spaces:
Sleeping
Sleeping
import streamlit as st | |
import torch | |
import torch.nn as nn | |
import torch.optim as optim | |
from torchvision import transforms | |
from torch.utils.data import DataLoader | |
from datasets import load_dataset | |
from huggingface_hub import HfApi, Repository | |
import os | |
import matplotlib.pyplot as plt | |
import utils | |
# Hugging Face Hub credentials | |
HF_TOKEN = os.getenv("HF_TOKEN") | |
MODEL_REPO_ID = "louiecerv/amer_sign_lang_data_augmentation" | |
DATASET_REPO_ID = "louiecerv/american_sign_language" | |
# Device configuration | |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") | |
st.write(f"Device: {device}") | |
# Define the CNN model | |
class CNN(nn.Module): | |
def __init__(self): | |
super(CNN, self).__init__() | |
self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1) | |
self.relu1 = nn.ReLU() | |
self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2) | |
self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1) | |
self.relu2 = nn.ReLU() | |
self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2) | |
self.flatten = nn.Flatten() | |
self.fc = nn.Linear(64 * 7 * 7, 128) # Adjusted for 28x28 images | |
self.relu3 = nn.ReLU() | |
self.fc2 = nn.Linear(128, 25) # 25 classes (A-Y) | |
def forward(self, x): | |
x = self.pool1(self.relu1(self.conv1(x))) | |
x = self.pool2(self.relu2(self.conv2(x))) | |
x = self.flatten(x) | |
x = self.relu3(self.fc(x)) | |
x = self.fc2(x) | |
return x | |
# Create a model card | |
def create_model_card(): | |
model_card = """ | |
--- | |
language: en | |
tags: | |
- image-classification | |
- deep-learning | |
- cnn | |
license: apache-2.0 | |
datasets: | |
Network (CNN) designed to recognize American Sign Language (ASL) letters from images. It was trained on the `louiecerv/american_sign_language` dataset. | |
## Model Description | |
The model consists of two convolutional layers followed by max-pooling layers, a flattening layer, and two fully connected layers. It is designed to classify images of ASL letters into 25 classes (A-Y). | |
## Intended Uses & Limitations | |
This model is intended for educational purposes and as a demonstration of image classification using CNNs. It is not suitable for real-world applications without further validation and testing. | |
## How to Use | |
```python | |
import torch | |
from torchvision import transforms | |
from PIL import Image | |
# Load the model | |
model = CNN() | |
model.load_state_dict(torch.load("path_to_model/pytorch_model.bin")) | |
model.eval() | |
# Preprocess the image | |
transform = transforms.Compose([ | |
transforms.Grayscale(num_output_channels=1), | |
transforms.Resize((28, 28)), | |
transforms.ToTensor(), | |
transforms.Normalize(mean=[0.5], std=[0.5]) | |
]) | |
image = Image.open("path_to_image").convert("RGB") | |
image = transform(image).unsqueeze(0) | |
# Make a prediction | |
with torch.no_grad(): | |
output = model(image) | |
_, predicted = torch.max(output.data, 1) | |
print(f"Predicted ASL letter: {predicted.item()}") | |
``` | |
## Training Data | |
The model was trained on the `louiecerv/american_sign_language` dataset, which contains images of ASL letters. | |
## Training Procedure | |
The model was trained using the Adam optimizer with a learning rate of 0.001 and a batch size of 64. The training process included 5 epochs. | |
## Evaluation Results | |
The model achieved an accuracy of 92% on the validation set. | |
""" | |
with open("model_repo/README.md", "w") as f: | |
f.write(model_card) | |
# Streamlit app | |
def main(): | |
st.title("American Sign Language Recognition") | |
# Load the dataset from Hugging Face Hub | |
dataset = load_dataset(DATASET_REPO_ID) | |
# Data loaders with preprocessing: | |
transform = transforms.Compose([ | |
transforms.Normalize(mean=[0.5], std=[0.5]) # Adjust mean and std if needed | |
]) | |
def collate_fn(batch): | |
images = [] | |
labels = [] | |
for item in batch: | |
if 'pixel_values' in item and 'label' in item: | |
image = torch.tensor(item['pixel_values']) # Convert to tensor | |
label = item['label'] | |
try: | |
image = transform(image) | |
images.append(image) | |
labels.append(label) | |
except Exception as e: | |
print(f"Error processing image: {e}") | |
continue # Skip to the next image | |
if not images: # Check if the list is empty! | |
return torch.tensor([]), torch.tensor([]) # Return empty tensors if no images loaded | |
images = torch.stack(images).to(device) | |
labels = torch.tensor(labels).long().to(device) | |
return images, labels | |
train_loader = DataLoader(dataset["train"], batch_size=64, shuffle=True, collate_fn=collate_fn) | |
val_loader = DataLoader(dataset["validation"], batch_size=64, collate_fn=collate_fn) | |
# Model, loss, and optimizer | |
model = CNN().to(device) | |
criterion = nn.CrossEntropyLoss() | |
optimizer = optim.Adam(model.parameters(), lr=0.001) | |
# Training loop | |
num_epochs = st.slider("Number of Epochs", 1, 20, 5) # Streamlit slider | |
if st.button("Train Model"): | |
for epoch in range(num_epochs): | |
for i, (images, labels) in enumerate(train_loader): | |
if images.nelement() == 0: # Check if images tensor is empty | |
continue | |
# Forward pass | |
outputs = model(images) | |
loss = criterion(outputs, labels) | |
# Backward and optimize | |
optimizer.zero_grad() | |
loss.backward() | |
optimizer.step() | |
if (i + 1) % 100 == 0: | |
st.write(f'Epoch [{epoch + 1}/{num_epochs}], Step [{i + 1}/{len(train_loader)}], Loss: {loss.item():.4f}') | |
# Validation | |
correct = 0 | |
total = 0 | |
with torch.no_grad(): | |
for images, labels in val_loader: | |
if images.nelement() == 0: # Check if images tensor is empty | |
continue | |
outputs = model(images) | |
_, predicted = torch.max(outputs.data, 1) | |
total += labels.size(0) | |
correct += (predicted == labels).sum().item() | |
if total > 0: | |
accuracy = 100 * correct / total | |
st.write(f'Accuracy of the model on the validation images: {accuracy:.2f}%') | |
else: | |
st.write("No validation images were processed.") | |
# Save model to Hugging Face Hub | |
if HF_TOKEN: | |
repo = Repository(local_dir="model_repo", clone_from=MODEL_REPO_ID, use_auth_token=HF_TOKEN) | |
model_path = os.path.join(repo.local_dir, "pytorch_model.bin") | |
torch.save(model.state_dict(), model_path) | |
create_model_card() | |
repo.push_to_hub(commit_message="Trained model and model card", blocking=True) | |
st.write(f"Model and model card saved to {MODEL_REPO_ID}") | |
else: | |
st.warning("HF_TOKEN environment variable not set. Model not saved.") | |
if __name__ == "__main__": | |
main() |