Spaces:
Sleeping
Sleeping
File size: 7,164 Bytes
1cc1116 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 |
import streamlit as st
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import DataLoader
from datasets import load_dataset
from huggingface_hub import HfApi, Repository
import os
import matplotlib.pyplot as plt
import utils
# Hugging Face Hub credentials and target repositories.
HF_TOKEN = os.getenv("HF_TOKEN")  # None when unset; main() skips the Hub push in that case
MODEL_REPO_ID = "louiecerv/amer_sign_lang_data_augmentation"  # destination repo for trained weights + card
DATASET_REPO_ID = "louiecerv/american_sign_language"  # source dataset with train/validation splits
# Device configuration: prefer GPU when available, otherwise CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
st.write(f"Device: {device}")
# Define the CNN model
class CNN(nn.Module):
    """Small convolutional classifier for 28x28 single-channel ASL images.

    Two conv/pool stages (1 -> 32 -> 64 channels, spatial size halved
    28 -> 14 -> 7 at each pool) feed a 128-unit hidden layer and a
    25-way output head (letters A-Y, per the original design).
    """

    def __init__(self):
        super().__init__()
        # Stage 1: 1 -> 32 channels; 3x3 convs with padding keep 28x28,
        # the 2x2 max-pool halves it to 14x14.
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        # Stage 2: 32 -> 64 channels; pooled down to 7x7.
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        # Classifier head over the flattened 64*7*7 feature map.
        self.flatten = nn.Flatten()
        self.fc = nn.Linear(64 * 7 * 7, 128)
        self.relu3 = nn.ReLU()
        self.fc2 = nn.Linear(128, 25)

    def forward(self, x):
        """Return raw class logits for a (N, 1, 28, 28) input batch."""
        features = self.pool1(self.relu1(self.conv1(x)))
        features = self.pool2(self.relu2(self.conv2(features)))
        flat = self.flatten(features)
        hidden = self.relu3(self.fc(flat))
        logits = self.fc2(hidden)
        return logits
# Create a model card
def create_model_card(local_dir="model_repo"):
    """Write a README.md model card into *local_dir*.

    Fixes over the previous version: the YAML front matter was malformed
    (the ``datasets:`` key had no entry and the closing ``---`` was
    missing, so the Hub could not parse the metadata), and the output
    directory/path were hard-coded.

    Args:
        local_dir: Directory of the model repository checkout. Defaults
            to "model_repo", the clone location used in main(), so
            existing callers are unchanged.
    """
    model_card = """
---
language: en
tags:
- image-classification
- deep-learning
- cnn
license: apache-2.0
datasets:
- louiecerv/american_sign_language
---

# American Sign Language Recognition CNN

This model is a Convolutional Neural Network (CNN) designed to recognize American Sign Language (ASL) letters from images. It was trained on the `louiecerv/american_sign_language` dataset.
## Model Description
The model consists of two convolutional layers followed by max-pooling layers, a flattening layer, and two fully connected layers. It is designed to classify images of ASL letters into 25 classes (A-Y).
## Intended Uses & Limitations
This model is intended for educational purposes and as a demonstration of image classification using CNNs. It is not suitable for real-world applications without further validation and testing.
## How to Use
```python
import torch
from torchvision import transforms
from PIL import Image
# Load the model
model = CNN()
model.load_state_dict(torch.load("path_to_model/pytorch_model.bin"))
model.eval()
# Preprocess the image
transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.Resize((28, 28)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5])
])
image = Image.open("path_to_image").convert("RGB")
image = transform(image).unsqueeze(0)
# Make a prediction
with torch.no_grad():
    output = model(image)
    _, predicted = torch.max(output.data, 1)
print(f"Predicted ASL letter: {predicted.item()}")
```
## Training Data
The model was trained on the `louiecerv/american_sign_language` dataset, which contains images of ASL letters.
## Training Procedure
The model was trained using the Adam optimizer with a learning rate of 0.001 and a batch size of 64. The training process included 5 epochs.
## Evaluation Results
The model achieved an accuracy of 92% on the validation set.
"""
    # The repo clone in main() normally creates this directory, but don't
    # rely on call order — creating it here makes the function standalone.
    os.makedirs(local_dir, exist_ok=True)
    with open(os.path.join(local_dir, "README.md"), "w") as f:
        f.write(model_card)
# Streamlit app
def main():
    """Streamlit entry point: load the ASL dataset, train the CNN on demand,
    report validation accuracy, and push the trained weights to the Hub."""
    st.title("American Sign Language Recognition")

    # Load the dataset from Hugging Face Hub.
    dataset = load_dataset(DATASET_REPO_ID)

    # Preprocessing applied per-image inside collate_fn.
    # NOTE(review): Normalize is applied directly to a tensor built from
    # item['pixel_values'] — this assumes pixel_values is already a float
    # tensor-like array with a channel dimension; TODO confirm against the
    # dataset schema.
    transform = transforms.Compose([
        transforms.Normalize(mean=[0.5], std=[0.5])  # Adjust mean and std if needed
    ])

    def collate_fn(batch):
        # Build a (images, labels) batch, skipping malformed items rather
        # than failing the whole batch. Tensors are moved to `device` here,
        # so the training loop below does no .to(device) of its own.
        images = []
        labels = []
        for item in batch:
            if 'pixel_values' in item and 'label' in item:
                image = torch.tensor(item['pixel_values'])  # Convert to tensor
                label = item['label']
                try:
                    image = transform(image)
                    images.append(image)
                    labels.append(label)
                except Exception as e:
                    print(f"Error processing image: {e}")
                    continue  # Skip to the next image
        if not images:  # Check if the list is empty!
            return torch.tensor([]), torch.tensor([])  # Return empty tensors if no images loaded
        images = torch.stack(images).to(device)
        labels = torch.tensor(labels).long().to(device)
        return images, labels

    train_loader = DataLoader(dataset["train"], batch_size=64, shuffle=True, collate_fn=collate_fn)
    val_loader = DataLoader(dataset["validation"], batch_size=64, collate_fn=collate_fn)

    # Model, loss, and optimizer.
    model = CNN().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Training is gated behind a button; epochs chosen via slider.
    num_epochs = st.slider("Number of Epochs", 1, 20, 5)  # Streamlit slider
    if st.button("Train Model"):
        for epoch in range(num_epochs):
            for i, (images, labels) in enumerate(train_loader):
                if images.nelement() == 0:  # Skip batches where every item failed collation
                    continue
                # Forward pass
                outputs = model(images)
                loss = criterion(outputs, labels)
                # Backward and optimize
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                if (i + 1) % 100 == 0:
                    st.write(f'Epoch [{epoch + 1}/{num_epochs}], Step [{i + 1}/{len(train_loader)}], Loss: {loss.item():.4f}')

        # Validation pass (runs once, after all epochs).
        # NOTE(review): model.eval() is not called; harmless for this
        # architecture (no dropout/batchnorm) but worth adding if layers change.
        correct = 0
        total = 0
        with torch.no_grad():
            for images, labels in val_loader:
                if images.nelement() == 0:  # Check if images tensor is empty
                    continue
                outputs = model(images)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        if total > 0:
            accuracy = 100 * correct / total
            st.write(f'Accuracy of the model on the validation images: {accuracy:.2f}%')
        else:
            st.write("No validation images were processed.")

        # Save model to Hugging Face Hub (skipped when no token is configured).
        # NOTE(review): Repository / use_auth_token are deprecated in recent
        # huggingface_hub releases in favor of HfApi.upload_file — confirm the
        # pinned library version before migrating.
        if HF_TOKEN:
            repo = Repository(local_dir="model_repo", clone_from=MODEL_REPO_ID, use_auth_token=HF_TOKEN)
            model_path = os.path.join(repo.local_dir, "pytorch_model.bin")
            torch.save(model.state_dict(), model_path)
            create_model_card()
            repo.push_to_hub(commit_message="Trained model and model card", blocking=True)
            st.write(f"Model and model card saved to {MODEL_REPO_ID}")
        else:
            st.warning("HF_TOKEN environment variable not set. Model not saved.")


if __name__ == "__main__":
    main()