# Hugging Face Space app script.
# (File-viewer residue removed here: Space status "Sleeping", file size 4,095 bytes,
# the commit-hash blame column and the 1-94 line-number gutter.)
import os
# Best-effort runtime bootstrap of system and Python dependencies.
# Commands are passed as argv lists (no shell parsing), and pip is invoked
# through the running interpreter so the package is installed into the same
# environment that executes the imports below.
import subprocess
import sys

for _bootstrap_cmd in (
    ["apt-get", "update"],
    ["apt-get", "install", "-y", "python3-pip"],  # Make sure pip is available
    [sys.executable, "-m", "pip", "install", "transformers"],
):
    try:
        # check=False keeps the original best-effort behaviour: a non-zero
        # exit status is ignored and the script carries on.
        subprocess.run(_bootstrap_cmd, check=False)
    except OSError as exc:
        # Raised when the executable itself is missing (e.g. no apt-get on
        # this host); os.system would only have returned a non-zero status.
        print(f"Skipping {' '.join(_bootstrap_cmd)}: {exc}")
# Restart the kernel here if you have the option (in a notebook setting)
import transformers
from torch.utils.data import DataLoader
import streamlit as st
from datasets import load_dataset, Audio
from transformers import AutoModelForAudioClassification, AutoFeatureExtractor
import torch
import os
# Install using apt
# Load the MInDS-14 dataset
dataset = load_dataset("PolyAI/minds14", "en-US", split="train", trust_remote_code=True)
# Resample audio to 16kHz
dataset = dataset.cast_column("audio", Audio(sampling_rate=16000))
# Build the label maps from the dataset's own "intent_class" feature so the
# classification head is sized to the data rather than to the library default.
intent_names = dataset.features["intent_class"].names
label2id = {name: idx for idx, name in enumerate(intent_names)}
id2label = {idx: name for idx, name in enumerate(intent_names)}
# Load pretrained model and feature extractor
# num_labels must match the number of intent classes; otherwise the loss
# computation receives targets outside the range of the output logits.
model = AutoModelForAudioClassification.from_pretrained(
    "facebook/wav2vec2-base",
    num_labels=len(intent_names),
    label2id=label2id,
    id2label=id2label,
)
feature_extractor = AutoFeatureExtractor.from_pretrained("facebook/wav2vec2-base")
# Preprocessing function
def preprocess_function(examples):
    """Run the module-level feature extractor over a batch of audio examples.

    Reads the "array" entry of every item in ``examples["audio"]`` and passes
    the resulting list to ``feature_extractor`` with a 16 kHz sampling rate,
    padding enabled, and truncation at 100000 samples.

    Returns whatever ``feature_extractor`` returns for that call.
    """
    # presumably `examples` is a batched mapping as produced by
    # Dataset.map(..., batched=True) — verify against the caller.
    waveforms = [clip["array"] for clip in examples["audio"]]
    return feature_extractor(
        waveforms,
        sampling_rate=16000,
        padding=True,
        max_length=100000,
        truncation=True,
    )
# Run feature extraction over the split in batches.
dataset = dataset.map(preprocess_function, batched=True)
# The training loop reads the targets from a column named "labels".
dataset = dataset.rename_column("intent_class", "labels")
# set_format modifies the dataset in place and returns None, so its result
# must NOT be assigned back to `dataset` (doing so would replace it with None).
dataset.set_format(type="torch", columns=["input_values", "labels"])
# Create DataLoader
batch_size = 4  # Adjust as needed
dataloader = DataLoader(dataset, batch_size=batch_size)
# Set device and move model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
# from_pretrained hands the model back in evaluation mode; switch to training
# mode so that dropout and similar train-only layers are active.
model.train()
# Training loop (example)
num_epochs = 2  # Keep small for testing on Spaces!
optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5)
for epoch in range(num_epochs):
    loss = None  # stays None if the dataloader yields no batches
    for batch in dataloader:
        input_values = batch["input_values"].to(device)
        labels = batch["labels"].to(device)
        optimizer.zero_grad()
        # Passing labels makes the model compute the loss itself.
        outputs = model(input_values, labels=labels)
        loss = outputs.loss
        loss.backward()
        optimizer.step()
    # Report once per epoch, using the loss of the last processed batch.
    if loss is not None:
        print(f"Epoch: {epoch+1}, Loss: {loss.item()}")
# Leave the model in inference mode once fine-tuning is done.
model.eval()
# Streamlit UI
# Static HTML shown under the status message: three topic cards (Audio,
# Vision, NLP) followed by a pointer to Chapter 5 of the Hugging Face course.
_RESOURCE_CARDS_HTML = """
<div class="mt-4">
<div class="w-full flex flex-col space-y-4 md:space-y-0 md:grid md:grid-cols-3 md:gap-y-4 md:gap-x-5">
<a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="#audio"> <div class="w-full text-center bg-gradient-to-r from-violet-300 via-sky-400 to-green-500 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">Audio</div>
<p class="text-gray-700">Resample an audio dataset and get it ready for a model to classify what type of banking issue a speaker is calling about.</p>
</a>
<a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="#vision"> <div class="w-full text-center bg-gradient-to-r from-pink-400 via-purple-400 to-blue-500 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">Vision</div>
<p class="text-gray-700">Apply data augmentation to an image dataset and get it ready for a model to diagnose disease in bean plants.</p>
</a>
<a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="#nlp"> <div class="w-full text-center bg-gradient-to-r from-orange-300 via-red-400 to-violet-500 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">NLP</div>
<p class="text-gray-700">Tokenize a dataset and get it ready for a model to determine whether a pair of sentences have the same meaning.</p>
</a>
</div>
</div>
<div class="mt-4"> </div>
<p>
Check out <a href="https://huggingface.co/course/chapter5/1?fw=pt">Chapter 5</a> of the Hugging Face course to learn more about other important topics such as loading remote or local datasets, tools for cleaning up a dataset, and creating your own dataset.
</p>
"""

st.title("Audio Classification with Minds14")
# Placeholder status line; more insightful outputs are meant to be added here eventually.
st.write("Training complete!")
# unsafe_allow_html=True is needed so the markup is rendered rather than escaped.
st.markdown(_RESOURCE_CARDS_HTML, unsafe_allow_html=True)