Neural networks, CNNs, RNNs, Transformers with TensorFlow and PyTorch. Use for image classification, NLP, sequence modeling, or complex pattern recognition.
Resources
3Install
npx skillscat add pluginagentmarketplace/custom-plugin-ai-data-scientist/deep-learning Install via the SkillsCat registry.
SKILL.md
Deep Learning
Build neural networks for computer vision, NLP, and complex data patterns.
Quick Start with PyTorch
import torch
import torch.nn as nn
import torch.optim as optim
# Define model
class NeuralNet(nn.Module):
def __init__(self, input_size, hidden_size, num_classes):
super(NeuralNet, self).__init__()
self.fc1 = nn.Linear(input_size, hidden_size)
self.relu = nn.ReLU()
self.fc2 = nn.Linear(hidden_size, num_classes)
def forward(self, x):
out = self.fc1(x)
out = self.relu(out)
out = self.fc2(out)
return out
# Initialize
model = NeuralNet(input_size=784, hidden_size=128, num_classes=10)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
# Training loop
for epoch in range(10):
for images, labels in train_loader:
# Forward pass
outputs = model(images)
loss = criterion(outputs, labels)
# Backward and optimize
optimizer.zero_grad()
loss.backward()
optimizer.step()CNN for Image Classification
class CNN(nn.Module):
def __init__(self, num_classes=10):
super(CNN, self).__init__()
self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
self.pool = nn.MaxPool2d(2, 2)
self.fc1 = nn.Linear(64 * 8 * 8, 512)
self.fc2 = nn.Linear(512, num_classes)
self.dropout = nn.Dropout(0.5)
def forward(self, x):
x = self.pool(F.relu(self.conv1(x)))
x = self.pool(F.relu(self.conv2(x)))
x = x.view(-1, 64 * 8 * 8)
x = F.relu(self.fc1(x))
x = self.dropout(x)
x = self.fc2(x)
return xTransfer Learning
import torchvision.models as models
# Load pre-trained ResNet
model = models.resnet50(pretrained=True)
# Freeze layers
for param in model.parameters():
param.requires_grad = False
# Replace final layer
num_features = model.fc.in_features
model.fc = nn.Linear(num_features, num_classes)
# Only train final layer
optimizer = optim.Adam(model.fc.parameters(), lr=0.001)LSTM for Sequences
class LSTMModel(nn.Module):
def __init__(self, input_size, hidden_size, num_layers, output_size):
super(LSTMModel, self).__init__()
self.hidden_size = hidden_size
self.num_layers = num_layers
self.lstm = nn.LSTM(input_size, hidden_size, num_layers,
batch_first=True, dropout=0.2)
self.fc = nn.Linear(hidden_size, output_size)
def forward(self, x):
h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size)
c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size)
out, _ = self.lstm(x, (h0, c0))
out = self.fc(out[:, -1, :])
return outTransformers with Hugging Face
from transformers import AutoModelForSequenceClassification, Trainer, TrainingArguments
model = AutoModelForSequenceClassification.from_pretrained(
'bert-base-uncased',
num_labels=2
)
training_args = TrainingArguments(
output_dir='./results',
num_train_epochs=3,
per_device_train_batch_size=16,
warmup_steps=500,
weight_decay=0.01,
)
trainer = Trainer(
model=model,
args=training_args,
train_dataset=train_dataset,
eval_dataset=eval_dataset
)
trainer.train()Tips & Tricks
Regularization:
- Dropout:
nn.Dropout(0.5) - Batch Normalization:
nn.BatchNorm2d(channels) - Weight Decay:
optimizer = optim.Adam(params, weight_decay=0.01) - Early Stopping: Monitor validation loss
Optimization:
- Learning Rate Scheduling:
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1) - Gradient Clipping:
torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
Data Augmentation:
from torchvision import transforms
transform = transforms.Compose([
transforms.RandomHorizontalFlip(),
transforms.RandomRotation(10),
transforms.ColorJitter(brightness=0.2, contrast=0.2),
transforms.ToTensor(),
transforms.Normalize(mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225])
])Common Issues
Overfitting:
- Add dropout
- Data augmentation
- Early stopping
- Reduce model complexity
Underfitting:
- Increase model capacity
- Train longer
- Reduce regularization
- Better features
Vanishing Gradients:
- Use ReLU activation
- Batch normalization
- Residual connections
- Proper initialization