实验概述
⚠️ 实验说明: 本实验仅训练1轮,作为快速测试使用。正式实验建议训练15-20轮以获得最佳性能。
本实验采用 SVHN 真实街景数据集,结合 DenseNet-121 密集连接架构、CutMix 数据增强和 OneCycleLR 学习率调度,探索在真实场景数字识别任务上的深度学习性能。
环境配置
硬件环境
软件环境
数据集说明
SVHN 类别标签对照表
数据集属性
数据预处理代码
from torchvision import transforms

# Training-set augmentation pipeline
train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),                  # random crop with 4px padding
    transforms.RandomHorizontalFlip(),                     # NOTE(review): mirroring digits (e.g. 2/5) is unusual for SVHN — confirm it actually helps
    transforms.RandomRotation(10),                         # random rotation within +/-10 degrees
    transforms.ColorJitter(brightness=0.2, contrast=0.2),  # brightness/contrast jitter
    transforms.ToTensor(),                                 # PIL image -> float tensor in [0, 1]
    transforms.Normalize(                                  # SVHN per-channel statistics
        mean=[0.4377, 0.4438, 0.4728],
        std=[0.1980, 0.2010, 0.1970]
    )
])
# Test-set preprocessing: no augmentation, only tensor conversion and the
# same normalization statistics as the training set.
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.4377, 0.4438, 0.4728],
        std=[0.1980, 0.2010, 0.1970]
    )
])

模型架构
网络结构描述
DenseNet-121 - 密集连接卷积网络
模型参数统计
完整模型代码
import torch
import torch.nn as nn
import torchvision.models as models
class SVHNDenseNet(nn.Module):
    """DenseNet-121 adapter tuned for 32x32 SVHN images.

    DenseNet key properties:
      1. Dense connectivity: each layer receives all preceding feature maps.
      2. Feature reuse: early features are consumed directly by later layers.
      3. Parameter efficiency: fewer parameters than a comparable ResNet.
      4. Gradient flow: gradients propagate directly back to early layers.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        # Load ImageNet-pretrained DenseNet-121 weights.
        self.densenet = models.densenet121(weights=models.DenseNet121_Weights.IMAGENET1K_V1)
        # Replace the 7x7/stride-2 stem conv with a 3x3/stride-1 one so that
        # 32x32 inputs are not downsampled too aggressively.
        self.densenet.features.conv0 = nn.Conv2d(
            3, 64, kernel_size=3, stride=1, padding=1, bias=False
        )
        # Drop the stem max-pool for the same reason.
        self.densenet.features.pool0 = nn.Identity()
        # Re-head the classifier for the 10 SVHN classes; read in_features from
        # the existing head instead of hard-coding 1024, so the adapter keeps
        # working if the backbone variant changes.
        in_features = self.densenet.classifier.in_features
        self.densenet.classifier = nn.Linear(in_features, num_classes)
        # Fresh initialization for the new classification head.
        nn.init.xavier_uniform_(self.densenet.classifier.weight)
        nn.init.zeros_(self.densenet.classifier.bias)

    def forward(self, x):
        """Run the adapted backbone; returns (batch, num_classes) logits."""
        return self.densenet(x)

训练配置
超参数表格
数据增强配置
CutMix 增强代码
def rand_bbox(size, lam):
    """Sample a random rectangular patch for CutMix.

    `size` is an NCHW shape tuple; the first returned pair indexes dim 2 and
    the second pair indexes dim 3. (The original variable names W/H were
    swapped relative to NCHW — behavior is unchanged, only naming is fixed.)
    """
    dim2, dim3 = size[2], size[3]
    ratio = torch.sqrt(1. - lam)  # patch side ratio so patch area ~ (1 - lam)
    half2 = int(dim2 * ratio) // 2
    half3 = int(dim3 * ratio) // 2
    c2 = torch.randint(0, dim2, (1,)).item()
    c3 = torch.randint(0, dim3, (1,)).item()
    lo2, hi2 = max(0, c2 - half2), min(dim2, c2 + half2)
    lo3, hi3 = max(0, c3 - half3), min(dim3, c3 + half3)
    return lo2, lo3, hi2, hi3
def cutmix_data(x, y, alpha=1.0):
    """Apply CutMix to a batch in place.

    Unlike MixUp's linear blend, CutMix pastes a rectangular patch from a
    shuffled view of the batch into each image (this mutates `x`). Returns
    (x, y, shuffled y, effective lam), where lam is recomputed from the
    actual patch area after boundary clipping.
    """
    lam = torch.distributions.Beta(alpha, alpha).sample()
    perm = torch.randperm(x.size(0)).to(x.device)
    r1, c1, r2, c2 = rand_bbox(x.size(), lam)
    # Advanced indexing on the RHS copies first, so this is a clean swap.
    x[:, :, r1:r2, c1:c2] = x[perm, :, r1:r2, c1:c2]
    patch_area = (r2 - r1) * (c2 - c1)
    lam = 1 - patch_area / (x.size(-1) * x.size(-2))
    return x, y, y[perm], lam
def cutmix_criterion(criterion, pred, y_a, y_b, lam):
    """CutMix loss: convex combination of the two per-target losses."""
    loss_a = criterion(pred, y_a)
    loss_b = criterion(pred, y_b)
    return lam * loss_a + (1 - lam) * loss_b

训练过程
Epoch结果
关键指标
训练曲线分析
- 初期快速收敛: 1轮训练即达到81.56%测试准确率,显示DenseNet-121预训练权重的有效性
- 训练准确率较低: 36.98%的训练准确率表明CutMix增强带来一定的正则化效果
- 测试准确率远超训练: CutMix混合标签导致训练准确率被低估
GPU加速效果
完整代码
"""
SVHN 图像分类器训练脚本
基于PyTorch + GPU 加速的深度学习实验
【实验设计】
- 数据集: SVHN (街景门牌号, 彩色32x32, 10类)
- 模型: DenseNet-121 (密集连接卷积网络)
- 数据增强: CutMix (裁剪拼接增强)
- 学习率调度: OneCycleLR (单周期学习率)
- 优化器: AdamW (带权重衰减的Adam)
"""
import os
import time
import logging
import glob
import re
from datetime import datetime
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import SVHN
import torchvision.models as models
def create_date_output_dir(base_dir='.'):
    """Create and return a fresh dated output directory under *base_dir*.

    Uses today's date (YYYY-MM-DD) as the directory name; if that already
    exists, probes YYYY-MM-DD_1, YYYY-MM-DD_2, ... until a free name is found.
    """
    stamp = datetime.now().strftime('%Y-%m-%d')
    candidate = os.path.join(base_dir, stamp)
    suffix = 0
    # Walk suffixes until we hit a name that is not taken yet.
    while os.path.exists(candidate):
        suffix += 1
        candidate = os.path.join(base_dir, f"{stamp}_{suffix}")
    os.makedirs(candidate, exist_ok=True)
    return candidate
def get_device():
    """Pick the best available compute backend: CUDA > Apple MPS > CPU."""
    if torch.cuda.is_available():
        chosen = torch.device("cuda")
        print(f"[GPU] 使用CUDA设备: {torch.cuda.get_device_name(0)}")
        return chosen
    if torch.backends.mps.is_available():
        return torch.device("mps")
    return torch.device("cpu")
def rand_bbox(size, lam):
    """Sample a random rectangular patch for CutMix.

    `size` is an NCHW shape tuple; the first returned pair indexes dim 2 and
    the second pair indexes dim 3. (The original variable names W/H were
    swapped relative to NCHW — behavior is unchanged, only naming is fixed.)
    """
    dim2, dim3 = size[2], size[3]
    ratio = torch.sqrt(1. - lam)  # patch side ratio so patch area ~ (1 - lam)
    half2 = int(dim2 * ratio) // 2
    half3 = int(dim3 * ratio) // 2
    c2 = torch.randint(0, dim2, (1,)).item()
    c3 = torch.randint(0, dim3, (1,)).item()
    lo2, hi2 = max(0, c2 - half2), min(dim2, c2 + half2)
    lo3, hi3 = max(0, c3 - half3), min(dim3, c3 + half3)
    return lo2, lo3, hi2, hi3
def cutmix_data(x, y, alpha=1.0):
    """Apply CutMix to a batch in place.

    Pastes a rectangular patch from a shuffled view of the batch into each
    image (this mutates `x`), then returns (x, y, shuffled y, effective lam),
    where lam is recomputed from the actual patch area after clipping.
    """
    lam = torch.distributions.Beta(alpha, alpha).sample()
    perm = torch.randperm(x.size(0)).to(x.device)
    r1, c1, r2, c2 = rand_bbox(x.size(), lam)
    # Advanced indexing on the RHS copies first, so this is a clean swap.
    x[:, :, r1:r2, c1:c2] = x[perm, :, r1:r2, c1:c2]
    patch_area = (r2 - r1) * (c2 - c1)
    lam = 1 - patch_area / (x.size(-1) * x.size(-2))
    return x, y, y[perm], lam
def cutmix_criterion(criterion, pred, y_a, y_b, lam):
    """CutMix loss: convex combination of the two per-target losses."""
    loss_a = criterion(pred, y_a)
    loss_b = criterion(pred, y_b)
    return lam * loss_a + (1 - lam) * loss_b
class SVHNDenseNet(nn.Module):
    """DenseNet-121 adapter tuned for 32x32 SVHN images.

    Loads ImageNet-pretrained weights, replaces the stem (3x3/stride-1 conv,
    no max-pool) so small inputs are not downsampled too early, and re-heads
    the classifier for 10 classes.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        self.densenet = models.densenet121(weights=models.DenseNet121_Weights.IMAGENET1K_V1)
        # 3x3/stride-1 stem conv instead of 7x7/stride-2: keeps 32x32 resolution.
        self.densenet.features.conv0 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        # Drop the stem max-pool for the same reason.
        self.densenet.features.pool0 = nn.Identity()
        # Read in_features from the existing head instead of hard-coding 1024,
        # so the adapter keeps working if the backbone variant changes.
        in_features = self.densenet.classifier.in_features
        self.densenet.classifier = nn.Linear(in_features, num_classes)
        nn.init.xavier_uniform_(self.densenet.classifier.weight)
        nn.init.zeros_(self.densenet.classifier.bias)

    def forward(self, x):
        """Run the adapted backbone; returns (batch, num_classes) logits."""
        return self.densenet(x)
def train_one_epoch(model, train_loader, criterion, optimizer, device, epoch, scheduler=None, use_cutmix=True, cutmix_alpha=1.0):
    """Train the model for one epoch; returns (mean batch loss, accuracy %).

    When CutMix is enabled, "accuracy" is the lam-weighted agreement with
    both mixed targets, so it under-reports true accuracy by design.
    """
    model.train()
    running_loss, correct, total = 0.0, 0, 0
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        if use_cutmix:
            # Mix the batch (cutmix_data mutates `data` in place); the loss
            # and the accuracy bookkeeping are both weighted by lam.
            data, target_a, target_b, lam = cutmix_data(data, target, alpha=cutmix_alpha)
            outputs = model(data)
            loss = cutmix_criterion(criterion, outputs, target_a, target_b, lam)
            _, predicted = outputs.max(1)
            total += target.size(0)
            correct += (lam * predicted.eq(target_a).sum().float() + (1 - lam) * predicted.eq(target_b).sum().float()).item()
        else:
            # Plain supervised step without mixing.
            outputs = model(data)
            loss = criterion(outputs, target)
            _, predicted = outputs.max(1)
            total += target.size(0)
            correct += predicted.eq(target).sum().item()
        loss.backward()
        optimizer.step()
        # Scheduler is stepped per batch (OneCycleLR steps once per update).
        if scheduler: scheduler.step()
        running_loss += loss.item()
        # Lightweight progress report every 100 batches.
        if (batch_idx + 1) % 100 == 0:
            print(f' Epoch {epoch} - Batch {batch_idx + 1}: Loss={loss.item():.4f}, Acc={100.*correct/total:.2f}%')
    return running_loss / len(train_loader), 100. * correct / total
def evaluate(model, test_loader, criterion, device):
    """Compute the mean batch loss and top-1 accuracy (%) over test_loader."""
    model.eval()
    loss_sum = 0.0
    hits = 0
    seen = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            logits = model(images)
            loss_sum += criterion(logits, labels).item()
            hits += (logits.argmax(dim=1) == labels).sum().item()
            seen += labels.size(0)
    return loss_sum / len(test_loader), 100.0 * hits / seen
def main():
    """Run the full SVHN experiment: setup, data, model, train/eval loop."""
    print("=" * 60)
    print("开始 SVHN 深度学习实验")
    print("DenseNet-121 + CutMix + OneCycleLR")
    print("=" * 60)
    # Output directories: a dated folder under the project root, with
    # data/ (dataset cache) and logs/ (checkpoints) inside it.
    current_dir = os.path.dirname(os.path.abspath(__file__))
    project_root = os.path.dirname(current_dir)
    output_dir = create_date_output_dir(project_root)
    data_dir = os.path.join(output_dir, 'data')
    logs_dir = os.path.join(output_dir, 'logs')
    os.makedirs(data_dir, exist_ok=True)
    os.makedirs(logs_dir, exist_ok=True)
    # Hyperparameters: num_epochs=1 is a smoke-test run (per the report,
    # 15-20 epochs are recommended for a real experiment).
    batch_size, num_epochs, max_lr = 128, 1, 0.01
    device = get_device()
    # Training-set augmentation + SVHN normalization statistics.
    train_transform = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(10),
        transforms.ColorJitter(brightness=0.2, contrast=0.2),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.4377, 0.4438, 0.4728], std=[0.1980, 0.2010, 0.1970])
    ])
    # Test-set preprocessing: tensor conversion + normalization only.
    test_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.4377, 0.4438, 0.4728], std=[0.1980, 0.2010, 0.1970])
    ])
    train_dataset = SVHN(root=data_dir, split='train', transform=train_transform, download=True)
    test_dataset = SVHN(root=data_dir, split='test', transform=test_transform, download=True)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)
    test_loader = DataLoader(test_dataset, batch_size=256, shuffle=False, num_workers=0, pin_memory=True)
    model = SVHNDenseNet(num_classes=10).to(device)
    criterion = nn.CrossEntropyLoss()
    # NOTE(review): the initial lr=0.001 is superseded by OneCycleLR, which
    # drives the learning rate between its own computed bounds up to max_lr.
    optimizer = optim.AdamW(model.parameters(), lr=0.001, weight_decay=1e-4)
    # Scheduler is stepped once per batch inside train_one_epoch.
    scheduler = optim.lr_scheduler.OneCycleLR(optimizer, max_lr=max_lr, epochs=num_epochs, steps_per_epoch=len(train_loader), pct_start=0.3)
    best_acc = 0.0
    start_time = time.time()
    for epoch in range(1, num_epochs + 1):
        train_loss, train_acc = train_one_epoch(model, train_loader, criterion, optimizer, device, epoch, scheduler)
        test_loss, test_acc = evaluate(model, test_loader, criterion, device)
        print(f"Epoch {epoch}: Train Loss={train_loss:.4f}, Train Acc={train_acc:.2f}%, Test Loss={test_loss:.4f}, Test Acc={test_acc:.2f}%")
        # Checkpoint only when the test accuracy improves.
        if test_acc > best_acc:
            best_acc = test_acc
            torch.save(model.state_dict(), os.path.join(logs_dir, 'best_model.pth'))
    print(f"\n训练完成! 最佳测试准确率: {best_acc:.2f}%")
    print(f"总训练时间: {time.time() - start_time:.1f}s")

if __name__ == '__main__':
    main()

结论
⚠️ 重要说明: 本实验仅训练1轮作为快速测试验证代码逻辑和数据加载是否正常,并非完整实验。DenseNet-121预训练权重在1轮内即展现出良好的特征迁移能力,测试准确率达81.56%,表明该方案具有进一步优化的潜力。
实验总结
关键发现
- DenseNet-121有效性: 预训练权重在SVHN上效果显著,1轮即达81.56%
- CutMix正则化: 训练准确率被低估(36.98%),但测试准确率真实反映模型性能
- OneCycleLR加速: 快速学习率调整策略促进了快速收敛
- 真实场景挑战: SVHN包含各种光照、角度、背景干扰,比CIFAR更复杂
改进建议
- 增加训练轮数: 当前仅1轮,建议训练15-20轮可达90%+
- 使用额外数据: SVHN提供extra数据集(53万样本),可显著提升性能
- 调整CutMix参数: 尝试不同α值或与MixUp混合使用
- 更优增强策略: 添加AutoAugment或RandAugment
输出文件清单
输出目录结构
d:\WorkPlace\ClaudeCode\2026-04-03_1/
├── data/ # 数据集缓存
│ └── SVHN/
├── logs/ # 模型检查点(脚本当前仅保存最佳模型,未写入日志文件)
│ └── best_model.pth
├── SVHN_DenseNet121_CutMix.py
└── SVHN_DenseNet121_CutMix_实验报告.md

实验脚本: SVHN_DenseNet121_CutMix.py