实验概述
STL-10是CIFAR-10的"进阶版"数据集,专为学习图像特征设计。相比CIFAR-10,STL-10使用96×96的大尺寸彩色图像,训练集仅5000张(每类500张),更具挑战性。本实验使用自定义轻量级CNN模型,探索在大尺寸图像上的分类效果。
环境配置
硬件环境
软件环境
PyTorch环境说明
声明:本实验使用 conda 环境
myenv中已安装的 PyTorch (CUDA 13.0) 版本。请勿私自安装其他版本的 torch。
# 激活已有环境
conda activate myenv
# 验证GPU是否可用
python -c "import torch; print(f'CUDA available: {torch.cuda.is_available()}'); print(f'GPU: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else None}')"
数据集说明
STL-10 类别标签对照表
数据集属性
数据预处理代码
from torchvision import transforms

# Data augmentation pipeline for the training split.
train_transform = transforms.Compose([
    transforms.RandomCrop(96, padding=8),    # random crop with 8px padding
    transforms.RandomHorizontalFlip(),       # random horizontal flip
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),  # color jitter
    transforms.ToTensor(),                   # PIL image -> float tensor in [0, 1]
    # Per-channel mean/std statistics for STL-10.
    transforms.Normalize(mean=[0.4467, 0.4328, 0.4141], std=[0.2783, 0.2734, 0.2976])
])

# Test/validation preprocessing: normalization only, no augmentation.
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.4467, 0.4328, 0.4141], std=[0.2783, 0.2734, 0.2976])
])
模型架构
网络结构描述
STL10Net - 轻量级卷积神经网络
模型参数统计
完整模型代码
class STL10Net(nn.Module):
    """
    Lightweight CNN tailored to STL-10.
    Structure: 4 convolutional blocks + 3-layer fully connected head.
    Input:  3x96x96 color images
    Output: logits for 10 classes
    """

    def __init__(self, num_classes=10):
        super(STL10Net, self).__init__()
        # Conv Block 1: 96x96 -> 48x48
        self.conv_block1 = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.Conv2d(32, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2)
        )
        # Conv Block 2: 48x48 -> 24x24
        self.conv_block2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2)
        )
        # Conv Block 3: 24x24 -> 12x12
        self.conv_block3 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2)
        )
        # Conv Block 4: 12x12 -> 6x6
        self.conv_block4 = nn.Sequential(
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2)
        )
        # FC head: 256*6*6 -> 512 -> 256 -> num_classes
        self.fc_block = nn.Sequential(
            nn.Flatten(),
            nn.Linear(256 * 6 * 6, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(512, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        x = self.conv_block1(x)
        x = self.conv_block2(x)
        x = self.conv_block3(x)
        x = self.conv_block4(x)
        x = self.fc_block(x)
        return x

训练配置
超参数表格
数据增强配置
训练过程
各Epoch结果表格
注:
*表示该Epoch刷新最佳测试准确率
关键指标
训练曲线分析
-
收敛速度: 模型在前10个Epoch快速收敛,测试准确率从26.99%提升至43.41%
-
稳定期: Epoch 10-20期间,模型进入稳定训练阶段,测试准确率在43%-53%之间波动
-
过拟合检查: 训练准确率(59.72%)与测试准确率(60.64%)接近,未出现过拟合现象
-
Loss下降: 测试Loss从1.7662持续下降至1.0492,表明模型持续学习
-
后期加速: Epoch 20后测试准确率提升明显,从53%提升至62%
GPU加速效果
RTX 5090 Laptop GPU提供了高效的加速能力,使得训练大尺寸图像模型成为可能。
完整代码
"""
STL-10 图像分类器训练脚本
基于PyTorch的深度学习实验
数据集: STL-10 (96x96彩色图像, 10类)
模型: STL10Net (轻量级CNN)
"""
import os
import time
import logging
from datetime import datetime
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import STL10
def get_device():
    """Pick the best available compute device, preferring CUDA > MPS > CPU.

    Prints a short hardware summary as a side effect and returns the
    corresponding ``torch.device``.
    """
    if torch.cuda.is_available():
        # Report basic GPU facts before handing back the device.
        print(f"[GPU] 使用CUDA设备: {torch.cuda.get_device_name(0)}")
        print(f"[GPU] GPU数量: {torch.cuda.device_count()}")
        print(f"[GPU] CUDA版本: {torch.version.cuda}")
        allocated_mb = torch.cuda.memory_allocated(0) / 1024**2
        reserved_mb = torch.cuda.memory_reserved(0) / 1024**2
        print(f"[GPU] 已分配内存: {allocated_mb:.2f} MB, 预留: {reserved_mb:.2f} MB")
        return torch.device("cuda")
    if torch.backends.mps.is_available():
        print("[GPU] 使用Apple MPS设备")
        return torch.device("mps")
    print("[CPU] 使用CPU设备")
    return torch.device("cpu")
def setup_logger(log_file):
    """Build an INFO-level logger writing to both *log_file* and the console.

    Existing handlers are cleared first so repeated calls do not duplicate
    output. Returns the configured logger.
    """
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)
    logger.handlers.clear()  # avoid duplicate handlers on re-configuration
    # File handler first, then console handler — both share level and format.
    for handler in (logging.FileHandler(log_file, encoding='utf-8'),
                    logging.StreamHandler()):
        handler.setLevel(logging.INFO)
        handler.setFormatter(formatter)
        logger.addHandler(handler)
    return logger
class STL10Net(nn.Module):
    """Lightweight CNN for STL-10: four double-conv blocks + a 3-layer FC head.

    Input is a 3x96x96 image; each block halves the spatial resolution
    (96 -> 48 -> 24 -> 12 -> 6), so the head sees 256*6*6 features and
    emits ``num_classes`` logits.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        # Channel progression: 3 -> 32 -> 64 -> 128 -> 256.
        self.conv_block1 = self._double_conv(3, 32)
        self.conv_block2 = self._double_conv(32, 64)
        self.conv_block3 = self._double_conv(64, 128)
        self.conv_block4 = self._double_conv(128, 256)
        # Classifier head with dropout between the linear layers.
        self.fc_block = nn.Sequential(
            nn.Flatten(),
            nn.Linear(256 * 6 * 6, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(512, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(256, num_classes),
        )

    @staticmethod
    def _double_conv(in_ch, out_ch):
        """Two 3x3 conv+BN+ReLU layers followed by 2x2 max pooling."""
        return nn.Sequential(
            nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_ch, out_ch, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),
        )

    def forward(self, x):
        for block in (self.conv_block1, self.conv_block2,
                      self.conv_block3, self.conv_block4):
            x = block(x)
        return self.fc_block(x)
def train_one_epoch(model, train_loader, criterion, optimizer, device, epoch):
    """Run one full training pass over *train_loader*.

    Returns ``(mean_batch_loss, accuracy_percent)`` for the epoch and
    prints a progress line every 50 batches.
    """
    model.train()
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0
    n_batches = len(train_loader)
    for step, (inputs, labels) in enumerate(train_loader, start=1):
        inputs, labels = inputs.to(device), labels.to(device)
        # Standard SGD step: clear grads, forward, backward, update.
        optimizer.zero_grad()
        logits = model(inputs)
        batch_loss = criterion(logits, labels)
        batch_loss.backward()
        optimizer.step()
        # Running statistics for the epoch summary.
        loss_sum += batch_loss.item()
        preds = logits.argmax(dim=1)
        n_seen += labels.size(0)
        n_correct += (preds == labels).sum().item()
        if step % 50 == 0:
            print(f' Epoch {epoch} - Batch {step}/{n_batches}: '
                  f'Loss={batch_loss.item():.4f}, Acc={100.*n_correct/n_seen:.2f}%')
    return loss_sum / n_batches, 100. * n_correct / n_seen
def evaluate(model, test_loader, criterion, device):
    """Compute mean batch loss and accuracy (%) over *test_loader*.

    The model is switched to eval mode and no gradients are tracked.
    """
    model.eval()
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            logits = model(inputs)
            loss_sum += criterion(logits, labels).item()
            n_seen += labels.size(0)
            n_correct += (logits.argmax(dim=1) == labels).sum().item()
    return loss_sum / len(test_loader), 100. * n_correct / n_seen
def main():
    """End-to-end STL-10 training: data loading, model setup, train/eval loop,
    best-model checkpointing, and logging of results and GPU memory stats."""
    os.makedirs('./data', exist_ok=True)
    os.makedirs('./logs', exist_ok=True)
    # Timestamp isolates the log file and checkpoint of each run.
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    log_file = f'./logs/training_log_stl10_{timestamp}.txt'
    logger = setup_logger(log_file)
    logger.info("=" * 60)
    logger.info("STL-10 图像分类器训练开始")
    logger.info("=" * 60)
    device = get_device()
    logger.info(f"使用设备: {device}")
    # Training pipeline augments; evaluation pipeline only normalizes.
    train_transform = transforms.Compose([
        transforms.RandomCrop(96, padding=8),
        transforms.RandomHorizontalFlip(),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.4467, 0.4328, 0.4141], std=[0.2783, 0.2734, 0.2976])
    ])
    test_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.4467, 0.4328, 0.4141], std=[0.2783, 0.2734, 0.2976])
    ])
    logger.info("加载STL-10数据集...")
    train_dataset = STL10(
        root='./data',
        split='train',
        transform=train_transform,
        download=True
    )
    test_dataset = STL10(
        root='./data',
        split='test',
        transform=test_transform,
        download=True
    )
    logger.info(f"训练集大小: {len(train_dataset)}")
    logger.info(f"测试集大小: {len(test_dataset)}")
    logger.info("类别数量: 10")
    logger.info(f"类别名称: {test_dataset.classes}")
    train_loader = DataLoader(
        train_dataset, batch_size=64, shuffle=True, num_workers=0, pin_memory=True
    )
    test_loader = DataLoader(
        test_dataset, batch_size=128, shuffle=False, num_workers=0, pin_memory=True
    )
    model = STL10Net(num_classes=10).to(device)
    logger.info(f"\n模型结构:\n{model}")
    total_params = sum(p.numel() for p in model.parameters())
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    logger.info(f"\n总参数量: {total_params:,}")
    logger.info(f"可训练参数量: {trainable_params:,}")
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    num_epochs = 30
    best_acc = 0.0
    # BUG FIX: compute the checkpoint path once, and reuse it in the final log
    # message below — the old final message rebuilt the file name without the
    # './logs/' prefix, pointing readers at a path that does not exist.
    model_path = f'./logs/stl10_best_model_{timestamp}.pth'
    logger.info(f"\n开始训练: {num_epochs} epochs, batch_size=64")
    logger.info("-" * 60)
    for epoch in range(1, num_epochs + 1):
        epoch_start_time = time.time()
        train_loss, train_acc = train_one_epoch(
            model, train_loader, criterion, optimizer, device, epoch
        )
        test_loss, test_acc = evaluate(
            model, test_loader, criterion, device
        )
        epoch_time = time.time() - epoch_start_time
        logger.info(
            f"Epoch {epoch}/{num_epochs} - "
            f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}% - "
            f"Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.2f}% - "
            f"Time: {epoch_time:.1f}s"
        )
        # Checkpoint whenever test accuracy improves on the best seen so far.
        if test_acc > best_acc:
            best_acc = test_acc
            torch.save(model.state_dict(), model_path)
            logger.info(f"*** 新最佳模型已保存! Test Acc: {best_acc:.2f}% ***")
    logger.info("-" * 60)
    logger.info(f"训练完成! 最佳测试准确率: {best_acc:.2f}%")
    logger.info(f"模型已保存到: {model_path}")
    logger.info(f"日志已保存到: {log_file}")
    logger.info("=" * 60)
    if torch.cuda.is_available():
        logger.info("\nGPU内存统计:")
        logger.info(f" - 已分配: {torch.cuda.memory_allocated(0) / 1024**2:.2f} MB")
        logger.info(f" - 已预留: {torch.cuda.memory_reserved(0) / 1024**2:.2f} MB")
        logger.info(f" - 最大分配: {torch.cuda.max_memory_allocated(0) / 1024**2:.2f} MB")
if __name__ == '__main__':
    main()
结论
实验总结
关键发现
-
大尺寸图像挑战: 相比CIFAR-10 (32×32),STL-10 (96×96)的图像边长是其3倍,像素数量约为9倍,模型需要更强的特征提取能力
-
小样本学习: STL-10训练集仅5000张(每类500张),远少于CIFAR-10的50000张,数据增强对防止过拟合至关重要
-
收敛特性: 模型在Epoch 20后进入快速提升期,测试准确率从53%提升至62%,说明网络需要更多epoch才能充分学习
-
无过拟合: 训练准确率与测试准确率接近,说明数据增强和Dropout有效防止了过拟合
-
GPU加速效果: RTX 5090使得训练大尺寸图像模型成为可能,平均每epoch仅需9.5秒
改进建议
-
使用更深网络: 如ResNet18/34,利用残差连接提升大图像特征提取能力
-
学习率调度: 使用ReduceLROnPlateau或CosineAnnealing,在后期获得更细粒度的收敛
-
更强数据增强: 添加RandomErasing、AutoAugment等策略
-
尝试预训练模型: 使用ImageNet预训练的ResNet,迁移学习到STL-10
-
利用无标签数据: STL-10提供100000张无标签图像,可用于半监督学习