1. 实验概述
CIFAR-10是经典的入门级图像分类数据集,包含60,000张32×32彩色图像(训练集50,000张、测试集10,000张),分为10个类别(飞机、汽车、鸟、猫、鹿、狗、青蛙、马、船、卡车)。相比MNIST和Fashion-MNIST的灰度图像,CIFAR-10引入了彩色图像处理和更复杂的特征提取,是深度学习入门的重要进阶步骤。
2. 实验环境
2.1 硬件环境
2.2 软件环境
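软件环境以实际运行时的版本为准。下面是一段用于自检并记录软件环境的示意代码(仅作参考,不属于训练脚本本身):
# 环境自检示意:打印PyTorch/torchvision版本及CUDA可用性
import torch
import torchvision

print(f"PyTorch版本: {torch.__version__}")
print(f"torchvision版本: {torchvision.__version__}")
print(f"CUDA是否可用: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"CUDA版本: {torch.version.cuda}")
    print(f"GPU型号: {torch.cuda.get_device_name(0)}")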
2.3 超参数配置
- batch_size: 128(训练批次大小)
- test_batch_size: 100(测试批次大小)
- epochs: 20(训练轮数)
- learning_rate: 0.001(学习率,优化器为Adam)
- seed: 42(随机种子)
3. 数据集说明
3.1 CIFAR-10数据集
3.2 类别标签对照表
- 0: 飞机 (Airplane)
- 1: 汽车 (Automobile)
- 2: 鸟 (Bird)
- 3: 猫 (Cat)
- 4: 鹿 (Deer)
- 5: 狗 (Dog)
- 6: 青蛙 (Frog)
- 7: 马 (Horse)
- 8: 船 (Ship)
- 9: 卡车 (Truck)
3.3 数据预处理
# 训练集预处理(包含数据增强)
transform_train = transforms.Compose([
transforms.RandomCrop(32, padding=4), # 随机裁剪
transforms.RandomHorizontalFlip(), # 随机水平翻转
transforms.ToTensor(), # 转为张量 [0,1]
transforms.Normalize( # 归一化
mean=[0.4914, 0.4822, 0.4465], # RGB均值
std=[0.2470, 0.2435, 0.2616] # RGB标准差
)
])
# 测试集预处理
transform_test = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize(
mean=[0.4914, 0.4822, 0.4465],
std=[0.2470, 0.2435, 0.2616]
)
])

4. 模型架构
4.1 网络结构
CIFAR10Net 是一种轻量级卷积神经网络,包含2个卷积块和2个全连接层:
CIFAR10Net
├── Conv Block 1
│ ├── Conv2d(3, 32, 3x3, padding=1)
│ ├── BatchNorm2d(32)
│ ├── Conv2d(32, 32, 3x3, padding=1)
│ ├── BatchNorm2d(32)
│ └── MaxPool2d(2x2) → 输出: 32x16x16
│
├── Conv Block 2
│ ├── Conv2d(32, 64, 3x3, padding=1)
│ ├── BatchNorm2d(64)
│ ├── Conv2d(64, 64, 3x3, padding=1)
│ ├── BatchNorm2d(64)
│ └── MaxPool2d(2x2) → 输出: 64x8x8
│
└── FC Layers
├── Linear(64*8*8, 256)
├── ReLU
├── Dropout(0.5)
└── Linear(256, 10)

4.2 模型参数量
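按「权重数+偏置数」逐层估算:conv1为3×32×3×3+32=896,conv2为32×32×3×3+32=9,248,conv3为32×64×3×3+64=18,496,conv4为64×64×3×3+64=36,928,4个BatchNorm层共64+64+128+128=384,fc1为4096×256+256=1,048,832,fc2为256×10+10=2,570,合计1,117,354,与6.1节日志中的总参数数量一致。下面是一段逐层统计参数量并检查前向输出形状的示意代码(假设CIFAR10Net已按第5节定义):
# 参数量统计与输出形状检查(示意代码)
import torch

model = CIFAR10Net()  # CIFAR10Net的定义见第5节
for name, p in model.named_parameters():
    print(f"{name:15s} 形状: {tuple(p.shape)} 参数量: {p.numel():,}")
print(f"总参数数量: {sum(p.numel() for p in model.parameters()):,}")

# 用随机输入检查前向传播,输出形状应为 (1, 10)
model.eval()
with torch.no_grad():
    out = model(torch.randn(1, 3, 32, 32))
print(f"输出形状: {tuple(out.shape)}")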
5. 完整代码
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
CIFAR-10 彩色图像分类器
基于PyTorch的深度学习入门项目
本项目面向已完成MNIST/Fashion-MNIST实验的初学者,
引入彩色图像处理和更复杂的卷积神经网络。
类别标签:
0: 飞机 (Airplane)
1: 汽车 (Automobile)
2: 鸟 (Bird)
3: 猫 (Cat)
4: 鹿 (Deer)
5: 狗 (Dog)
6: 青蛙 (Frog)
7: 马 (Horse)
8: 船 (Ship)
9: 卡车 (Truck)
作者: AI Assistant
日期: 2026-03-29
"""
import logging
import os
from datetime import datetime
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
# ============================================================
# 配置类
# ============================================================
class Config:
"""配置类:存放所有超参数和设置"""
# 数据相关参数
batch_size = 128 # 批次大小
test_batch_size = 100 # 测试批次大小
# 训练相关参数
epochs = 20 # 训练轮数
learning_rate = 0.001 # 学习率
# 随机种子
seed = 42 # 保证实验可重复
# 路径设置
data_dir = './data/cifar10'
model_save_path = 'cifar10_model.pth'
log_dir = './logs'
# ============================================================
# 设备自动选择
# ============================================================
def get_device() -> torch.device:
"""
自动检测并选择最佳计算设备
优先级: CUDA > MPS > CPU
"""
if torch.cuda.is_available():
dev = torch.device("cuda")
print(f"[GPU] 使用CUDA: {torch.cuda.get_device_name(0)}")
print(f"[GPU] CUDA版本: {torch.version.cuda}")
elif torch.backends.mps.is_available():
dev = torch.device("mps")
print("[GPU] 使用Apple MPS")
else:
dev = torch.device("cpu")
print("[CPU] 使用CPU")
return dev
# ============================================================
# 日志配置
# ============================================================
def setup_logger(log_path: str) -> logging.Logger:
"""
配置日志:同时输出到文件和控制台
"""
logger = logging.getLogger("CIFAR10_Training")
logger.setLevel(logging.INFO)
logger.handlers.clear()
formatter = logging.Formatter(
'%(asctime)s - %(levelname)s - %(message)s',
datefmt='%Y-%m-%d %H:%M:%S'
)
file_handler = logging.FileHandler(log_path, mode='w', encoding='utf-8')
file_handler.setFormatter(formatter)
console_handler = logging.StreamHandler()
console_handler.setFormatter(formatter)
logger.addHandler(file_handler)
logger.addHandler(console_handler)
return logger
# ============================================================
# CNN模型定义
# ============================================================
class CIFAR10Net(nn.Module):
"""
轻量级CNN模型,适合CIFAR-10分类
结构: 2个卷积块 + 2个全连接层
"""
def __init__(self) -> None:
super().__init__()
# Conv Block 1: 32x32x3 → 16x16x32
self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
self.bn1 = nn.BatchNorm2d(32)
self.conv2 = nn.Conv2d(32, 32, kernel_size=3, padding=1)
self.bn2 = nn.BatchNorm2d(32)
self.pool1 = nn.MaxPool2d(2, 2)
# Conv Block 2: 16x16x32 → 8x8x64
self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
self.bn3 = nn.BatchNorm2d(64)
self.conv4 = nn.Conv2d(64, 64, kernel_size=3, padding=1)
self.bn4 = nn.BatchNorm2d(64)
self.pool2 = nn.MaxPool2d(2, 2)
# FC Layers: 64*8*8 = 4096 → 256 → 10
self.fc1 = nn.Linear(64 * 8 * 8, 256)
self.fc2 = nn.Linear(256, 10)
self.relu = nn.ReLU()
self.dropout = nn.Dropout(0.5)
def forward(self, x: torch.Tensor) -> torch.Tensor:
# Conv Block 1
x = self.relu(self.bn1(self.conv1(x)))
x = self.relu(self.bn2(self.conv2(x)))
x = self.pool1(x)
# Conv Block 2
x = self.relu(self.bn3(self.conv3(x)))
x = self.relu(self.bn4(self.conv4(x)))
x = self.pool2(x)
# Flatten
x = x.view(-1, 64 * 8 * 8)
# FC Layers
x = self.relu(self.fc1(x))
x = self.dropout(x)
x = self.fc2(x)
return x
# ============================================================
# 评估函数
# ============================================================
def evaluate(
net: nn.Module,
data_loader: DataLoader,
dev: torch.device,
loss_fn: nn.Module,
logger: logging.Logger
) -> tuple[float, float]:
"""
在测试集上评估模型
返回: (准确率, 平均损失)
"""
net.eval()
correct = 0
total = 0
total_loss = 0.0
with torch.no_grad():
for data, target in data_loader:
data = data.to(dev)
target = target.to(dev)
output = net(data)
total_loss += loss_fn(output, target).item()
_, predicted = output.max(1)
total += target.size(0)
correct += predicted.eq(target).sum().item()
accuracy = 100.0 * correct / total
avg_loss = total_loss / len(data_loader)
return accuracy, avg_loss
# ============================================================
# 训练函数
# ============================================================
def train(
net: nn.Module,
train_loader: DataLoader,
test_loader: DataLoader,
dev: torch.device,
num_epochs: int,
learning_rate: float,
logger: logging.Logger
) -> dict:
"""
完整训练流程
返回: 训练历史字典
"""
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=learning_rate)
logger.info(f"开始训练 - 轮次: {num_epochs}, 学习率: {learning_rate}")
history = {
'train_loss': [],
'train_acc': [],
'test_loss': [],
'test_acc': []
}
best_accuracy = 0.0
for epoch in range(1, num_epochs + 1):
net.train()
running_loss = 0.0
correct = 0
total = 0
for batch_idx, (data, target) in enumerate(train_loader):
data = data.to(dev)
target = target.to(dev)
optimizer.zero_grad()
output = net(data)
loss = loss_fn(output, target)
loss.backward()
optimizer.step()
running_loss += loss.item()
_, predicted = output.max(1)
total += target.size(0)
correct += predicted.eq(target).sum().item()
# 每100个批次打印进度
if (batch_idx + 1) % 100 == 0:
batch_acc = 100.0 * correct / total
batch_loss = running_loss / (batch_idx + 1)
logger.info(
f"Epoch [{epoch}/{num_epochs}] "
f"Batch [{batch_idx+1}/{len(train_loader)}] "
f"Loss: {batch_loss:.4f} "
f"Acc: {batch_acc:.2f}%"
)
# 计算训练集指标
train_acc = 100.0 * correct / total
train_loss = running_loss / len(train_loader)
# 计算测试集指标
test_acc, test_loss = evaluate(net, test_loader, dev, loss_fn, logger)
# 记录历史
history['train_loss'].append(train_loss)
history['train_acc'].append(train_acc)
history['test_loss'].append(test_loss)
history['test_acc'].append(test_acc)
logger.info("-" * 60)
logger.info(
f"Epoch {epoch} 完成 - "
f"训练损失: {train_loss:.4f}, 训练准确率: {train_acc:.2f}% | "
f"测试损失: {test_loss:.4f}, 测试准确率: {test_acc:.2f}%"
)
# 保存最佳模型
if test_acc > best_accuracy:
best_accuracy = test_acc
torch.save(net.state_dict(), 'cifar10_best.pth')
logger.info(f"★ 保存最佳模型,准确率: {best_accuracy:.2f}%")
logger.info("")
logger.info(f"训练完成! 最佳测试准确率: {best_accuracy:.2f}%")
return history
# ============================================================
# 主程序
# ============================================================
if __name__ == "__main__":
# 创建目录
os.makedirs(Config.data_dir, exist_ok=True)
os.makedirs(Config.log_dir, exist_ok=True)
# 生成时间戳
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
log_file = f'{Config.log_dir}/training_log_{timestamp}.txt'
# 初始化日志
app_logger = setup_logger(log_file)
app_logger.info("=" * 60)
app_logger.info("CIFAR-10 彩色图像分类器训练")
app_logger.info("=" * 60)
# 设置随机种子
torch.manual_seed(Config.seed)
# 获取设备
device = get_device()
app_logger.info(f"使用设备: {device}")
# 数据预处理
transform_train = transforms.Compose([
transforms.RandomCrop(32, padding=4),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
transforms.Normalize(
mean=[0.4914, 0.4822, 0.4465],
std=[0.2470, 0.2435, 0.2616]
)
])
transform_test = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize(
mean=[0.4914, 0.4822, 0.4465],
std=[0.2470, 0.2435, 0.2616]
)
])
# 加载数据集
app_logger.info("加载CIFAR-10数据集...")
train_dataset = datasets.CIFAR10(
root=Config.data_dir,
train=True,
download=True,
transform=transform_train
)
test_dataset = datasets.CIFAR10(
root=Config.data_dir,
train=False,
download=True,
transform=transform_test
)
train_loader = DataLoader(
train_dataset,
batch_size=Config.batch_size,
shuffle=True,
num_workers=0,
pin_memory=True if device.type == "cuda" else False
)
test_loader = DataLoader(
test_dataset,
batch_size=Config.test_batch_size,
shuffle=False,
num_workers=0,
pin_memory=True if device.type == "cuda" else False
)
app_logger.info(f"训练集大小: {len(train_dataset)}")
app_logger.info(f"测试集大小: {len(test_dataset)}")
# 创建模型
model = CIFAR10Net().to(device)
# 打印模型参数量
total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
app_logger.info(f"总参数数量: {total_params:,}")
app_logger.info(f"可训练参数: {trainable_params:,}")
# 打印模型结构
app_logger.info("模型结构:")
app_logger.info("-" * 40)
app_logger.info("Conv Block 1: Conv2d(3→32) + BN + Conv2d(32→32) + BN + MaxPool")
app_logger.info("Conv Block 2: Conv2d(32→64) + BN + Conv2d(64→64) + BN + MaxPool")
app_logger.info("FC Layers: Linear(4096→256) + ReLU + Dropout + Linear(256→10)")
app_logger.info("-" * 40)
# 开始训练
app_logger.info("开始训练...")
app_logger.info("")
history = train(
model, train_loader, test_loader,
device,
Config.epochs,
Config.learning_rate,
app_logger
)
# 最终评估
app_logger.info("=" * 60)
app_logger.info("最终测试集评估:")
criterion = nn.CrossEntropyLoss()
final_acc, final_loss = evaluate(model, test_loader, device, criterion, app_logger)
app_logger.info(f"最终测试损失: {final_loss:.4f}")
app_logger.info(f"最终测试准确率: {final_acc:.2f}%")
# 保存最终模型
model_path = f'{Config.log_dir}/cifar10_final_{timestamp}.pth'
torch.save(model.state_dict(), model_path)
app_logger.info(f"最终模型已保存至: {model_path}")
# 打印训练历史汇总
app_logger.info("")
app_logger.info("训练历史汇总:")
app_logger.info("-" * 70)
app_logger.info("Epoch | Train Loss | Train Acc | Test Loss | Test Acc | 备注")
app_logger.info("-" * 70)
for i in range(len(history['train_loss'])):
marker = " ★" if history['test_acc'][i] == max(history['test_acc']) else ""
app_logger.info(
f"{i+1:5d} | "
f"{history['train_loss'][i]:10.4f} | "
f"{history['train_acc'][i]:9.2f}% | "
f"{history['test_loss'][i]:9.4f} | "
f"{history['test_acc'][i]:8.2f}%{marker}"
)
app_logger.info("-" * 70)
app_logger.info("=" * 60)
app_logger.info(f"日志文件已保存至: {log_file}")
app_logger.info("训练完成!")
app_logger.info("=" * 60)6. 实验结果
6.1 训练环境
使用设备: cuda
GPU型号: NVIDIA GeForce RTX 5090 Laptop GPU
总参数数量: 1,117,354
训练集大小: 50,000
测试集大小: 10,000

6.2 训练时间
6.3 各轮次训练结果
6.4 最终结果
7. 结果分析
7.1 准确率提升曲线
- 第1轮: 54.63% → 第18轮: 79.64%
- 总体提升: +24.99%
- 模型在第15-18轮趋于收敛
7.2 损失变化趋势
- 训练损失从1.6526降至0.6842(下降58.6%)
- 测试损失从1.2787降至0.6285(下降50.8%)
- 未出现明显过拟合现象(曲线绘制的示意代码见下)
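上述趋势可以直接由train()返回的history字典绘制出来。下面是一段使用matplotlib绘制准确率与损失曲线的示意代码(假设已安装matplotlib,history为第5节train()的返回值):
# 绘制训练/测试曲线(示意代码):history为train()返回的字典
import matplotlib.pyplot as plt

epochs = range(1, len(history['train_loss']) + 1)
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))

ax1.plot(epochs, history['train_acc'], label='Train Acc')
ax1.plot(epochs, history['test_acc'], label='Test Acc')
ax1.set_xlabel('Epoch')
ax1.set_ylabel('Accuracy (%)')
ax1.legend()

ax2.plot(epochs, history['train_loss'], label='Train Loss')
ax2.plot(epochs, history['test_loss'], label='Test Loss')
ax2.set_xlabel('Epoch')
ax2.set_ylabel('Loss')
ax2.legend()

plt.tight_layout()
plt.savefig('training_curves.png', dpi=150)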
7.3 GPU加速效果
8. 学习要点总结
通过本实验,初学者可以掌握:彩色图像的归一化与数据增强、带BatchNorm的卷积网络搭建、训练与评估流程的组织,以及模型权重和日志的保存方法。
9. 扩展实验方向
- 增加网络深度: 添加更多卷积层或使用ResNet结构
- 调整数据增强: 添加颜色抖动、随机旋转等
- 学习率调度: 使用ReduceLROnPlateau或CosineAnnealing(示意代码见本节末尾)
- 尝试不同优化器: SGD with Momentum、AdamW
- 正则化: 调整Dropout比例、添加Weight Decay
- CIFAR-100: 进阶到100类分类任务
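作为参考,下面是一段把优化器换成带动量与权重衰减的SGD、并配合余弦退火学习率调度的示意代码(超参数为示例值,需自行调整;model与Config沿用第5节定义):
# 扩展示意:SGD + Momentum + Weight Decay + 余弦退火学习率调度
import torch.optim as optim

optimizer = optim.SGD(
    model.parameters(),
    lr=0.1,             # 初始学习率(示例值)
    momentum=0.9,       # 动量
    weight_decay=5e-4   # 权重衰减(L2正则化)
)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=Config.epochs)

for epoch in range(Config.epochs):
    # 此处执行一个epoch的训练(同第5节train()中的批次循环)
    scheduler.step()    # 每个epoch结束后更新学习率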
10. 与前序实验对比
11. 输出文件
- ./logs/training_log_<时间戳>.txt: 训练日志
- cifar10_best.pth: 测试准确率最高的模型权重
- ./logs/cifar10_final_<时间戳>.pth: 最终模型权重
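下面是一段加载已保存权重并在测试集上做预测的示意代码(假设CIFAR10Net、test_loader、device已按第5节定义):
# 推理示意:加载最佳模型权重并预测一个测试批次
import torch

classes = ['飞机', '汽车', '鸟', '猫', '鹿', '狗', '青蛙', '马', '船', '卡车']

model = CIFAR10Net().to(device)
model.load_state_dict(torch.load('cifar10_best.pth', map_location=device))
model.eval()

with torch.no_grad():
    images, labels = next(iter(test_loader))
    preds = model(images.to(device)).argmax(dim=1).cpu()

print("预测:", [classes[i] for i in preds[:8]])
print("真实:", [classes[i] for i in labels[:8]])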
实验日期: 2026-03-29