1. 实验概述
Fashion-MNIST是MNIST手写数字的升级替代品,包含10类服装/配饰的灰度图像。相比MNIST,它更具挑战性但比CIFAR-10简单,非常适合作为深度学习入门项目。
2. 实验环境
2.1 硬件环境
2.2 软件环境
2.3 超参数配置
3. 数据集说明
3.1 Fashion-MNIST数据集
3.2 类别标签对照表
3.3 数据预处理
transform_train = transforms.Compose([
transforms.RandomCrop(28, padding=2), # 随机裁剪
transforms.RandomHorizontalFlip(), # 随机水平翻转
transforms.ToTensor(), # 转为张量
transforms.Normalize((0.2860,), (0.3530,)) # 归一化
])
transform_test = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.2860,), (0.3530,))
])

4. 模型架构
4.1 网络结构
FashionMNISTNet 是一种轻量级卷积神经网络,包含2个卷积块和2个全连接层。
FashionMNISTNet
├── Conv Block 1
│ ├── Conv2d(1, 32, 3x3, padding=1)
│ ├── BatchNorm2d(32)
│ ├── Conv2d(32, 32, 3x3, padding=1)
│ ├── BatchNorm2d(32)
│ └── MaxPool2d(2x2)
│
├── Conv Block 2
│ ├── Conv2d(32, 64, 3x3, padding=1)
│ ├── BatchNorm2d(64)
│ ├── Conv2d(64, 64, 3x3, padding=1)
│ ├── BatchNorm2d(64)
│ └── MaxPool2d(2x2)
│
└── FC Layers
├── Linear(64*7*7, 256)
├── ReLU
├── Dropout(0.5)
└── Linear(256, 10)

4.2 完整源代码
"""
Fashion-MNIST 服装分类器
基于PyTorch的深度学习入门项目
类别标签:
0: T恤/上衣 (T-shirt/top)
1: 裤子 (Trouser)
2: 套头衫 (Pullover)
3: 连衣裙 (Dress)
4: 外套 (Coat)
5: 凉鞋 (Sandal)
6: 衬衫 (Shirt)
7: 运动鞋 (Sneaker)
8: 包 (Bag)
9: 靴子 (Ankle boot)
"""
import logging
import os
from datetime import datetime
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
# ============================================================
# 设备自动选择
# ============================================================
def get_device() -> torch.device:
    """Pick the best available compute device.

    Preference order: CUDA > Apple MPS > CPU. Prints a short
    description of the selected device as a side effect.
    """
    if torch.cuda.is_available():
        chosen = torch.device("cuda")
        print(f"[GPU] 使用CUDA: {torch.cuda.get_device_name(0)}")
        print(f"[GPU] CUDA版本: {torch.version.cuda}")
        return chosen
    if torch.backends.mps.is_available():
        print("[GPU] 使用Apple MPS")
        return torch.device("mps")
    print("[CPU] 使用CPU")
    return torch.device("cpu")
# ============================================================
# 日志配置
# ============================================================
def setup_logger(log_path: str) -> logging.Logger:
    """Build a logger that mirrors every record to *log_path* and stderr.

    Reuses the named logger "FashionMNIST_Training", clearing any
    handlers from a previous call so repeated setup does not duplicate
    output. The log file is truncated (mode 'w') on each run.
    """
    fmt = logging.Formatter(
        '%(asctime)s - %(levelname)s - %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S'
    )
    logger = logging.getLogger("FashionMNIST_Training")
    logger.setLevel(logging.INFO)
    logger.handlers.clear()
    # File first, then console — same order as before.
    for handler in (
        logging.FileHandler(log_path, mode='w', encoding='utf-8'),
        logging.StreamHandler(),
    ):
        handler.setFormatter(fmt)
        logger.addHandler(handler)
    return logger
# ============================================================
# CNN模型定义
# ============================================================
class FashionMNISTNet(nn.Module):
    """Compact CNN for Fashion-MNIST classification.

    Two conv blocks (each: conv-BN-ReLU x2 + 2x2 max-pool) followed by
    a two-layer classifier head with dropout. Expects input of shape
    (N, 1, 28, 28) and produces (N, 10) class logits.
    """

    def __init__(self) -> None:
        super().__init__()
        # Block 1: 1 -> 32 channels; 28x28 -> 14x14 after pooling.
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 32, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(32)
        self.pool1 = nn.MaxPool2d(2, 2)
        # Block 2: 32 -> 64 channels; 14x14 -> 7x7 after pooling.
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(64)
        self.conv4 = nn.Conv2d(64, 64, kernel_size=3, padding=1)
        self.bn4 = nn.BatchNorm2d(64)
        self.pool2 = nn.MaxPool2d(2, 2)
        # Classifier head: 64*7*7 features -> 256 -> 10 logits.
        self.fc1 = nn.Linear(64 * 7 * 7, 256)
        self.fc2 = nn.Linear(256, 10)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Map a batch of 1x28x28 images to 10 class logits."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.pool1(self.relu(self.bn2(self.conv2(out))))
        out = self.relu(self.bn3(self.conv3(out)))
        out = self.pool2(self.relu(self.bn4(self.conv4(out))))
        out = out.view(-1, 64 * 7 * 7)
        out = self.dropout(self.relu(self.fc1(out)))
        return self.fc2(out)
# ============================================================
# 评估函数
# ============================================================
def evaluate(
    net: nn.Module,
    data_loader: DataLoader,
    dev: torch.device,
    loss_fn: nn.Module,
    logger: logging.Logger
) -> float:
    """Run one full pass over *data_loader* and report loss/accuracy.

    Puts the model in eval mode, disables gradients, logs the average
    per-batch loss and overall accuracy, and returns the accuracy as a
    percentage.
    """
    net.eval()
    hits, seen, loss_sum = 0, 0, 0.0
    with torch.no_grad():
        for batch, labels in data_loader:
            batch, labels = batch.to(dev), labels.to(dev)
            logits = net(batch)
            loss_sum += loss_fn(logits, labels).item()
            preds = logits.argmax(dim=1)
            seen += labels.size(0)
            hits += (preds == labels).sum().item()
    accuracy = 100.0 * hits / seen
    # Average of per-batch losses (not per-sample, matching the original).
    avg_loss = loss_sum / len(data_loader)
    logger.info(f"测试集 - 损失: {avg_loss:.4f}, 准确率: {accuracy:.2f}%")
    return accuracy
# ============================================================
# 训练函数
# ============================================================
def train(
    net: nn.Module,
    train_loader: DataLoader,
    test_loader: DataLoader,
    dev: torch.device,
    num_epochs: int,
    learning_rate: float,
    logger: logging.Logger,
    train_size: int,
    test_size: int,
    best_model_path: str = 'fashion_mnist_best.pth'
) -> float:
    """Run the full training loop and checkpoint the best model.

    Trains ``net`` with Adam + cross-entropy for ``num_epochs`` epochs,
    evaluates on ``test_loader`` after every epoch, and saves the model
    weights whenever the test accuracy improves.

    Args:
        net: Model to train (updated in place).
        train_loader: Batches of (input, target) training pairs.
        test_loader: Batches used for the per-epoch evaluation.
        dev: Device the batches are moved to.
        num_epochs: Number of full passes over the training data.
        learning_rate: Adam learning rate.
        logger: Destination for progress messages.
        train_size: Number of training samples (logging only).
        test_size: Number of test samples (logging only).
        best_model_path: Where the best-so-far ``state_dict`` is saved.
            Defaults to the previously hard-coded path, so existing
            callers are unaffected.

    Returns:
        Best test accuracy (percent) observed across all epochs.
    """
    loss_fn = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=learning_rate)
    logger.info(f"开始训练 - 轮次: {num_epochs}, 学习率: {learning_rate}")
    logger.info(f"训练样本数: {train_size}, 测试样本数: {test_size}")
    best_accuracy = 0.0
    num_batches = len(train_loader)  # invariant across epochs; hoisted
    for epoch in range(1, num_epochs + 1):
        net.train()
        running_loss = 0.0
        correct = 0
        total = 0
        for batch_idx, (data, target) in enumerate(train_loader):
            data = data.to(dev)
            target = target.to(dev)
            optimizer.zero_grad()
            output = net(data)
            loss = loss_fn(output, target)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            _, predicted = output.max(1)
            total += target.size(0)
            correct += predicted.eq(target).sum().item()
            # Progress report every 100 batches.
            if (batch_idx + 1) % 100 == 0:
                batch_acc = 100.0 * correct / total
                logger.info(
                    f"轮次 [{epoch}/{num_epochs}] "
                    f"批次 [{batch_idx+1}/{num_batches}] "
                    f"损失: {running_loss/(batch_idx+1):.4f} "
                    f"准确率: {batch_acc:.2f}%"
                )
        # Evaluate once per epoch; checkpoint only on improvement.
        test_accuracy = evaluate(net, test_loader, dev, loss_fn, logger)
        if test_accuracy > best_accuracy:
            best_accuracy = test_accuracy
            torch.save(net.state_dict(), best_model_path)
            logger.info(f"保存最佳模型,准确率: {best_accuracy:.2f}%")
        logger.info("-" * 60)
    logger.info(f"训练完成! 最佳测试准确率: {best_accuracy:.2f}%")
    return best_accuracy
# ============================================================
# 主程序
# ============================================================
if __name__ == "__main__":
    # Prepare output directories for the dataset cache and run logs.
    os.makedirs('./data', exist_ok=True)
    os.makedirs('./logs', exist_ok=True)
    # One timestamped log file per run.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    log_file = f'./logs/training_log_{timestamp}.txt'
    app_logger = setup_logger(log_file)
    app_logger.info("=" * 60)
    app_logger.info("Fashion-MNIST 服装分类器训练")
    app_logger.info("=" * 60)
    # Select CUDA / MPS / CPU automatically.
    device = get_device()
    app_logger.info(f"使用设备: {device}")
    # Training-time augmentation: random 28x28 crop with 2px padding plus
    # random horizontal flip, then tensor conversion and normalization.
    # (0.2860, 0.3530) are presumably the dataset's channel mean/std —
    # TODO(review): confirm against the Fashion-MNIST statistics.
    transform_train = transforms.Compose([
        transforms.RandomCrop(28, padding=2),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.2860,), (0.3530,))
    ])
    # Test data gets normalization only — no augmentation.
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.2860,), (0.3530,))
    ])
    app_logger.info("加载Fashion-MNIST数据集...")
    # Downloads to ./data on first run; cached afterwards.
    train_dataset = datasets.FashionMNIST(
        root='./data',
        train=True,
        download=True,
        transform=transform_train
    )
    test_dataset = datasets.FashionMNIST(
        root='./data',
        train=False,
        download=True,
        transform=transform_test
    )
    # Batch size 128; shuffle only the training set. num_workers=0 keeps
    # loading in the main process; pin_memory speeds up host->GPU copies.
    train_loader = DataLoader(
        train_dataset,
        batch_size=128,
        shuffle=True,
        num_workers=0,
        pin_memory=True
    )
    test_loader = DataLoader(
        test_dataset,
        batch_size=128,
        shuffle=False,
        num_workers=0,
        pin_memory=True
    )
    app_logger.info(f"训练集大小: {len(train_dataset)}")
    app_logger.info(f"测试集大小: {len(test_dataset)}")
    # Instantiate the model on the chosen device and report its size.
    model = FashionMNISTNet().to(device)
    total_params = sum(p.numel() for p in model.parameters())
    app_logger.info(f"模型总参数量: {total_params:,}")
    # Hyperparameters for this run.
    epochs = 10
    lr = 0.001
    train_size = len(train_dataset)
    test_size = len(test_dataset)
    # Full training loop; best checkpoint is saved inside train().
    train(
        model, train_loader, test_loader,
        device, epochs, lr, app_logger,
        train_size, test_size
    )
    app_logger.info("=" * 60)
    app_logger.info("最终测试集评估:")
    # Final evaluation uses the last-epoch weights (not the best checkpoint).
    criterion = nn.CrossEntropyLoss()
    evaluate(model, test_loader, device, criterion, app_logger)
    # Persist the final weights alongside the run's log file.
    model_path = f'./logs/fashion_mnist_final_{timestamp}.pth'
    torch.save(model.state_dict(), model_path)
    app_logger.info(f"模型已保存至: {model_path}")
    app_logger.info(f"日志已保存至: {log_file}")
    app_logger.info("=" * 60)

5. 实验结果
5.1 训练日志
2026-03-28 21:45:01 - INFO - Fashion-MNIST 服装分类器训练
2026-03-28 21:45:01 - INFO - 使用设备: cuda
2026-03-28 21:45:18 - INFO - 训练集大小: 60000
2026-03-28 21:45:18 - INFO - 测试集大小: 10000
2026-03-28 21:45:18 - INFO - 模型总参数量: 871,018
2026-03-28 21:45:18 - INFO - 开始训练 - 轮次: 10, 学习率: 0.001

5.2 各轮次测试结果
5.3 最终结果
6. 结果分析
6.1 准确率提升曲线
- 第1轮: 87.97% → 第9轮: 92.27%
- 总体提升: +4.30%
- 模型在第6-9轮趋于收敛
6.2 损失变化趋势
- 训练损失从0.8931降至0.2433
- 测试损失从0.3326降至0.2161
- 未出现明显过拟合现象
7. 结论
- 模型表现: 在10轮训练后达到92.27%的测试准确率
- 训练效率: 使用GPU仅需约2分28秒完成训练
- 模型规模: 约87万参数,轻量级模型
- 扩展方向:
  - 增加网络深度(ResNet思想)
  - 尝试不同优化器(SGD, RMSprop)
  - 调整学习率策略
  - 添加更多数据增强