实验概述
⚠️ 重要说明:本实验仅训练1轮,目的为测试代码框架和流程是否正常,属于非正式实验,测试结果不具有参考价值。
环境配置
硬件环境
软件环境
PyTorch环境说明
声明:本实验使用 conda 环境
myenv中已安装的 PyTorch (CUDA 13.0) 版本。请勿私自安装其他版本的 torch。
conda activate myenv
python -c "import torch; print(f'CUDA available: {torch.cuda.is_available()}'); print(f'GPU: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else None}')"
数据集说明
CIFAR-10 类别标签对照表
数据集属性
数据预处理代码
from torchvision import transforms

# Training pipeline: random crop / horizontal flip / colour jitter for
# augmentation, then normalization with per-channel CIFAR-10 statistics.
train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2470, 0.2435, 0.2616])
])
# Test pipeline: no augmentation, same normalization statistics.
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2470, 0.2435, 0.2616])
])
模型架构
网络结构描述
SimpleCNN - 轻量级卷积神经网络
模型参数统计
完整模型代码
class SimpleCNN(nn.Module):
    """Lightweight CNN for CIFAR-10: three conv blocks + two-layer classifier."""

    def __init__(self, num_classes=10):
        super(SimpleCNN, self).__init__()
        # Block 1: 3 -> 32 channels, two 3x3 convs, output 32x16x16 after pooling.
        self.conv_block1 = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.Conv2d(32, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2)
        )
        # Block 2: 32 -> 64 channels, output 64x8x8 after pooling.
        self.conv_block2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2)
        )
        # Block 3: 64 -> 128 channels, output 128x4x4 after pooling.
        self.conv_block3 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2)
        )
        # Classifier head: flatten 128*4*4 features -> 256 -> num_classes logits.
        self.fc_block = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128 * 4 * 4, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        x = self.conv_block1(x)
        x = self.conv_block2(x)
        x = self.conv_block3(x)
        x = self.fc_block(x)
        return x
训练配置
超参数表格
数据增强配置
训练过程
⚠️ 重要说明:本实验仅训练1轮,为测试代码框架用途,结果不具有参考价值。
各Epoch结果表格
注:
*表示该Epoch刷新最佳测试准确率
关键指标
GPU加速效果
GPU配置
训练效率
完整代码
"""
CIFAR-10 图像分类器训练脚本
基于PyTorch + GPU 加速的深度学习实验框架
================================================================================
【实验说明】
本实验仅训练1轮,目的为测试代码框架和流程是否正常,非正式实验
================================================================================
数据集: CIFAR-10
模型: SimpleCNN
训练轮数: 1 (测试用途)
环境要求: conda环境 myenv 中已安装 PyTorch (CUDA 13.0) 版本
"""
import os
import time
import logging
import glob
import re
from datetime import datetime
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import CIFAR10
def create_date_output_dir(base_dir='.'):
    """Create and return an output directory named after today's date.

    When a directory for today already exists, numeric suffixes
    (_1, _2, ...) are tried until an unused name is found. Gives up
    after 100 suffixed attempts.

    Args:
        base_dir: parent directory in which to create the dated folder.

    Returns:
        str: path of the directory that was created.

    Raises:
        RuntimeError: if suffixes 1..100 are all taken.
    """
    stamp = datetime.now().strftime('%Y-%m-%d')
    candidate = os.path.join(base_dir, stamp)
    if not os.path.exists(candidate):
        os.makedirs(candidate, exist_ok=True)
        print(f"[输出目录] 创建新目录: {candidate}")
        return candidate
    # Date folder exists already: probe suffixed names in order.
    for suffix in range(1, 101):
        candidate = os.path.join(base_dir, f"{stamp}_{suffix}")
        if not os.path.exists(candidate):
            os.makedirs(candidate, exist_ok=True)
            print(f"[输出目录] 创建带后缀目录: {candidate}")
            return candidate
    raise RuntimeError(f"无法创建目录: 已存在太多同名目录")
def scan_existing_experiments(project_root='.'):
    """Scan the project tree for experiment markdown files and extract metadata.

    Recursively globs ``*实验*.md`` files under *project_root* and applies
    heuristic regexes to pull out dataset names, model names, accuracy
    values and epoch counts. Unreadable files are skipped with a warning.

    Args:
        project_root: directory to search recursively.

    Returns:
        list[dict]: one entry per readable file with keys 'file_path',
        'file_name', 'datasets', 'models', 'accuracies', 'epochs',
        'content_preview'.
    """
    experiments = []
    md_files = glob.glob(os.path.join(project_root, '**', '*实验*.md'), recursive=True)
    for md_file in md_files:
        try:
            with open(md_file, 'r', encoding='utf-8') as f:
                content = f.read()
            exp_info = {
                'file_path': md_file,
                'file_name': os.path.basename(md_file),
                'datasets': [],
                'models': [],
                'accuracies': [],
                'epochs': 0,
                'content_preview': content[:500]
            }
            # Dataset names: markdown-table cells and "数据集名称:" lines.
            dataset_patterns = [
                r'目标数据集\s*[|]\s*(.+?)\s*[|]',
                r'数据集\s*[|]\s*(.+?)\s*[|]',
                r'##\s*数据集',
                r'数据集名称[::]\s*(.+?)(?:\n|$)',
            ]
            for pattern in dataset_patterns:
                matches = re.findall(pattern, content)
                if matches:
                    exp_info['datasets'].extend([m.strip() for m in matches if m.strip()])
            # Model names: class definitions in code blocks plus prose labels.
            model_patterns = [
                r'class\s+(\w+)\s*\(nn\.Module\)',
                r'模型名称[::]\s*(.+?)(?:\n|$)',
                r'\*\*(\w+)\*\*\s*-\s*\w+架构',
                r'网络结构描述[::]\s*(.+?)(?:\n|$)',
            ]
            for pattern in model_patterns:
                matches = re.findall(pattern, content)
                if matches:
                    # len < 50 filters out prose lines accidentally captured.
                    exp_info['models'].extend([m.strip() for m in matches if m.strip() and len(m) < 50])
            # Accuracy values, kept only when they parse to a (0, 100] percentage.
            acc_patterns = [
                r'最佳测试准确率\s*[|]\s*\*\*?(.+?)\*\*?\s*[|]',
                r'准确率[::]\s*(?:Best\s+)?(\d+\.?\d*)',
                r'Test\s+Acc[::]\s*(\d+\.?\d*)',
            ]
            for pattern in acc_patterns:
                matches = re.findall(pattern, content)
                if matches:
                    for m in matches:
                        try:
                            num = float(re.findall(r'\d+\.?\d*', m)[0])
                            if 0 < num <= 100:
                                exp_info['accuracies'].append(num)
                        # Fix: was a bare `except:` that also swallowed
                        # KeyboardInterrupt/SystemExit; only the expected
                        # parse failures are ignored now.
                        except (ValueError, IndexError):
                            pass
            # Epoch count: first pattern that matches wins.
            epoch_patterns = [
                r'训练轮数\s*[|]\s*(\d+)\s*[|]',
                r'Epoch[::]\s*(\d+)',
                r'num_epochs\s*=\s*(\d+)',
            ]
            for pattern in epoch_patterns:
                matches = re.findall(pattern, content)
                if matches:
                    exp_info['epochs'] = int(matches[0])
                    break
            experiments.append(exp_info)
        except Exception as e:
            # Best-effort scan: a broken file must not abort the whole sweep.
            print(f"[警告] 无法读取文件 {md_file}: {e}")
            continue
    return experiments
def generate_new_experiment_design(experiments, base_suggestions=None):
    """Aggregate prior experiment metadata and print an analysis report.

    Args:
        experiments: info dicts as returned by ``scan_existing_experiments``.
        base_suggestions: unused hook for caller-supplied defaults.

    Returns:
        dict | None: aggregate statistics, or None when *experiments* is empty.
    """
    if not experiments:
        print("[实验设计] 未发现已有实验,将使用默认基础配置")
        return None
    datasets, models, accuracies = [], [], []
    for record in experiments:
        datasets.extend(record['datasets'])
        models.extend(record['models'])
        accuracies.extend(record['accuracies'])
    # De-duplicate names; keep only positive accuracy readings.
    datasets = list(set(datasets))
    models = list(set(models))
    accuracies = [score for score in accuracies if score > 0]
    analysis = {
        'existing_datasets': datasets,
        'existing_models': models,
        'avg_accuracy': sum(accuracies) / len(accuracies) if accuracies else 0,
        'max_accuracy': max(accuracies) if accuracies else 0,
        'total_experiments': len(experiments),
    }
    banner = "=" * 60
    print("\n" + banner)
    print("已有实验分析报告")
    print(banner)
    print(f"已发现 {len(experiments)} 个实验")
    print(f"已有数据集: {', '.join(datasets) if datasets else '无'}")
    print(f"已有模型架构: {', '.join(models) if models else '无'}")
    if accuracies:
        print(f"准确率范围: {min(accuracies):.2f}% - {max(accuracies):.2f}%")
        print(f"平均准确率: {sum(accuracies)/len(accuracies):.2f}%")
    print(banner)
    return analysis
def get_device():
    """Pick the best available compute device.

    Priority: CUDA > Apple MPS > CPU. Prints diagnostic info about the
    selected device as a side effect.

    Returns:
        torch.device: the chosen device.
    """
    if torch.cuda.is_available():
        chosen = torch.device("cuda")
        print(f"[GPU] 使用CUDA设备: {torch.cuda.get_device_name(0)}")
        print(f"[GPU] GPU数量: {torch.cuda.device_count()}")
        print(f"[GPU] CUDA版本: {torch.version.cuda}")
        allocated = torch.cuda.memory_allocated(0) / 1024**2
        reserved = torch.cuda.memory_reserved(0) / 1024**2
        print(f"[GPU] 已分配内存: {allocated:.2f} MB, 预留: {reserved:.2f} MB")
        return chosen
    if torch.backends.mps.is_available():
        print("[GPU] 使用Apple MPS设备")
        return torch.device("mps")
    print("[CPU] 使用CPU设备")
    return torch.device("cpu")
def setup_logger(log_file):
    """Configure an INFO-level logger writing to both *log_file* and the console.

    Existing handlers are cleared first so repeated calls do not duplicate
    output.

    Args:
        log_file: path of the UTF-8 log file to append to.

    Returns:
        logging.Logger: the configured module logger.
    """
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)
    logger.handlers.clear()
    # Same level and format for both destinations.
    for handler in (logging.FileHandler(log_file, encoding='utf-8'),
                    logging.StreamHandler()):
        handler.setLevel(logging.INFO)
        handler.setFormatter(formatter)
        logger.addHandler(handler)
    return logger
class SimpleCNN(nn.Module):
    """
    Simple CNN for CIFAR-10 image classification.

    Structure: three conv blocks followed by a two-layer classifier head.
    Input:  3x32x32 colour images.
    Output: logits over ``num_classes`` (default 10) classes.
    """

    @staticmethod
    def _conv_block(in_ch, out_ch):
        # Two 3x3 conv + BN + ReLU stages, then 2x2 max pooling (halves H and W).
        return nn.Sequential(
            nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_ch, out_ch, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),
        )

    def __init__(self, num_classes=10):
        super(SimpleCNN, self).__init__()
        # Attribute names and layer ordering are kept identical to the
        # original so saved state_dicts remain loadable.
        self.conv_block1 = self._conv_block(3, 32)
        self.conv_block2 = self._conv_block(32, 64)
        self.conv_block3 = self._conv_block(64, 128)
        self.fc_block = nn.Sequential(
            nn.Flatten(),
            # 32x32 input becomes 4x4 spatial after three 2x2 poolings.
            nn.Linear(128 * 4 * 4, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(256, num_classes),
        )

    def forward(self, x):
        for block in (self.conv_block1, self.conv_block2, self.conv_block3):
            x = block(x)
        return self.fc_block(x)
def train_one_epoch(model, train_loader, criterion, optimizer, device, epoch):
    """Run one training epoch with batches moved to *device*.

    Prints running loss/accuracy every 100 batches.

    Returns:
        tuple[float, float]: (mean per-batch loss, accuracy in percent).
    """
    model.train()
    loss_sum = 0.0
    hits = 0
    seen = 0
    for step, (inputs, labels) in enumerate(train_loader, start=1):
        # Move the batch to the training device (GPU when available).
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        logits = model(inputs)
        batch_loss = criterion(logits, labels)
        batch_loss.backward()
        optimizer.step()
        loss_sum += batch_loss.item()
        preds = logits.max(1)[1]
        seen += labels.size(0)
        hits += preds.eq(labels).sum().item()
        if step % 100 == 0:
            print(f' Epoch {epoch} - Batch {step}/{len(train_loader)}: '
                  f'Loss={batch_loss.item():.4f}, Acc={100.*hits/seen:.2f}%')
    return loss_sum / len(train_loader), 100. * hits / seen
def evaluate(model, test_loader, criterion, device):
    """Evaluate *model* on *test_loader* without gradient tracking.

    Returns:
        tuple[float, float]: (mean per-batch loss, accuracy in percent).
    """
    model.eval()
    loss_sum = 0.0
    hits = 0
    seen = 0
    with torch.no_grad():  # inference only: skip gradient bookkeeping
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            logits = model(inputs)
            loss_sum += criterion(logits, labels).item()
            seen += labels.size(0)
            hits += logits.max(1)[1].eq(labels).sum().item()
    return loss_sum / len(test_loader), 100. * hits / seen
def main():
    """End-to-end smoke test of the training framework.

    Scans prior experiment reports, creates a date-stamped output folder,
    then trains SimpleCNN on CIFAR-10 for a single epoch with logging.
    NOTE(review): 1 epoch only — this validates the pipeline; the metrics
    are not meaningful benchmarks.
    """
    print("\n" + "=" * 60)
    print("CIFAR-10 分类器训练 - PyTorch + GPU")
    print("【注意】本实验仅训练1轮,为测试用途")
    print("=" * 60)
    # Resolve project root: if this script lives in a 'template' folder,
    # treat its parent directory as the root.
    current_dir = os.path.dirname(os.path.abspath(__file__))
    project_root = os.path.dirname(current_dir) if os.path.basename(current_dir) == 'template' else current_dir
    print(f"[项目根目录] {project_root}")
    # Step 1: collect metadata from any existing experiment reports.
    print("\n[步骤1/5] 扫描项目中已有的实验...")
    existing_experiments = scan_existing_experiments(project_root)
    exp_analysis = generate_new_experiment_design(existing_experiments)  # printed report; value unused below
    # Step 2: date-stamped output directory with data/ and logs/ subfolders.
    print("\n[步骤2/5] 创建日期输出目录...")
    output_dir = create_date_output_dir(project_root)
    data_dir = os.path.join(output_dir, 'data')
    logs_dir = os.path.join(output_dir, 'logs')
    os.makedirs(data_dir, exist_ok=True)
    os.makedirs(logs_dir, exist_ok=True)
    print(f"[目录配置] 数据目录: {data_dir}")
    print(f"[目录配置] 日志目录: {logs_dir}")
    # Hyperparameters — num_epochs=1 because this is a framework smoke test.
    project_id = 'cifar10_test'
    dataset_name = 'CIFAR-10'
    num_classes = 10
    batch_size = 64
    test_batch_size = 128
    num_epochs = 1
    learning_rate = 0.001
    optimizer_name = 'Adam'
    # Timestamped log and checkpoint paths so reruns never overwrite.
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    log_file = os.path.join(logs_dir, f'training_log_{project_id}_{timestamp}.txt')
    model_path = os.path.join(logs_dir, f'{project_id}_best_model_{timestamp}.pth')
    logger = setup_logger(log_file)
    logger.info("=" * 60)
    logger.info(f"{dataset_name} 分类器训练开始")
    logger.info("【注意】本实验仅训练1轮,为测试用途")
    logger.info("=" * 60)
    device = get_device()
    logger.info(f"使用设备: {device}")
    # Training augmentation + CIFAR-10 per-channel normalization.
    train_transform = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2470, 0.2435, 0.2616])
    ])
    # Test-time: no augmentation, same normalization statistics.
    test_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2470, 0.2435, 0.2616])
    ])
    logger.info(f"加载{dataset_name}数据集...")
    # download=True fetches the dataset on first run.
    train_dataset = CIFAR10(
        root=data_dir,
        train=True,
        transform=train_transform,
        download=True
    )
    test_dataset = CIFAR10(
        root=data_dir,
        train=False,
        transform=test_transform,
        download=True
    )
    logger.info(f"训练集大小: {len(train_dataset)}")
    logger.info(f"测试集大小: {len(test_dataset)}")
    logger.info(f"类别数量: {num_classes}")
    # pin_memory speeds up host-to-GPU transfers; num_workers=0 keeps
    # loading in the main process.
    train_loader = DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=0,
        pin_memory=True
    )
    test_loader = DataLoader(
        test_dataset,
        batch_size=test_batch_size,
        shuffle=False,
        num_workers=0,
        pin_memory=True
    )
    model = SimpleCNN(num_classes=num_classes).to(device)
    logger.info(f"\n模型结构:\n{model}")
    total_params = sum(p.numel() for p in model.parameters())
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    logger.info(f"\n总参数量: {total_params:,}")
    logger.info(f"可训练参数量: {trainable_params:,}")
    criterion = nn.CrossEntropyLoss()
    # Resolve the optimizer class by name (e.g. optim.Adam).
    optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=learning_rate)
    best_acc = 0.0
    logger.info(f"\n开始训练: {num_epochs} epochs, batch_size={batch_size}")
    logger.info("-" * 60)
    for epoch in range(1, num_epochs + 1):
        epoch_start_time = time.time()
        train_loss, train_acc = train_one_epoch(
            model, train_loader, criterion, optimizer, device, epoch
        )
        test_loss, test_acc = evaluate(
            model, test_loader, criterion, device
        )
        epoch_time = time.time() - epoch_start_time
        logger.info(
            f"Epoch {epoch}/{num_epochs} - "
            f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}% - "
            f"Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.2f}% - "
            f"Time: {epoch_time:.1f}s"
        )
        # Checkpoint only when the test accuracy improves.
        if test_acc > best_acc:
            best_acc = test_acc
            torch.save(model.state_dict(), model_path)
            logger.info(f"*** 新最佳模型已保存! Test Acc: {best_acc:.2f}% ***")
    logger.info("-" * 60)
    logger.info(f"训练完成! 最佳测试准确率: {best_acc:.2f}%")
    logger.info(f"模型已保存到: {project_id}_best_model_{timestamp}.pth")
    logger.info(f"日志已保存到: {log_file}")
    logger.info("=" * 60)
    # CUDA memory summary (only meaningful when a GPU was used).
    if torch.cuda.is_available():
        logger.info(f"\nGPU内存统计:")
        logger.info(f" - 已分配: {torch.cuda.memory_allocated(0) / 1024**2:.2f} MB")
        logger.info(f" - 已预留: {torch.cuda.memory_reserved(0) / 1024**2:.2f} MB")
        logger.info(f" - 最大分配: {torch.cuda.max_memory_allocated(0) / 1024**2:.2f} MB")


if __name__ == '__main__':
    main()
结论
实验总结
关键发现
⚠️ 本实验仅训练1轮,为测试用途,关键发现仅供参考
- 代码框架运行正常,GPU加速功能正常
- 测试准确率55.38%高于训练准确率42.87%;这在仅训练1轮、且训练时启用数据增强和Dropout(评估时关闭)的情况下属正常现象,不足以说明模型泛化能力强
- 单轮训练时间约24秒,效率较高
改进建议
⚠️ 本实验仅训练1轮,改进建议仅供正式实验参考
- 增加训练轮数至50-100轮以充分训练模型
- 考虑使用学习率衰减策略
- 可尝试更深的网络结构(如ResNet)提升准确率
- 当前55%准确率距离CIFAR-10常见基线模型(约85%)有较大差距
输出文件清单
注意: 所有输出文件已自动保存到当天的日期文件夹中