# torch-train-study2: CIFAR-10 LeNet training script (PyTorch)

import torch as t
import torch.nn as nn
import torch.nn.functional as F
import torchvision as tv
import torchvision.transforms as transforms
  6. #错误的还未学习
  7. ########## 超参数设置 ##########
  8. epochs = 200
  9. learning_rate = 0.001
  10. batch_size = 256
  11. gpu_ids = [0, 1, 2]
  12. ########## 一、数据加载与预处理 ##########
  13. transform = transforms.Compose([
  14. transforms.ToTensor(),
  15. transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
  16. ])
  17. trainset = tv.datasets.CIFAR10(root='./data/', train=True, download=True, transform=transform)
  18. testset = tv.datasets.CIFAR10(root='./data/', train=False, download=True, transform=transform)
  19. trainloader = t.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=4)
  20. testloader = t.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=4)
  21. ########## 二、定义神经网络 ##########
  22. class LeNet(nn.Module):
  23. def __init__(self):
  24. super(LeNet, self).__init__()
  25. # (in_channels, out_channels, kernel_size, stride, padding)
  26. self.conv1 = nn.Conv2d(3, 6, 5, 1, 0)
  27. self.conv2 = nn.Conv2d(6, 16, 5, 1, 0)
  28. self.fc1 = nn.Linear(16 * 5 * 5, 120)
  29. self.fc2 = nn.Linear(120, 84)
  30. self.fc3 = nn.Linear(84, 10)
  31. def forward(self, x):
  32. x = F.max_pool2d(F.relu(self.conv1(x)), 2, 2)
  33. x = F.max_pool2d(F.relu(self.conv2(x)), 2, 2)
  34. x = x.view(x.size()[0], -1)
  35. x = F.relu(self.fc1(x))
  36. x = F.relu(self.fc2(x))
  37. x = self.fc3(x)
  38. return x
  39. ########## 三、训练神经网络 ##########
  40. # 神经网络实例化
  41. net = LeNet()
  42. if (len(gpu_ids) == 0) or (not t.cuda.is_available()):
  43. # gpu_ids列表为空, 或者没GPU可用,则设置为GPU模式。
  44. device = t.device("cpu")
  45. print("Train Mode : CPU")
  46. elif t.cuda.is_available() and len(gpu_ids) > 1:
  47. # gpu_ids列表大于1,表明想用多个GPU训练
  48. device = t.device("cuda:0")
  49. net = nn.DataParallel(net, device_ids=gpu_ids)
  50. print("Train Mode : Multi GPU;", gpu_ids)
  51. else:
  52. # gpu_ids列表等于1,表明想用一个GPU训练
  53. device = t.device("cuda:" + str(gpu_ids[0]) if t.cuda.is_available() else "cpu")
  54. print("Train Mode : One GPU;", device)
  55. net = net.to(device)
  56. print("\n", "##" * 10, " NetWork ", "##" * 10, "\n", net, "\n", "##" * 26, "\n")
  57. # 定义损失函数和优化器
  58. criterion = nn.CrossEntropyLoss()
  59. optimizer = t.optim.SGD(net.parameters(), lr=learning_rate, momentum=0.9)
  60. # 开始训练
  61. for epoch in range(epochs):
  62. running_loss = 0.0
  63. for i, (inputs, labels) in enumerate(trainloader):
  64. inputs, labels = inputs.to(device), labels.to(device)
  65. optimizer.zero_grad()
  66. outputs = net(inputs)
  67. loss = criterion(outputs, labels)
  68. loss.backward()
  69. optimizer.step()
  70. running_loss += loss.item()
  71. print("Epoch%03d: Training_loss = %.5f" % (epoch + 1, running_loss))
  72. # 保存模型并进行验证
  73. if (epoch + 1) % 10 == 0:
  74. if len(gpu_ids) > 1:
  75. t.save(net.module.state_dict(), "Cifar10_LeNet_Epoch" + str(epoch + 1) + ".pth")
  76. else:
  77. t.save(net.state_dict(), "Cifar10_LeNet_Epoch" + str(epoch + 1) + ".pth")
  78. correct = 0
  79. all = 0
  80. with t.no_grad():
  81. for (inputs, labels) in testloader:
  82. inputs, labels = inputs.to(device), labels.to(device)
  83. outputs = net(inputs)
  84. _, predicted = t.max(outputs, 1)
  85. all += inputs.size()[0]
  86. correct += (predicted == labels).sum().item()
  87. print("###" * 15)
  88. print("Epoch%03d: TestSet_Accuracy = %.3f" % (epoch + 1, correct / all))
  89. print("###" * 15)
  90. print("Train Done!")