26 完整的模型训练

Argmax

输入两张图片,通过outputs得到预测类别Preds

将Preds与Inputs target比较。

[False, True].sum() = 1:False 看成 0,True 看成 1,求和结果即为预测正确的个数

image-20230722155229401
1
2
3
4
5
6
7
import torch

# Toy network output: one row per input image, one score per class.
outputs = torch.tensor([
    [0.1, 0.2],
    [0.3, 0.4],
])
# dim=1 scans each row across its columns and returns the index of the
# largest score, i.e. the predicted class for that row.
print(torch.argmax(outputs, dim=1))
# tensor([1, 1])

image-20230722155601779

image-20230722155647986

1
2
3
4
5
6
7
8
9
10
11
12
13
import torch

# Toy network output: one row per input image, one score per class.
outputs = torch.tensor([
    [0.1, 0.2],
    [0.3, 0.4],
])
# dim=1: arg-max along each row -> predicted class per image.
preds = torch.argmax(outputs, dim=1)
targets = torch.tensor([0, 1])
# The element-wise comparison gives a boolean mask; summing it counts the
# correct predictions because False counts as 0 and True as 1.
correct_mask = torch.eq(preds, targets)
print(correct_mask)
print(correct_mask.sum())
'''
tensor([False, True])
tensor(1)
'''

完整代码

model.py

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# Build the neural network
import torch
from torch import nn


class Tudui(nn.Module):
    """Small CNN that maps a batch of images to 10 class scores.

    Each 5x5 convolution (stride 1, padding 2) keeps the spatial size and
    each 2x2 max-pool halves it, so a 32x32 input (as used by the smoke test
    below) shrinks 32 -> 16 -> 8 -> 4. The flattened feature vector therefore
    has 64 * 4 * 4 entries, matching the first linear layer.
    """

    def __init__(self):
        super().__init__()
        self.model = nn.Sequential(
            nn.Conv2d(3, 32, 5, 1, 2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 32, 5, 1, 2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 5, 1, 2),
            nn.MaxPool2d(2),
            nn.Flatten(),
            nn.Linear(64 * 4 * 4, 64),
            nn.Linear(64, 10),
        )

    def forward(self, x):
        """Return the raw class scores produced by the sequential stack."""
        return self.model(x)


if __name__ == '__main__':
    # Smoke test: a batch of 64 all-ones images should yield a (64, 10) output.
    tudui = Tudui()
    sample_batch = torch.ones((64, 3, 32, 32))  # avoid shadowing builtin input()
    output = tudui(sample_batch)
    print(output.shape)

训练和测试代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import  torch
import torchvision
from torch import nn
from torch.utils.tensorboard import SummaryWriter

from model import *
from torch.utils.data import DataLoader

# Prepare the datasets (downloaded into ./data when not already present)
train_data = torchvision.datasets.CIFAR10(
    root="./data", train=True,
    transform=torchvision.transforms.ToTensor(), download=True)
test_data = torchvision.datasets.CIFAR10(
    root="./data", train=False,
    transform=torchvision.transforms.ToTensor(), download=True)

# Dataset sizes; test_data_size is the denominator of the accuracy below
train_data_size = len(train_data)
test_data_size = len(test_data)
print("训练数据集的长度为: {}".format(train_data_size))
print("测试数据集的长度为: {}".format(test_data_size))

# Wrap the datasets in DataLoaders with a batch size of 64
train_dataloader = DataLoader(train_data, 64)
test_dataloader = DataLoader(test_data, 64)

# Create the network
tudui = Tudui()

# Loss function
loss_fn = nn.CrossEntropyLoss()

# Optimizer (1e-2 is scientific notation for 0.01)
learning_rate = 1e-2
optimizer = torch.optim.SGD(tudui.parameters(), lr=learning_rate)

# Training bookkeeping
total_train_step = 0  # number of optimizer steps taken so far
total_test_step = 0   # number of completed evaluation passes
epoch = 10            # number of passes over the training set

# TensorBoard writer
writer = SummaryWriter("logs_train")
for i in range(epoch):
    print("-----------第 {} 轮训练开始----------".format(i + 1))
    # Training phase
    for data in train_dataloader:
        imgs, targets = data
        output = tudui(imgs)
        loss = loss_fn(output, targets)
        # Let the optimizer update the model
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_train_step = total_train_step + 1
        # Report every 100 steps; .item() turns a 0-dim tensor such as
        # tensor(5) into the plain number 5
        if total_train_step % 100 == 0:
            print("训练次数 {}, Loss: {}".format(total_train_step, loss.item()))
            writer.add_scalar("train_loss", loss.item(), total_train_step)

    # Evaluation phase: nothing is tuned here, so gradients are disabled
    total_test_loss = 0
    total_accuracy = 0  # running count of correctly classified test samples
    with torch.no_grad():
        for data in test_dataloader:
            imgs, targets = data
            outputs = tudui(imgs)
            loss = loss_fn(outputs, targets)
            total_test_loss = total_test_loss + loss.item()
            accuracy = (outputs.argmax(1) == targets).sum()
            total_accuracy = total_accuracy + accuracy
    # Accuracy = number of correct predictions / size of the test set
    test_accuracy = total_accuracy / test_data_size
    print("整体测试集上的Loss: {}".format(total_test_loss))
    print("整体测试集上的正确率: {}".format(test_accuracy))
    writer.add_scalar("test_loss", total_test_loss, total_test_step)
    # Log the ratio that is printed above, not the raw count of correct samples
    writer.add_scalar("test_accuracy", test_accuracy, total_test_step)
    total_test_step = total_test_step + 1
    # Save a checkpoint after every epoch
    torch.save(tudui, "tudui_{}.pth".format(i))
    print("模型已保存")

writer.close()
'''
-----------第 10 轮训练开始----------
训练次数 7100, Loss: 1.28587007522583
训练次数 7200, Loss: 0.9727596640586853
训练次数 7300, Loss: 1.1144425868988037
训练次数 7400, Loss: 0.8997210264205933
训练次数 7500, Loss: 1.2414882183074951
训练次数 7600, Loss: 1.2502361536026
训练次数 7700, Loss: 0.8779300451278687
训练次数 7800, Loss: 1.2825963497161865
整体测试集上的Loss: 194.9348732829094
整体测试集上的正确率: 0.5584999918937683
模型已保存
'''
image-20230722162840740

注意点

train()和eval()

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# Training phase
# train() switches the module to training mode; it only changes the behaviour
# of mode-dependent layers such as Dropout and BatchNorm.
tudui.train()
for data in train_dataloader:
    imgs, targets = data
    output = tudui(imgs)
    loss = loss_fn(output, targets)
    # Let the optimizer update the model
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    total_train_step = total_train_step + 1
    # Report every 100 steps; .item() turns a 0-dim tensor such as
    # tensor(5) into the plain number 5
    if total_train_step % 100 == 0:
        print("训练次数 {}, Loss: {}".format(total_train_step, loss.item()))
        writer.add_scalar("train_loss", loss.item(), total_train_step)

# Evaluation phase: nothing is tuned here, so gradients are disabled
# eval() switches the module to evaluation mode (counterpart of train()).
tudui.eval()
total_test_loss = 0
total_accuracy = 0  # running count of correctly classified test samples
with torch.no_grad():
    for data in test_dataloader:
        imgs, targets = data
        outputs = tudui(imgs)
        loss = loss_fn(outputs, targets)
        total_test_loss = total_test_loss + loss.item()
        accuracy = (outputs.argmax(1) == targets).sum()
        total_accuracy = total_accuracy + accuracy
# Accuracy = number of correct predictions / size of the test set
test_accuracy = total_accuracy / test_data_size
print("整体测试集上的Loss: {}".format(total_test_loss))
print("整体测试集上的正确率: {}".format(test_accuracy))
writer.add_scalar("test_loss", total_test_loss, total_test_step)
# Log the ratio that is printed above, not the raw count of correct samples
writer.add_scalar("test_accuracy", test_accuracy, total_test_step)
total_test_step = total_test_step + 1
# Save a checkpoint after every epoch
torch.save(tudui, "tudui_{}.pth".format(i))
print("模型已保存")

27 利用GPU训练(一)

image-20230722163612035

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import  torch
import torchvision
from torch import nn
from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import DataLoader
import time

# Prepare the datasets (downloaded into ./data when not already present)
train_data = torchvision.datasets.CIFAR10(
    root="./data", train=True,
    transform=torchvision.transforms.ToTensor(), download=True)
test_data = torchvision.datasets.CIFAR10(
    root="./data", train=False,
    transform=torchvision.transforms.ToTensor(), download=True)

# Dataset sizes; test_data_size is the denominator of the accuracy below
train_data_size = len(train_data)
test_data_size = len(test_data)
print("训练数据集的长度为: {}".format(train_data_size))
print("测试数据集的长度为: {}".format(test_data_size))

# Wrap the datasets in DataLoaders with a batch size of 64
train_dataloader = DataLoader(train_data, 64)
test_dataloader = DataLoader(test_data, 64)


# Build the neural network
class Tudui(nn.Module):
    """Small CNN that maps a batch of 3x32x32 images to 10 class scores."""

    def __init__(self):
        super().__init__()
        self.model = nn.Sequential(
            nn.Conv2d(3, 32, 5, 1, 2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 32, 5, 1, 2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 5, 1, 2),
            nn.MaxPool2d(2),
            nn.Flatten(),
            nn.Linear(64 * 4 * 4, 64),
            nn.Linear(64, 10),
        )

    def forward(self, x):
        """Return the raw class scores produced by the sequential stack."""
        return self.model(x)


if __name__ == '__main__':
    # Smoke test: a batch of 64 all-ones images should yield a (64, 10) output.
    tudui = Tudui()
    sample_batch = torch.ones((64, 3, 32, 32))  # avoid shadowing builtin input()
    output = tudui(sample_batch)
    print(output.shape)


# Create the network and move it to the GPU when one is available
tudui = Tudui()
if torch.cuda.is_available():
    tudui = tudui.cuda()

# Loss function, moved to the GPU as well
loss_fn = nn.CrossEntropyLoss()
if torch.cuda.is_available():
    loss_fn = loss_fn.cuda()

# Optimizer (1e-2 is scientific notation for 0.01)
learning_rate = 1e-2
optimizer = torch.optim.SGD(tudui.parameters(), lr=learning_rate)

# Training bookkeeping
total_train_step = 0  # number of optimizer steps taken so far
total_test_step = 0   # number of completed evaluation passes
epoch = 10            # number of passes over the training set

# TensorBoard writer
writer = SummaryWriter("logs_train")
start_time = time.time()
for i in range(epoch):
    print("-----------第 {} 轮训练开始----------".format(i + 1))
    # Training phase
    tudui.train()
    for data in train_dataloader:
        imgs, targets = data
        # Inputs and labels must live on the same device as the model
        if torch.cuda.is_available():
            imgs = imgs.cuda()
            targets = targets.cuda()
        output = tudui(imgs)
        loss = loss_fn(output, targets)
        # Let the optimizer update the model
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_train_step = total_train_step + 1
        # Report every 100 steps; .item() turns a 0-dim tensor such as
        # tensor(5) into the plain number 5
        if total_train_step % 100 == 0:
            end_time = time.time()
            # Seconds elapsed since training started
            print(end_time - start_time)
            print("训练次数 {}, Loss: {}".format(total_train_step, loss.item()))
            writer.add_scalar("train_loss", loss.item(), total_train_step)

    # Evaluation phase: nothing is tuned here, so gradients are disabled
    tudui.eval()
    total_test_loss = 0
    total_accuracy = 0  # running count of correctly classified test samples
    with torch.no_grad():
        for data in test_dataloader:
            imgs, targets = data
            if torch.cuda.is_available():
                imgs = imgs.cuda()
                targets = targets.cuda()
            outputs = tudui(imgs)
            loss = loss_fn(outputs, targets)
            total_test_loss = total_test_loss + loss.item()
            accuracy = (outputs.argmax(1) == targets).sum()
            total_accuracy = total_accuracy + accuracy
    # Accuracy = number of correct predictions / size of the test set
    test_accuracy = total_accuracy / test_data_size
    print("整体测试集上的Loss: {}".format(total_test_loss))
    print("整体测试集上的正确率: {}".format(test_accuracy))
    writer.add_scalar("test_loss", total_test_loss, total_test_step)
    # Log the ratio that is printed above, not the raw count of correct samples
    writer.add_scalar("test_accuracy", test_accuracy, total_test_step)
    total_test_step = total_test_step + 1
    # Save a checkpoint after every epoch
    torch.save(tudui, "tudui_{}.pth".format(i))
    print("模型已保存")

writer.close()
'''
-----------第 1 轮训练开始----------
6.235116958618164
训练次数 100, Loss: 2.292055368423462
'''

Google Colaboratory

打开GPU

image-20230722165519327 image-20230722165546244

在 Colab 的代码单元中,命令前加 ! 表示该行不按 Python 语法执行,而是作为终端(shell)命令执行

image-20230722165905600 image-20230722165935223

28 利用GPU训练(二)

image-20230722170622579
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import  torch
import torchvision
from torch import nn
from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import DataLoader
import time

# Choose the training device: use the GPU when CUDA is available and fall
# back to the CPU otherwise, so the same script runs on either machine.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Prepare the datasets (downloaded into ./data when not already present)
train_data = torchvision.datasets.CIFAR10(
    root="./data", train=True,
    transform=torchvision.transforms.ToTensor(), download=True)
test_data = torchvision.datasets.CIFAR10(
    root="./data", train=False,
    transform=torchvision.transforms.ToTensor(), download=True)

# Dataset sizes; test_data_size is the denominator of the accuracy below
train_data_size = len(train_data)
test_data_size = len(test_data)
print("训练数据集的长度为: {}".format(train_data_size))
print("测试数据集的长度为: {}".format(test_data_size))

# Wrap the datasets in DataLoaders with a batch size of 64
train_dataloader = DataLoader(train_data, 64)
test_dataloader = DataLoader(test_data, 64)


# Build the neural network
class Tudui(nn.Module):
    """Small CNN that maps a batch of 3x32x32 images to 10 class scores."""

    def __init__(self):
        super().__init__()
        self.model = nn.Sequential(
            nn.Conv2d(3, 32, 5, 1, 2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 32, 5, 1, 2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 5, 1, 2),
            nn.MaxPool2d(2),
            nn.Flatten(),
            nn.Linear(64 * 4 * 4, 64),
            nn.Linear(64, 10),
        )

    def forward(self, x):
        """Return the raw class scores produced by the sequential stack."""
        return self.model(x)


if __name__ == '__main__':
    # Smoke test: a batch of 64 all-ones images should yield a (64, 10) output.
    tudui = Tudui()
    sample_batch = torch.ones((64, 3, 32, 32))  # avoid shadowing builtin input()
    output = tudui(sample_batch)
    print(output.shape)


# Create the network and move it to the chosen device
tudui = Tudui()
tudui = tudui.to(device)

# Loss function, moved to the same device
loss_fn = nn.CrossEntropyLoss()
loss_fn = loss_fn.to(device)

# Optimizer (1e-2 is scientific notation for 0.01)
learning_rate = 1e-2
optimizer = torch.optim.SGD(tudui.parameters(), lr=learning_rate)

# Training bookkeeping
total_train_step = 0  # number of optimizer steps taken so far
total_test_step = 0   # number of completed evaluation passes
epoch = 10            # number of passes over the training set

# TensorBoard writer
writer = SummaryWriter("logs_train")
start_time = time.time()
for i in range(epoch):
    print("-----------第 {} 轮训练开始----------".format(i + 1))
    # Training phase
    tudui.train()
    for data in train_dataloader:
        imgs, targets = data
        # Inputs and labels must live on the same device as the model
        imgs = imgs.to(device)
        targets = targets.to(device)
        output = tudui(imgs)
        loss = loss_fn(output, targets)
        # Let the optimizer update the model
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_train_step = total_train_step + 1
        # Report every 100 steps; .item() turns a 0-dim tensor such as
        # tensor(5) into the plain number 5
        if total_train_step % 100 == 0:
            end_time = time.time()
            # Seconds elapsed since training started
            print(end_time - start_time)
            print("训练次数 {}, Loss: {}".format(total_train_step, loss.item()))
            writer.add_scalar("train_loss", loss.item(), total_train_step)

    # Evaluation phase: nothing is tuned here, so gradients are disabled
    tudui.eval()
    total_test_loss = 0
    total_accuracy = 0  # running count of correctly classified test samples
    with torch.no_grad():
        for data in test_dataloader:
            imgs, targets = data
            imgs = imgs.to(device)
            targets = targets.to(device)
            outputs = tudui(imgs)
            loss = loss_fn(outputs, targets)
            total_test_loss = total_test_loss + loss.item()
            accuracy = (outputs.argmax(1) == targets).sum()
            total_accuracy = total_accuracy + accuracy
    # Accuracy = number of correct predictions / size of the test set
    test_accuracy = total_accuracy / test_data_size
    print("整体测试集上的Loss: {}".format(total_test_loss))
    print("整体测试集上的正确率: {}".format(test_accuracy))
    writer.add_scalar("test_loss", total_test_loss, total_test_step)
    # Log the ratio that is printed above, not the raw count of correct samples
    writer.add_scalar("test_accuracy", test_accuracy, total_test_step)
    total_test_step = total_test_step + 1
    # Save a checkpoint after every epoch
    torch.save(tudui, "tudui_{}.pth".format(i))
    print("模型已保存")

writer.close()

device写法

1
2
3
# Three alternative ways to spell the device; pick one of them.
# 1) The CUDA device without an explicit index
device = torch.device("cuda")
# 2) A specific GPU selected by index (here index 0)
device = torch.device("cuda:0")
# 3) CUDA when it is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

29 完整模型验证

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import torch
import torchvision
from PIL import Image
from torch import nn

image_path = "./img/dog.png"
image = Image.open(image_path)
print(image)
# A PNG can carry a fourth (alpha / transparency) channel besides RGB;
# convert('RGB') keeps only the three colour channels the network expects.
image = image.convert('RGB')

# Resize to the 32x32 input size of the network and convert to a tensor
transform = torchvision.transforms.Compose([
    torchvision.transforms.Resize((32, 32)),
    torchvision.transforms.ToTensor(),
])
image = transform(image)
print(image.shape)


# The class definition must be visible here so that the pickled model object
# can be reconstructed by torch.load below.
class Tudui(nn.Module):
    """Small CNN that maps a batch of 3x32x32 images to 10 class scores."""

    def __init__(self):
        super().__init__()
        self.model = nn.Sequential(
            nn.Conv2d(3, 32, 5, 1, 2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 32, 5, 1, 2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 5, 1, 2),
            nn.MaxPool2d(2),
            nn.Flatten(),
            nn.Linear(64 * 4 * 4, 64),
            nn.Linear(64, 10),
        )

    def forward(self, x):
        """Return the raw class scores produced by the sequential stack."""
        return self.model(x)


# Load the model.
# map_location: a model trained on the GPU has to be mapped to the CPU before
# it can be used on a CPU-only machine.
# weights_only=False: the checkpoint is used below as a whole module object
# (model.eval(), model(image)), not as a state_dict; recent PyTorch releases
# default to weights_only=True, which refuses to unpickle such objects.
# NOTE: this unpickles arbitrary Python objects - only load files you trust.
model = torch.load("tudui_0.pth", map_location=torch.device('cpu'),
                   weights_only=False)
print(model)

# Add the batch dimension: (3, 32, 32) -> (1, 3, 32, 32)
image = torch.reshape(image, (1, 3, 32, 32))
model.eval()
with torch.no_grad():
    output = model(image)
print(output)

# Index of the highest score = predicted class
print(output.argmax(1))

30 阅读开源项目

pytorch-CycleGAN-and-pix2pix

1
2
# Original: required=True makes --dataroot a mandatory command-line option
parser.add_argument('--dataroot', required=True, help='path to images (should have subfolders trainA, trainB, valA, valB, etc)')

image-20230722201917644

required=True 表明一定需要这个参数

可以把其改成 default,就可以在 PyCharm 中右键运行

1
2
# After the change: a default value replaces required=True, so the script can
# be started without passing --dataroot on the command line
parser.add_argument('--dataroot', default="./dataset/maps", help='path to images (should have subfolders trainA, trainB, valA, valB, etc)')