# 深度学习图像识别计算题验证码（pytorch版）

#### 其实识别计算题验证码和识别四位、六位定长验证码的原理是一样的。利用深度学习来识别的方法有很多，可以用 RNN，也可以用目标检测，但这里我只拿最简单的 CNN 来举例，希望这种简单的方式能引导大家去实现更多的方式。先来看下数据集：

``````python
import os

import torch
import torch.nn as nn
import torch.utils.data as data
import torchvision.transforms as T
from PIL import Image
from torch.optim.lr_scheduler import StepLR

class ImgData(data.Dataset):
    """Dataset of captcha images whose file names carry the label.

    The first three characters of each file name (taken from the part before
    the first ``.``) are the label, e.g. ``8-7 (3).png`` is labelled ``8-7``.
    Every label character is mapped to one of 13 classes: the digits
    ``0``-``9`` map to 0-9, ``+`` to 10, ``-`` to 11 and ``$`` to 12.

    Args:
        path: Directory that contains the image files.
    """

    NUM_CLASSES = 13
    LABEL_LENGTH = 3

    def __init__(self, path):
        super().__init__()
        self.path = path
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # index -> file mapping stable across runs and platforms.
        self.imgs = sorted(os.listdir(path))
        self.labels = {str(i): i for i in range(10)}
        # A plain "$": writing it as "\$" would produce a two-character key
        # that can never match a single label character.
        self.labels.update({"+": 10, "-": 11, "$": 12})

    def _encode_label(self, file_name):
        """Return the float32 one-hot label of ``file_name``.

        The result has one row per label character and ``NUM_CLASSES``
        columns.

        Raises:
            KeyError: If a label character is not in ``self.labels``.
        """
        text = file_name.split(".")[0][: self.LABEL_LENGTH]
        class_ids = torch.tensor(
            [self.labels[ch] for ch in text], dtype=torch.long
        )
        one_hot = torch.nn.functional.one_hot(
            class_ids, num_classes=self.NUM_CLASSES
        )
        return one_hot.to(torch.float32)

    def __getitem__(self, index):
        """Return ``(one_hot_label, image_tensor)`` for the file at ``index``.

        Note the order: the label comes first, the image second.
        """
        file_name = self.imgs[index]
        one_hot = self._encode_label(file_name)
        # Load the image and convert it to a float tensor with ToTensor.
        img = Image.open(os.path.join(self.path, file_name))
        img = T.ToTensor()(img)
        return (one_hot, img)

    def __len__(self):
        """Return the number of files found in ``path``."""
        return len(self.imgs)

``````

### 2、搭建训练模型

``````class ImgModel(nn.Module):
def __init__(self):
super().__init__()

self.Pool1 = nn.MaxPool2d(2, 1)
self.Drop1 = nn.Dropout(0.2)

self.Flatten = nn.Flatten()

self.Fc1 = nn.Linear(421504, 39)

def Sequential(self, input_size, output_size, kernel_size=3, stride=1, padding=1):
stage = nn.Sequential(
nn.BatchNorm2d(output_size),
nn.ReLU()
)
return stage

def forward(self, x):
x = self.Stage1(x)
x = self.Stage2(x)

x = self.Pool1(x)

x = self.Flatten(x)
x = self.Fc1(x)
x = x.reshape(x.size()[0], 3, 13)
return x
``````

### 3、开始训练

``````    DEVICE = torch.device("cuda:1")
BATCH_SIZE = 1
train_set = ImgData("./datasets/anhui/images")

# 加载模型和超参数
model = ImgModel()
model = model.to(DEVICE)
losses = nn.MSELoss()
scheduler = StepLR(
optimizer=optimizer,
step_size=20,  # 设定调整的间隔数
gamma=0.95,  # 系数
last_epoch=-1
)

model.train()
for epoch in range(300):
running_loss = 0.

label, img = data
label, img = label.to(DEVICE), img.to(DEVICE)
pred = model(img)
loss = losses(pred, label)
loss.backward()
running_loss += loss.item()

optimizer.step()

# scheduler.step(running_loss)

# scheduler.step()
print("epoch: %s loss: %s" % (epoch, running_loss))

# 保存模型
checkpoint = {
"net": model.state_dict(),
'optimizer': optimizer.state_dict(),
"epoch": epoch
}
torch.save(checkpoint, './models/ckpt_best.pth')

``````

### 4、输出部分loss

``````epoch: 0 loss: 0.0002137473008190227
epoch: 1 loss: 0.00017065600528454183
epoch: 2 loss: 0.00015853689117051317
epoch: 3 loss: 0.0001568195396330907
epoch: 4 loss: 0.00015941860091572835
epoch: 5 loss: 0.00015824139602926834
epoch: 6 loss: 0.00015571212547982327
epoch: 7 loss: 0.00015360863142888093
epoch: 8 loss: 0.00015596995165623184
epoch: 9 loss: 0.00015566498088759317
epoch: 10 loss: 0.00015361718873663222
epoch: 11 loss: 0.0001493654517012999
epoch: 12 loss: 0.00015319680674963365
epoch: 13 loss: 0.0001504135071854762
epoch: 14 loss: 0.00015071407296218808
epoch: 15 loss: 0.00014917661473035082
epoch: 16 loss: 0.00014558360170013884
epoch: 17 loss: 0.00014495838560080188
epoch: 18 loss: 0.00014095358103105582
epoch: 19 loss: 0.0001448862797381878

``````

### 5、预测结果

``````DEVICE = torch.device("cuda:1")
path_checkpoint = "./models/ckpt_best.pth"  # 断点路径
model = ImgModel()
model = model.to(DEVICE)

tru_labels = {i: str(i) for i in range(10)}
tru_labels.update({10: "+", 11: "-", 12: "\$"})

trues = 0
sums = 0

valid_path = "./datasets/valid"
model.eval()
for p in os.listdir(valid_path):

img = Image.open(os.path.join(valid_path, p))
img = T.ToTensor()(img)
img = img.to(DEVICE)
img = torch.unsqueeze(img, 0)
labels = model(img)[0]
preds = ""
for label in labels:
pred = tru_labels[int(torch.argmax(label).item())]
preds += pred
if preds == p.split(".")[0][:3]:
trues += 1
else:
print(preds, p)

sums += 1

print("准确个数： %s 总数 %s 准确率 %s " % (trues, sums, trues / sums))

8-2 8-7 (3).png

``````