# 深度学习：利用 RCNN（此处指 CNN + RNN，即通常所说的 CRNN）识别不定长计算题验证码（PyTorch 版，LSTM + CTCLoss）

#### 上一篇利用迁移学习（ResNet）训练了计算题验证码的识别模型，但那种做法难以处理多位数或位数不固定的验证码。因此本文改用 RNN 来解决不定长序列的计算题验证码识别问题；这种方式同样适用于 4 位、6 位等长度不固定的普通验证码。接下来看详细步骤。

class NumberDataset(Dataset):
    """Image dataset whose labels are encoded in the file names.

    Every file directly inside ``path`` is one sample. The label text is the
    part of the file name before the first ``'_'``. Each character is mapped
    to its index in ``label_map``; index 0 is ``'_'``, which is also the
    padding symbol appended to labels shorter than ``max_length``.

    ``calc_list`` is defined elsewhere in this file - presumably the iterable
    of characters that may appear in a label (TODO confirm).
    """

    def __init__(self, path: str, transform=None, max_length: int = 6):
        """
        :param path: directory that contains the image files
        :param transform: callable applied to the opened PIL image; defaults
            to ``transforms.ToTensor()`` when falsy
        :param max_length: labels shorter than this are right-padded with
            ``'_'`` so that samples can be stacked into one batch tensor
        :raises FileNotFoundError: if ``path`` cannot be walked
        """
        super().__init__()

        if not transform:
            transform = transforms.Compose([transforms.ToTensor()])
        self.transform = transform
        self.path = path
        self.max_length = max_length

        # Only the top-level directory entry is needed, so take the first
        # item produced by os.walk instead of walking the whole tree.
        top_level = next(os.walk(self.path), None)
        if top_level is None:
            raise FileNotFoundError(
                "dataset directory not found or unreadable: {!r}".format(self.path))
        self.picture_list = top_level[-1]
        self.label_map = list("_" + "".join(calc_list))

    def __len__(self):
        """Return the number of image files found in ``path``."""
        return len(self.picture_list)

    def __getitem__(self, item):
        """
        :param item: index into ``picture_list``
        :return: ``(image, label, length)`` where ``image`` is the transformed
            picture, ``label`` is an int64 tensor of character indices padded
            with 0 up to ``max_length``, and ``length`` is the number of real
            (unpadded) characters - the value a CTC loss needs as the target
            length
        """
        file_name = self.picture_list[item]
        # Build only the path that is needed; rebuilding the full path list
        # for every sample would cost O(len(dataset)) per item.
        img = Image.open(self.path + '/' + file_name)
        img = self.transform(img)

        text = file_name.split('_')[0]
        length = len(text)
        # Right-pad with the '_' symbol; longer labels are left untouched.
        padded = text.ljust(self.max_length, '_')
        label = torch.as_tensor(
            [self.label_map.index(ch) for ch in padded], dtype=torch.int64)

        return img, label, length

    @property
    def picture_path_list(self):
        """Return the path of every picture as ``path + '/' + file_name``."""
        return [self.path + '/' + i for i in self.picture_list]

### 2、搭建训练模型

class RestNetBasicBlock(nn.Module):
    """Residual block: two 3x3 conv + batch-norm stages with an identity skip.

    The unchanged input is added to the output of the second batch-norm
    before the final ReLU, so the convolution path has to preserve the
    input's shape for the addition to be valid.
    """

    def __init__(self, in_channels, out_channels, stride):
        """
        :param in_channels: channels of the incoming feature map
        :param out_channels: channels produced by both convolutions
        :param stride: stride used by both 3x3 convolutions
        """
        super().__init__()
        conv_options = dict(kernel_size=3, stride=stride, padding=1)
        self.conv1 = nn.Conv2d(in_channels, out_channels, **conv_options)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, **conv_options)
        self.bn2 = nn.BatchNorm2d(out_channels)

    def forward(self, x):
        """Return ``relu(x + bn2(conv2(relu(bn1(conv1(x))))))``."""
        shortcut = x
        hidden = F.relu(self.bn1(self.conv1(x)))
        hidden = self.bn2(self.conv2(hidden))
        return F.relu(shortcut + hidden)

class RestNetDownBlock(nn.Module):
    """Residual block that changes channel count and spatial resolution.

    The main path is conv3x3(stride[0]) -> BN -> ReLU -> conv3x3(stride[1])
    -> BN. Because that path changes both the number of channels and the
    spatial size, the shortcut cannot be the identity: ``extra`` projects the
    input with a strided 1x1 convolution followed by batch-norm so that both
    branches have the same shape when they are added.
    """

    def __init__(self, in_channels, out_channels, stride):
        """
        :param in_channels: channels of the incoming feature map
        :param out_channels: channels of the produced feature map
        :param stride: two-element sequence; ``stride[0]`` / ``stride[1]`` are
            the strides of the first / second 3x3 convolution
        """
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride[0], padding=1)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=stride[1], padding=1)
        self.bn2 = nn.BatchNorm2d(out_channels)
        # A BatchNorm2d(out_channels) alone cannot consume an in_channels
        # input, so the shortcut needs a 1x1 projection. Its stride is the
        # product of both main-path strides, which yields the same output
        # size as applying the two padded 3x3 convolutions in sequence.
        self.extra = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=1,
                      stride=stride[0] * stride[1], padding=0),
            nn.BatchNorm2d(out_channels),
        )

    def forward(self, x):
        """Return ``relu(extra(x) + bn2(conv2(relu(bn1(conv1(x))))))``."""
        extra_x = self.extra(x)
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        return F.relu(extra_x + out)

class resnet18(nn.Module):
    """Convolutional backbone assembled from the residual blocks above.

    The constructor registers a 7x7 stem convolution, a batch-norm and four
    residual stages. ``forward`` only runs the stem convolution and the first
    two stages; ``bn1``, ``layer3`` and ``layer4`` are registered but not
    used by ``forward``.
    """

    def __init__(self):
        super().__init__()

        def plain_stage(channels):
            # Two identity-shortcut blocks that keep the channel count.
            return nn.Sequential(RestNetBasicBlock(channels, channels, 1),
                                 RestNetBasicBlock(channels, channels, 1))

        def down_stage(channels_in, channels_out):
            # One down-sampling block followed by one identity-shortcut block.
            return nn.Sequential(RestNetDownBlock(channels_in, channels_out, [2, 1]),
                                 RestNetBasicBlock(channels_out, channels_out, 1))

        # Modules are created in the same order as before so that parameter
        # initialisation order and state_dict key order are unchanged.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = plain_stage(64)
        self.layer2 = down_stage(64, 128)
        self.layer3 = down_stage(128, 256)
        self.layer4 = down_stage(256, 512)

    def forward(self, x):
        """Apply ``conv1``, ``layer1`` and ``layer2`` in sequence."""
        features = x
        for stage in (self.conv1, self.layer1, self.layer2):
            features = stage(features)
        return features

class LstmNet(nn.Module):
    """Convolutional backbone + bidirectional LSTM + linear classifier.

    The backbone output ``[batch, channels, height, width]`` is read column
    by column: the width axis becomes the time axis of the LSTM and every
    time step sees the flattened ``channels * height`` vector of one column.
    ``forward`` returns raw per-step class scores (no softmax is applied).
    """

    def __init__(self, image_shape, label_map_length):
        """
        :param image_shape: ``(height, width)`` of the input images; used to
            derive the LSTM input size
        :param label_map_length: number of output classes per time step
        """
        super().__init__()
        # Same layer layout and attribute names as the resnet18 backbone.
        # bn1, layer3 and layer4 are registered but not used by forward.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = nn.Sequential(RestNetBasicBlock(64, 64, 1),
                                    RestNetBasicBlock(64, 64, 1))
        self.layer2 = nn.Sequential(RestNetDownBlock(64, 128, [2, 1]),
                                    RestNetBasicBlock(128, 128, 1))
        self.layer3 = nn.Sequential(RestNetDownBlock(128, 256, [2, 1]),
                                    RestNetBasicBlock(256, 256, 1))
        self.layer4 = nn.Sequential(RestNetDownBlock(256, 512, [2, 1]),
                                    RestNetBasicBlock(512, 512, 1))

        # Probe the feature-map shape with this model's own layers, so the
        # probe always matches forward and no second network has to be
        # built. eval() + no_grad() keep the dummy batch from touching the
        # batch-norm running statistics or recording an autograd graph.
        self.eval()
        with torch.no_grad():
            probe = self._extract_features(torch.zeros((1, 3) + tuple(image_shape)))
        self.train()

        bone_output_shape = probe.shape[1] * probe.shape[2]  # channels * height
        self.lstm = nn.LSTM(bone_output_shape, bone_output_shape, num_layers=1, bidirectional=True)
        # Bidirectional LSTM concatenates both directions -> 2 * hidden size.
        self.fc = nn.Linear(bone_output_shape * 2, label_map_length)

    def _extract_features(self, x):
        """Run the convolutional layers that forward uses (conv1, layer1, layer2)."""
        x = self.conv1(x)
        x = self.layer1(x)
        return self.layer2(x)

    def forward(self, x):
        """
        :param x: image batch accepted by ``conv1`` (3 input channels)
        :return: tensor ``[time_step, batch_size, label_map_length]`` where
            ``time_step`` is the width of the backbone feature map
        """
        x = self._extract_features(x)   # [batch, channels, height, width]
        x = x.permute(3, 0, 1, 2)       # [width, batch, channels, height]
        # flatten copies only if the permuted layout cannot be viewed, so it
        # also works for inputs whose memory layout is not the default one.
        x = x.flatten(2)                # [time_step, batch, channels * height]
        x, _ = self.lstm(x)             # [time_step, batch, 2 * hidden]
        # nn.Linear acts on the last dimension, so the time and batch axes
        # do not need to be merged and split again.
        return self.fc(x)

### 3、开始训练

# Training script: fits LstmNet on ./datasets_rcnn with CTC loss and
# checkpoints the model and optimizer under ./models.
#
# NOTE(review): several statements were missing from this section (the body
# of the checkpoint `if`, the definitions of `optimizer` and `bar`, and the
# gradient reset that the original comment announced). They are
# reconstructed below; every reconstructed choice is marked as an assumption.
from torch.utils.data import DataLoader
from tqdm import tqdm  # assumed: `bar.set_description(...)` is the tqdm API

MODEL_PATH = './models/model_rcnn.pkl'
OPTIMIZER_PATH = './models/optimizer_rcnn.pkl'
BATCH_SIZE = 32  # assumed value - TODO confirm against the original setup
BLANK_INDEX = 0  # '_' is the first symbol of `mapping`; CTCLoss uses blank=0 by default

mapping = "_" + "".join(calc_list)

# Prefer the second GPU as before, but fall back so the script still starts
# on machines with one GPU or none.
if torch.cuda.device_count() > 1:
    device = torch.device('cuda:1')
elif torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

model = LstmNet((100, 300), len(mapping)).to(device)
loss_func = nn.CTCLoss()
# NOTE(review): the optimizer definition is not visible in the source; Adam
# with default hyper-parameters is an assumption - confirm or replace.
optimizer = torch.optim.Adam(model.parameters())

# Resume from the last checkpoint when one exists.
if os.path.exists(MODEL_PATH):
    model.load_state_dict(torch.load(MODEL_PATH, map_location=device))
    if os.path.exists(OPTIMIZER_PATH):
        optimizer.load_state_dict(torch.load(OPTIMIZER_PATH, map_location=device))

transform = transforms.Compose(
    [
        transforms.Resize((100, 300), ),
        transforms.ToTensor(),  # PIL image -> tensor
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])  # per-channel normalisation
    ]
)

# The dataset does not change between epochs, so build it and its loader once.
train_data = NumberDataset('./datasets_rcnn', transform=transform)
train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)  # shuffle assumed

os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)


def _save_checkpoint():
    """Write the current model and optimizer state to disk."""
    torch.save(model.state_dict(), MODEL_PATH, _use_new_zipfile_serialization=True)
    torch.save(optimizer.state_dict(), OPTIMIZER_PATH, _use_new_zipfile_serialization=True)


for epoch in range(30):
    total_loss = []
    model.train()
    bar = tqdm(enumerate(train_loader), total=len(train_loader))
    for idx, (images, labels, _) in bar:
        # Real label length = number of non-padding symbols. Deriving it from
        # the labels keeps the padding symbol (the CTC blank) out of the
        # targets no matter which length the dataset reports.
        target_lengths = (labels != BLANK_INDEX).sum(dim=1)

        images = images.to(device)
        labels = labels.to(device)

        # Clear gradients left over from the previous step; without this
        # they would accumulate across batches.
        optimizer.zero_grad()

        output = model(images)  # [time_step, batch, classes], raw scores
        # CTCLoss expects log-probabilities, not raw scores.
        log_probs = output.log_softmax(2)
        # Every sample uses the full output sequence.
        predict_lengths = torch.full((labels.shape[0],), output.shape[0], dtype=torch.long)

        loss = loss_func(log_probs, labels, predict_lengths, target_lengths)
        loss.backward()
        total_loss.append(loss.item())
        optimizer.step()

        bar.set_description("epcoh:{} idx:{},loss:{:.6f}".format(
            epoch, idx, sum(total_loss) / len(total_loss)))
        if idx % 200 == 0:
            _save_checkpoint()
    # Always keep the state reached at the end of the epoch.
    _save_checkpoint()