粤车牌abcdef的排列顺序应该如何,以满足百度优化标准?,

「人工智能」CNN+TensorFlow实现车牌识别之训练模型构建

在上一篇《「人工智能」CNN+TensorFlow实现车牌识别之生成车牌数据》中,笔者使用pygame完成了5万张车牌数据的生成。

文章链接:https://www.toutiao.com/i6490140686917042701/

有了数据之后,接下来开始正式构建基于CNN的车牌识别训练模型。模型参考了网络上多篇文章的识别模型,并根据实际需要进行了修改。

主要参考文章链接:https://www.cnblogs.com/ydf0509/p/6916435.html

需要注意的知识点是:

1. 读取数据

一开始是想使用TensorFlow的TFRecord,结果发现使用起来问题非常多,可操作的自由度也低,谷歌自身提供的简单数据集也没有用TFRecord,所以从时间成本考虑,暂时放弃了TFRecord来做数据集,不过面对大量数据时的低内存需求和高性能读取,可能最终还是需要把这个研究一下的。

2. 图片的调整

使用的读取图片的包不同,调整图片大小的方法也不同。用过PIL和skimage,最后感觉skimage比较顺手。

3. batch数据与Tensor的转换

其实就是最后使用TensorFlow的API的时候,其需求的参数和返回的数据,需要根据实际需求区分是要一般变量还是Tensor变量。这里遇到了一些麻烦,后面有时间作为单独一个点总结一下。

4. 文本向量转换

英文的转换比较简单,用Unicode code就行。中文就不太适合,这里我自己编的码,所以还涉及到了字典索引的使用。

训练模型图示

采用3个卷积层,最大池化,一个全连接输出层

上代码:

import tensorflow as tf

import numpy as np

import glob

import random

from PIL import Image

from skimage import io

from skimage import transform,data

from PNReader import *

# ---------------------------------------------------------------------------
# Character tables for licence-plate text <-> vector conversion.
# NOTE(review): the scraped source lost the actual Chinese characters in
# `hanzi` / `dicthz` (they appear as empty strings), and `[`/`]` were mangled
# to `<`/`>`.  The 31 province abbreviations below are the standard mainland
# set; their ORDER must match the one used when the training labels were
# generated (see PNReader / the plate-generation article) -- verify.
# ---------------------------------------------------------------------------
number = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']

ALPHABET = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
            'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z']

# Province abbreviation characters that can appear as the first plate char.
hanzi = ['京', '津', '沪', '渝', '冀', '豫', '云', '辽', '黑', '湘', '皖',
         '鲁', '新', '苏', '浙', '赣', '鄂', '桂', '甘', '晋', '蒙', '陕',
         '吉', '闽', '贵', '粤', '青', '藏', '川', '宁', '琼']

# Maps each province character to the numeric code 462..492 used by text2vec.
dicthz = {ch: 462 + i for i, ch in enumerate(hanzi)}

# 文本转向量 -> full character set; '_' pads plates shorter than MAX_NP chars.
char_set = number + ALPHABET + hanzi + ['_'] + ['_'] + ['_'] + ['_']  # 如果验证码长度小于4, '_'用来补齐

# char_set = number + ALPHABET + ['_'] + ['_']  # 如果验证码长度小于4, '_'用来补齐

CHAR_SET_LEN = len(char_set)  # 10 + 26 + 31 + 4 = 71

# CHAR_SET_LEN = 7

# Data locations.  NOTE(review): leading "/../" resolves from the filesystem
# root, which looks unintended -- confirm against the real directory layout.
image_path = "/../images/"
test_path = "/../images/"
testlabel_path = "/../labels.txt"
label_path = "/../labels.txt"
PATH = "/../images/*.jpg"

# 图像大小 -- input image size fed to the network (grayscale H x W).
IMAGE_HEIGHT = 40  # 45
IMAGE_WIDTH = 100  # 180

# Maximum number of characters on one plate.
MAX_NP = 7

# Single sample image used for ad-hoc testing.
image_file = "/Users/adminjackfy/downloads/trainlibs/chepai/images/GPEI07.jpg"

def getPicture(path):
    """Return the list of file paths matching the glob pattern *path*."""
    matched_files = glob.glob(path)
    return matched_files

def getSplitData(path):
    """Split the files matched by *path* into a 90% train / 10% test pair.

    Returns (train, test) lists of file paths.

    NOTE(review): the scraped original assigned the WHOLE result to `test`
    (the slice inside square brackets was eaten by the HTML scraper);
    restored here as the complementary tail so train and test are disjoint.
    """
    result = getPicture(path)
    length = len(result)
    trainLengh = int(length * 0.9)
    train = result[0:trainLengh]
    test = result[trainLengh:]
    return train, test

def sampleTrain(length, trainData):
    """Draw *length* distinct elements from *trainData* uniformly at random."""
    chosen = random.sample(trainData, length)
    return chosen

def getImageAndName(path):
    """Load one sample image and derive its label from the file name.

    Returns (name, img): `name` is the file's base name without extension
    (e.g. ".../GPEI07.jpg" -> "GPEI07", which encodes the plate text), and
    `img` is the grayscale image, inverted (1.0 - pixel) and resized to
    40x100 (IMAGE_HEIGHT x IMAGE_WIDTH).

    NOTE(review): the `[-1]` / `[0]` indexes were mangled to `<-1>` / `<0>`
    by the scraper; restored here.
    """
    name = path.split("/")[-1].split(".")[0]
    # `as_grey` is the pre-0.16 skimage spelling; newer versions use `as_gray`.
    img = 1.0 - io.imread(path, as_grey=True)
    img = transform.resize(img, (40, 100))
    return name, img

def get_next_batch(data):
    """Build one batch from a list of image file paths.

    Returns (batch_x, batch_y): batch_x holds one flattened grayscale image
    per row, batch_y the matching one-hot label vectors from text2vec.

    NOTE(review): the scraped original had lost the `np.zeros` shapes, the
    `data[i]` subscript and the per-row assignment indexes; reconstructed
    here in the conventional form.
    """
    batch_size = len(data)
    batch_x = np.zeros([batch_size, IMAGE_HEIGHT * IMAGE_WIDTH])
    batch_y = np.zeros([batch_size, MAX_NP * CHAR_SET_LEN])
    for i in range(batch_size):
        text, image = getImageAndName(data[i])
        # Pixel values are already in [0, 1] from skimage, so no /255 rescale.
        batch_x[i, :] = image.flatten()
        batch_y[i, :] = text2vec(text)
    return batch_x, batch_y

def text2vec(text):
    """Encode a plate string into a flat one-hot vector.

    Each character occupies a CHAR_SET_LEN-wide slot; the vector has
    MAX_NP * CHAR_SET_LEN entries.  Raises ValueError when *text* is longer
    than MAX_NP characters.
    """
    text_len = len(text)
    if text_len > MAX_NP:
        raise ValueError('验证码最长4个字符')
    vector = np.zeros(MAX_NP * CHAR_SET_LEN)

    def char2pos(c):
        # '_' pads plates shorter than MAX_NP.
        if c == '_':
            k = 62
            return k
        k = ord(c) - 48            # '0'..'9' -> 0..9
        if k > 9:
            k = ord(c) - 55        # 'A'..'Z' -> 10..35
        if k > 35:
            k = ord(c) - 61        # 'a'..'z' -> 36..61
        if k > 61:
            # 说明是中文字 -- Chinese province char: use its dicthz code.
            # NOTE(review): dicthz codes (462..492) exceed CHAR_SET_LEN, so
            # the one-hot index only stays inside the vector because the
            # Chinese char is always the FIRST plate character -- fragile;
            # confirm against the label generator before relying on it.
            k = dicthz[c]
        return k

    for i, c in enumerate(text):
        idx = i * CHAR_SET_LEN + char2pos(c)
        vector[idx] = 1
    return vector


# 向量转回文本
def vec2text(vec):
    """Decode a sequence of per-position class indices back into plate text.

    *vec* is expected to be the list of argmax indices (one per character
    position), as produced in predict(); it is NOT the one-hot vector.
    """
    char_pos = vec
    text = []
    for i, c in enumerate(char_pos):
        char_idx = c % CHAR_SET_LEN
        if char_idx < 10:
            char_code = char_idx + ord('0')
            zm = chr(char_code)
        elif char_idx < 36:
            char_code = char_idx - 10 + ord('A')
            zm = chr(char_code)
        elif char_idx < 62:
            char_code = char_idx - 36 + ord('a')
            zm = chr(char_code)
        elif char_idx == 62:
            char_code = ord('_')
            zm = chr(char_code)
        else:
            # 说明是中文 -- Chinese character: reverse lookup in dicthz.
            # NOTE(review): char_idx here is reduced modulo CHAR_SET_LEN
            # while dicthz stores raw codes 462..492, so as in the original
            # this comparison cannot match and "N" is returned -- flagged,
            # behavior preserved pending a decision on the encoding.
            zm = "N"
            for key in dicthz:
                if char_idx == dicthz[key]:
                    zm = key
        text.append(zm)
    return "".join(text)
# Module-level placeholders (TF 1.x graph mode).  None代表不限条数的输入 --
# the leading None dimension accepts any batch size.
# NOTE(review): the placeholder shapes were eaten by the scraper; restored
# to match the shapes re-declared inside train_detect_np_cnn().
X = tf.placeholder(tf.float32, [None, IMAGE_HEIGHT * IMAGE_WIDTH])
Y = tf.placeholder(tf.float32, [None, MAX_NP * CHAR_SET_LEN])
keep_prob = tf.placeholder(tf.float32)  # dropout keep probability


def variable_summaries(var, name):
    """Attach a lot of summaries to a Tensor."""
    with tf.name_scope('summaries'):
        mean = tf.reduce_mean(var)
        tf.summary.scalar('mean/' + name, mean)
        with tf.name_scope('stddev'):
            stddev = tf.sqrt(tf.reduce_sum(tf.square(var - mean)))
        tf.summary.scalar('sttdev/' + name, stddev)
        tf.summary.scalar('max/' + name, tf.reduce_max(var))
        tf.summary.scalar('min/' + name, tf.reduce_min(var))
        tf.summary.histogram(name, var)


def detect_np_cnn(X, keep_prob, w_alpha=0.01, b_alpha=0.1):
    """Build the recognition network and return the raw logits tensor.

    Architecture: 3 x (3x3 conv -> ReLU -> 2x2 max-pool -> dropout), one
    1024-unit fully-connected layer, and a linear output layer producing
    MAX_NP * CHAR_SET_LEN logits (one CHAR_SET_LEN slot per plate char).
    `w_alpha` / `b_alpha` are unused in this reconstruction, kept for
    signature compatibility with the referenced article.
    """
    with tf.name_scope('image_input'):
        x = tf.reshape(X, shape=[-1, IMAGE_HEIGHT, IMAGE_WIDTH, 1])
        tf.summary.image('image_input', x, 3)

    # Conv layer 1: 1 -> 32 feature maps.
    with tf.name_scope('input_cnn_filter1'):
        with tf.name_scope('input_weight1'):
            w_c1 = tf.Variable(tf.truncated_normal([3, 3, 1, 32], stddev=0.1))
            variable_summaries(w_c1, 'input_cnn_filter1/input_weight1')
        with tf.name_scope('input_biases1'):
            b_c1 = tf.Variable(tf.constant(0.1, shape=[32]))
            variable_summaries(b_c1, 'input_cnn_filter1/input_biases1')
        conv1 = tf.nn.relu(tf.nn.bias_add(
            tf.nn.conv2d(x, w_c1, strides=[1, 1, 1, 1], padding='SAME'), b_c1))
        tf.summary.histogram('input_cnn_filter1', conv1)
        conv1 = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1],
                               strides=[1, 2, 2, 1], padding='SAME')
        conv1 = tf.nn.dropout(conv1, keep_prob)
    # print(conv1.get_shape())

    # Conv layer 2: 32 -> 64 feature maps.
    with tf.name_scope('input_cnn_filter2'):
        with tf.name_scope('input_weight2'):
            w_c2 = tf.Variable(tf.truncated_normal([3, 3, 32, 64], stddev=0.1))
            variable_summaries(w_c2, 'input_cnn_filter2/input_weight2')
        with tf.name_scope('input_biases2'):
            b_c2 = tf.Variable(tf.constant(0.1, shape=[64]))
            variable_summaries(b_c2, 'input_cnn_filter2/input_biases2')
    conv2 = tf.nn.relu(tf.nn.bias_add(
        tf.nn.conv2d(conv1, w_c2, strides=[1, 1, 1, 1], padding='SAME'), b_c2))
    tf.summary.histogram('input_cnn_filter2', conv2)
    conv2 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1],
                           strides=[1, 2, 2, 1], padding='SAME')
    conv2 = tf.nn.dropout(conv2, keep_prob)
    # print(conv2.get_shape())

    # Conv layer 3: 64 -> 64 feature maps.
    with tf.name_scope('input_cnn_filter3'):
        with tf.name_scope('input_weight3'):
            w_c3 = tf.Variable(tf.truncated_normal([3, 3, 64, 64], stddev=0.1))
            variable_summaries(w_c3, 'input_cnn_filter3/input_weight3')
        with tf.name_scope('input_biases3'):
            b_c3 = tf.Variable(tf.constant(0.1, shape=[64]))
            variable_summaries(b_c3, 'input_cnn_filter3/input_biases3')
    conv3 = tf.nn.relu(tf.nn.bias_add(
        tf.nn.conv2d(conv2, w_c3, strides=[1, 1, 1, 1], padding='SAME'), b_c3))
    # Original tagged this histogram 'input_cnn_filter1' (copy-paste);
    # preserved to keep TensorBoard output identical to the article's.
    tf.summary.histogram('input_cnn_filter1', conv3)
    conv3 = tf.nn.max_pool(conv3, ksize=[1, 2, 2, 1],
                           strides=[1, 2, 2, 1], padding='SAME')
    conv3 = tf.nn.dropout(conv3, keep_prob)
    # print(conv3.get_shape())

    # Fully connected layer: after three 2x2 pools, 40x100 -> 5x13 spatially.
    with tf.name_scope('input_fully_layer'):
        with tf.name_scope('input_fully_weight'):
            w_d = tf.Variable(tf.truncated_normal([5 * 13 * 64, 1024], stddev=0.1))
            variable_summaries(w_d, 'input_fully_layer/input_fully_weight')
        with tf.name_scope('input_fully_biases'):
            b_d = tf.Variable(tf.constant(0.1, shape=[1024]))
            variable_summaries(b_d, 'input_fully_layer/input_fully_biases')
    dense = tf.reshape(conv3, [-1, w_d.get_shape().as_list()[0]])
    dense = tf.nn.relu(tf.add(tf.matmul(dense, w_d), b_d))
    tf.summary.histogram('input_fully_layer', dense)
    dense = tf.nn.dropout(dense, keep_prob)

    # Output layer: raw logits, no softmax here (loss applies sigmoid).
    with tf.name_scope('output'):
        with tf.name_scope('output_w'):
            w_out = tf.Variable(tf.truncated_normal([1024, MAX_NP * CHAR_SET_LEN], stddev=0.1))
            variable_summaries(w_out, 'output/output_w')
        with tf.name_scope('output_b'):
            # NOTE(review): shape literal was eaten by the scraper; restored
            # to match w_out's output dimension.
            b_out = tf.Variable(tf.constant(0.1, shape=[MAX_NP * CHAR_SET_LEN]))
            variable_summaries(b_out, 'output/output_b')
    out = tf.add(tf.matmul(dense, w_out), b_out)
    tf.summary.histogram('output', out)
    # out = tf.nn.softmax(out)
    return out
# Module-level split shared by training and prediction.
traindata, testdata = getSplitData(PATH)


def train_detect_np_cnn(max_step=200):
    """Train the plate-recognition CNN and checkpoint it at *max_step*.

    Builds fresh placeholders (shadowing the module-level ones), optimizes
    sigmoid cross-entropy with Adam, logs loss/accuracy to TensorBoard every
    5 steps, and saves the model to ./model/ when step == max_step.

    NOTE(review): the `sess.run([...])` fetch lists and placeholder shapes
    were eaten by the scraper; reconstructed from the assigned names.
    """
    X = tf.placeholder(tf.float32, [None, IMAGE_HEIGHT * IMAGE_WIDTH])
    Y = tf.placeholder(tf.float32, [None, MAX_NP * CHAR_SET_LEN])
    keep_prob = tf.placeholder(tf.float32)  # dropout
    output = detect_np_cnn(X, keep_prob)

    # loss -- sigmoid (multi-label one-hot) rather than softmax per slot.
    # loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=output, labels=Y))
    with tf.name_scope('loss'):
        loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=output, labels=Y))
        tf.summary.scalar('loss', loss)  # 可视化观看常量

    # optimizer 为了加快训练 learning_rate应该开始大,然后慢慢衰
    with tf.name_scope('train'):
        optimizer = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(loss)

    # Per-position accuracy: argmax over each CHAR_SET_LEN slot.
    predict = tf.reshape(output, [-1, MAX_NP, CHAR_SET_LEN])
    max_idx_p = tf.argmax(predict, 2)
    YY = tf.reshape(Y, [-1, MAX_NP, CHAR_SET_LEN])
    max_idx_l = tf.argmax(YY, 2)
    correct_pred = tf.equal(max_idx_p, max_idx_l)
    with tf.name_scope('accuracy'):
        accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
        tf.summary.scalar('accuracy', accuracy)  # 可视化观看常量

    saver = tf.train.Saver()
    with tf.Session() as sess:
        # 合并到Summary中
        merged = tf.summary.merge_all()
        # 选定可视化存储目录
        writer = tf.summary.FileWriter(
            "/Users/adminjackfy/downloads/trainlibs/chepai/", sess.graph)
        test_writer = tf.summary.FileWriter(
            "/Users/adminjackfy/downloads/trainlibs/chepai/test/", sess.graph)
        sess.run(tf.global_variables_initializer())
        step = 0
        while True:
            batch_x, batch_y = get_next_batch(sampleTrain(128, traindata))
            _, lossSize = sess.run([optimizer, loss],
                                   feed_dict={X: batch_x, Y: batch_y, keep_prob: 0.8})
            if step % 5 == 0:
                print("step is:" + str(step), u"损失函数大小为" + str(lossSize))
                batch_x_test, batch_y_test = get_next_batch(testdata)
                summary, acc = sess.run(
                    [merged, accuracy],
                    feed_dict={X: batch_x_test, Y: batch_y_test, keep_prob: 1.})
                writer.add_summary(summary, step)
                print("step is:" + str(step), "acc is :" + str(acc))
            if step == max_step:
                saver.save(sess, "./model/crack_capcha.model")
                break
            step += 1
def predict(testdata):
    """Restore the latest checkpoint and report plate-level accuracy.

    Runs the network on each file in *testdata*, decodes the argmax indices
    with vec2text, compares against the file-name label and prints a final
    correct/total ratio.

    NOTE(review): the `feed_dict={X: [captcha_image]}` batch-of-one wrapper
    was eaten by the scraper; restored.  The argmax op is hoisted out of the
    loop -- the original rebuilt it per image, growing the graph each
    iteration without changing results.
    """
    X = tf.placeholder(tf.float32, [None, IMAGE_HEIGHT * IMAGE_WIDTH])
    keep_prob = tf.placeholder(tf.float32)
    output = detect_np_cnn(X, keep_prob)
    saver = tf.train.Saver()
    with tf.Session() as sess:
        # sess.run(tf.global_variables_initializer())  # not needed after restore
        saver.restore(sess, tf.train.latest_checkpoint('./model/'))
        batch_size = len(testdata)
        count = 0
        predict_op = tf.argmax(tf.reshape(output, [-1, MAX_NP, CHAR_SET_LEN]), 2)
        for i in range(batch_size):
            text, image = getImageAndName(testdata[i])
            captcha_image = image.flatten()
            text_list = sess.run(predict_op,
                                 feed_dict={X: [captcha_image], keep_prob: 1})
            predict_text = text_list[0].tolist()
            predict_value = vec2text(predict_text)
            flag = text == predict_value
            if flag:
                count += 1
            print("真实值: {}, 预测值: {}, 是否相等: {}".format(text, predict_value, flag))
        print('\n识别结果: {}/{}={}'.format(count, batch_size, count / batch_size))
 
if __name__ == '__main__':
    # 训练10000次 -- train for 10000 steps; switch the comments to run inference.
    train_detect_np_cnn(max_step=10000)
    # predict(testdata)

采用CPU模式训练一次要一天多,使用GPU模式也要好几个小时。准确率(Accuracy)可以达到90%。

这里我已经把可视化的模型加入进来了。可视化具体内容请期待后续文章。

如果您觉得本文还有价值,烦请关注+收藏,笔者将带来更多关于人工智能的实践内容。

2024-03-12

后面没有了,返回>>电动车百科