其实是Udacity上的深度学习公开课,感觉这个讲得最简洁明了。
下面的代码训练的是一个单隐层全连接的小型神经网络,隐层节点数设定为1024,输入图片大小为28×28,label有'A'–'J'共10个类别,所以输出层用了softmax。数据集使用notMNIST,参数更新用的是mini-batch SGD。
下面是关键部分的代码。这个课程的一个好处是用Docker+Jupyter做的,给答案很方便,以前从未体验过这么流畅的,哈哈哈。
# Single-hidden-layer ReLU network trained with mini-batch SGD.
# NOTE(review): `tf`, `image_size`, `num_labels`, the train/valid/test
# datasets and labels, and `accuracy` are not defined in this snippet; they
# are assumed to come from earlier notebook cells -- confirm before running.
batch_size = 128
num_relu_units = 1024


def _hidden_relu_logits(dataset, w_in, b_in, w_out, b_out):
    """Return the pre-softmax logits of the one-hidden-layer ReLU network."""
    hidden = tf.nn.relu(tf.matmul(dataset, w_in) + b_in)
    return tf.matmul(hidden, w_out) + b_out


graph = tf.Graph()
with graph.as_default():
    # Input data. For the training data, we use a placeholder that will be fed
    # at run time with a training minibatch.
    tf_train_dataset = tf.placeholder(
        tf.float32, shape=(batch_size, image_size * image_size))
    tf_train_labels = tf.placeholder(
        tf.float32, shape=(batch_size, num_labels))
    tf_valid_dataset = tf.constant(valid_dataset)
    tf_test_dataset = tf.constant(test_dataset)

    # Variables: input -> hidden layer, then hidden -> output layer.
    weights = tf.Variable(
        tf.truncated_normal([image_size * image_size, num_relu_units]))
    biases = tf.Variable(tf.zeros([num_relu_units]))
    wt_hidden = tf.Variable(
        tf.truncated_normal([num_relu_units, num_labels]))
    b_hidden = tf.Variable(tf.zeros([num_labels]))

    # Training computation.
    l1 = tf.nn.relu(tf.matmul(tf_train_dataset, weights) + biases)
    l2 = tf.matmul(l1, wt_hidden) + b_hidden  # output-layer logits
    # Pass logits/labels by keyword: the parameter names exist in old
    # TensorFlow releases, and TensorFlow >= 1.0 requires named arguments
    # for this op (a positional call raises an error there).
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(
            logits=l2, labels=tf_train_labels))

    # Optimizer: plain gradient descent with a learning rate of 0.5.
    optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)

    # Predictions for the training, validation, and test data.
    train_prediction = tf.nn.softmax(l2)
    valid_prediction = tf.nn.softmax(_hidden_relu_logits(
        tf_valid_dataset, weights, biases, wt_hidden, b_hidden))
    test_prediction = tf.nn.softmax(_hidden_relu_logits(
        tf_test_dataset, weights, biases, wt_hidden, b_hidden))

num_steps = 3001

with tf.Session(graph=graph) as session:
    tf.initialize_all_variables().run()
    print("Initialized")
    for step in range(num_steps):
        # Pick an offset within the training data, which has been randomized.
        # Note: we could use better randomization across epochs.
        offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
        # Generate a minibatch.
        batch_data = train_dataset[offset:(offset + batch_size), :]
        batch_labels = train_labels[offset:(offset + batch_size), :]
        # Prepare a dictionary telling the session where to feed the minibatch.
        # The key of the dictionary is the placeholder node of the graph to be
        # fed, and the value is the numpy array to feed to it.
        feed_dict = {tf_train_dataset: batch_data,
                     tf_train_labels: batch_labels}
        _, l, predictions = session.run(
            [optimizer, loss, train_prediction], feed_dict=feed_dict)
        # Report progress every 500 steps.
        if (step % 500 == 0):
            print("Minibatch loss at step %d: %f" % (step, l))
            print("Minibatch accuracy: %.1f%%"
                  % accuracy(predictions, batch_labels))
            print("Validation accuracy: %.1f%%" % accuracy(
                valid_prediction.eval(), valid_labels))
    # Evaluate on the test set once, after training has finished.
    print("Test accuracy: %.1f%%"
          % accuracy(test_prediction.eval(), test_labels))
输出如下:
Initialized Minibatch loss at step 0: 319.099121 Minibatch accuracy: 8.6% Validation accuracy: 26.0% Minibatch loss at step 500: 8.215627 Minibatch accuracy: 83.6% Validation accuracy: 81.8% Minibatch loss at step 1000: 11.695193 Minibatch accuracy: 78.1% Validation accuracy: 80.8% Minibatch loss at step 1500: 7.294090 Minibatch accuracy: 83.6% Validation accuracy: 79.1% Minibatch loss at step 2000: 8.128178 Minibatch accuracy: 77.3% Validation accuracy: 81.5% Minibatch loss at step 2500: 3.724820 Minibatch accuracy: 84.4% Validation accuracy: 82.1% Minibatch loss at step 3000: 3.041273 Minibatch accuracy: 86.7% Validation accuracy: 81.0% Test accuracy: 88.1%
随后又试了一下有两个隐层的ReLU网络,隐层节点数分别是1024和512,使用AdamOptimizer训练,准确率似乎又能提高一点点。
迭代次数为5000,mini-batch的大小没有变(仍为128)。
# Two-hidden-layer ReLU network (1024 and 512 units) trained with Adam.
# NOTE(review): `tf`, `image_size`, `num_labels`, the train/valid/test
# datasets and labels, and `accuracy` are not defined in this snippet; they
# are assumed to come from earlier notebook cells -- confirm before running.
def getPrediction(dataSet, wt0, wt1, wt2, b0, b1, b2):
    """Build the softmax output of the two-hidden-layer ReLU network.

    Args:
        dataSet: Input tensor fed to the first layer.
        wt0, wt1, wt2: Weight matrices of the first hidden, second hidden
            and output layers, in that order.
        b0, b1, b2: Bias vectors paired with ``wt0``, ``wt1`` and ``wt2``.

    Returns:
        The tensor produced by ``tf.nn.softmax`` on the output-layer logits.
    """
    l1 = tf.nn.relu(tf.matmul(dataSet, wt0) + b0)
    l2 = tf.nn.relu(tf.matmul(l1, wt1) + b1)
    l3 = tf.matmul(l2, wt2) + b2
    return tf.nn.softmax(l3)


batch_size = 128
num_relu_units = 1024
num_relu_units_l3 = 512
# Only referenced by the disabled dropout lines in the training computation.
dropout_keep_prob = 0.75

graph = tf.Graph()
with graph.as_default():
    # Input data. For the training data, we use a placeholder that will be fed
    # at run time with a training minibatch.
    tf_train_dataset = tf.placeholder(
        tf.float32, shape=(batch_size, image_size * image_size))
    tf_train_labels = tf.placeholder(
        tf.float32, shape=(batch_size, num_labels))
    tf_valid_dataset = tf.constant(valid_dataset)
    tf_test_dataset = tf.constant(test_dataset)

    # Variables: input -> hidden 1 -> hidden 2 -> output.
    weights = tf.Variable(
        tf.truncated_normal([image_size * image_size, num_relu_units]))
    biases = tf.Variable(tf.zeros([num_relu_units]))
    wt_hidden = tf.Variable(
        tf.truncated_normal([num_relu_units, num_relu_units_l3]))
    b_hidden = tf.Variable(tf.zeros([num_relu_units_l3]))
    wt_h_l3 = tf.Variable(
        tf.truncated_normal([num_relu_units_l3, num_labels]))
    b_h_l3 = tf.Variable(tf.zeros([num_labels]))

    # Training computation. It is spelled out here rather than routed
    # through getPrediction() because the loss needs the raw logits (l3).
    l1 = tf.nn.relu(tf.matmul(tf_train_dataset, weights) + biases)
    # Dropout experiment, currently disabled:
    # l1 = tf.nn.dropout(l1, dropout_keep_prob)
    l2 = tf.nn.relu(tf.matmul(l1, wt_hidden) + b_hidden)
    # l2 = tf.nn.dropout(l2, dropout_keep_prob)
    l3 = tf.matmul(l2, wt_h_l3) + b_h_l3
    # Pass logits/labels by keyword: the parameter names exist in old
    # TensorFlow releases, and TensorFlow >= 1.0 requires named arguments
    # for this op (a positional call raises an error there).
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(
            logits=l3, labels=tf_train_labels))

    # Optimizer: Adam with its default hyper-parameters.
    optimizer = tf.train.AdamOptimizer().minimize(loss)

    # Predictions for the training, validation, and test data.
    train_prediction = tf.nn.softmax(l3)
    valid_prediction = getPrediction(
        tf_valid_dataset, weights, wt_hidden, wt_h_l3,
        biases, b_hidden, b_h_l3)
    test_prediction = getPrediction(
        tf_test_dataset, weights, wt_hidden, wt_h_l3,
        biases, b_hidden, b_h_l3)

num_steps = 5000

with tf.Session(graph=graph) as session:
    tf.initialize_all_variables().run()
    print("Initialized")
    for step in range(num_steps):
        # Pick an offset within the training data, which has been randomized.
        # Note: we could use better randomization across epochs.
        offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
        # Generate a minibatch.
        batch_data = train_dataset[offset:(offset + batch_size), :]
        batch_labels = train_labels[offset:(offset + batch_size), :]
        # Prepare a dictionary telling the session where to feed the minibatch.
        # The key of the dictionary is the placeholder node of the graph to be
        # fed, and the value is the numpy array to feed to it.
        feed_dict = {tf_train_dataset: batch_data,
                     tf_train_labels: batch_labels}
        _, l, predictions = session.run(
            [optimizer, loss, train_prediction], feed_dict=feed_dict)
        # Report progress every 500 steps.
        if (step % 500 == 0):
            print("Minibatch loss at step %d: %f" % (step, l))
            print("Minibatch accuracy: %.1f%%"
                  % accuracy(predictions, batch_labels))
            print("Validation accuracy: %.1f%%" % accuracy(
                valid_prediction.eval(), valid_labels))
    # Evaluate on the test set once, after training has finished.
    print("Test accuracy: %.1f%%"
          % accuracy(test_prediction.eval(), test_labels))
Initialized Minibatch loss at step 0: 3763.713867 Minibatch accuracy: 10.2% Validation accuracy: 11.6% Minibatch loss at step 500: 218.475220 Minibatch accuracy: 78.9% Validation accuracy: 79.3% Minibatch loss at step 1000: 289.750031 Minibatch accuracy: 78.1% Validation accuracy: 80.2% Minibatch loss at step 1500: 171.686737 Minibatch accuracy: 84.4% Validation accuracy: 81.1% Minibatch loss at step 2000: 123.215240 Minibatch accuracy: 85.2% Validation accuracy: 82.1% Minibatch loss at step 2500: 57.080734 Minibatch accuracy: 89.8% Validation accuracy: 82.4% Minibatch loss at step 3000: 63.220982 Minibatch accuracy: 85.9% Validation accuracy: 83.0% Minibatch loss at step 3500: 95.992943 Minibatch accuracy: 82.0% Validation accuracy: 83.1% Minibatch loss at step 4000: 69.324394 Minibatch accuracy: 86.7% Validation accuracy: 83.2% Minibatch loss at step 4500: 75.464554 Minibatch accuracy: 82.0% Validation accuracy: 83.7% Test accuracy: 90.4%