{"id":2228,"date":"2016-10-08T14:07:26","date_gmt":"2016-10-08T06:07:26","guid":{"rendered":"http:\/\/boweihe.me\/?p=2228"},"modified":"2016-10-08T14:07:26","modified_gmt":"2016-10-08T06:07:26","slug":"tensorflow%e5%ad%a6%e4%b9%a0-%e5%8d%95%e9%9a%90%e5%b1%82%e7%a5%9e%e7%bb%8f%e7%bd%91%e7%bb%9crelu","status":"publish","type":"post","link":"https:\/\/dayandcarrot.space\/?p=2228","title":{"rendered":"TensorFlow\u5b66\u4e60:  \u8bad\u7ec3\u4e2aReLu\u795e\u7ecf\u7f51\u7edc(\u5355\u9690\u5c42\u3001\u53cc\u9690\u5c42)"},"content":{"rendered":"<p>\u5176\u5b9e\u662fUdaCity\u4e0a\u7684\u6df1\u5ea6\u5b66\u4e60<a href=\"https:\/\/classroom.udacity.com\/courses\/ud730\/lessons\/6379031992\/concepts\/64244193630923#\" target=\"_blank\" rel=\"noopener noreferrer\">\u516c\u5f00\u8bfe<\/a>\uff0c\u611f\u89c9\u8fd9\u4e2a\u8bb2\u7684\u6700\u7b80\u6d01\u660e\u4e86\u3002<br \/>\n\u4e0b\u9762\u7684\u4ee3\u7801\u8bad\u7ec3\u7684\u662f\u4e00\u4e2a\u5355\u9690\u5c42\u5168\u8fde\u901a\u7684\u5c0f\u5c0f\u795e\u7ecf\u7f51\u7edc\uff0c\u9690\u5c42\u8282\u70b9\u6570\u91cf\u8bbe\u5b9a\u4e3a1024\uff0c\u8f93\u5165\u7684\u56fe\u7247\u662f28*28\u7684\uff0clabel\u6709&#8217;A&#8217;-&#8216;J&#8217;\u517110\u4e2a\uff0c\u6240\u4ee5\u6700\u540e\u7528\u4e86softmax\u3002\u6570\u636e\u4f7f\u7528nonMNIST\u7684\u3002\u53c2\u6570\u66f4\u65b0\u7528\u7684mini-batch\u7684SGD.<br \/>\n<a href=\"http:\/\/boweihe.me\/wp-content\/uploads\/2016\/10\/\u5c4f\u5e55\u5feb\u7167-2016-10-08-\u4e0b\u53482.03.42.png\"><img loading=\"lazy\" decoding=\"async\" class=\"alignnone size-large wp-image-2229\" src=\"http:\/\/boweihe.me\/wp-content\/uploads\/2016\/10\/\u5c4f\u5e55\u5feb\u7167-2016-10-08-\u4e0b\u53482.03.42-1024x278.png\" alt=\"ReLu nonMNIST single hidden layer\" width=\"640\" height=\"174\" \/><\/a><br \/>\n\u4e0b\u9762\u662f\u5173\u952e\u90e8\u5206\u4ee3\u7801\uff0c\u8fd9\u4e2a\u8bfe\u7a0b\u7684\u4e00\u4e2a\u597d\u5904\u662f\u7528Docker+jupyter\u505a\u7684\uff0c\u7ed9\u7b54\u6848\u5f88\u65b9\u4fbf\uff0c\u4ee5\u524d\u4ece\u672a\u4f53\u9a8c\u8fc7\u8fd9\u4e48\u6d41\u7545\u7684\u54c8\u54c8\u54c8\u3002<\/p>\n<pre class=\"lang:python decode:true\" title=\"Graph Definition\">batch_size = 128\nnum_relu_units = 1024\ngraph = tf.Graph()\nwith graph.as_default():\n  # Input data. 
  # For the training data, we use a placeholder that will be fed
  # at run time with a training minibatch.
  tf_train_dataset = tf.placeholder(tf.float32,
                                    shape=(batch_size, image_size * image_size))
  tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
  tf_valid_dataset = tf.constant(valid_dataset)
  tf_test_dataset = tf.constant(test_dataset)

  # Variables: input -> hidden (1024 ReLU units), then hidden -> output logits.
  weights = tf.Variable(
    tf.truncated_normal([image_size * image_size, num_relu_units]))
  biases = tf.Variable(tf.zeros([num_relu_units]))
  wt_hidden = tf.Variable(tf.truncated_normal([num_relu_units, num_labels]))
  b_hidden = tf.Variable(tf.zeros([num_labels]))

  # Training computation.
  l1 = tf.matmul(tf_train_dataset, weights) + biases
  l1 = tf.nn.relu(l1)
  l2 = tf.matmul(l1, wt_hidden) + b_hidden
  # Note: in this TF version the positional argument order is (logits, labels).
  loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(l2, tf_train_labels))

  # Optimizer.
  optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)

  # Predictions for the training, validation, and test data.
  train_prediction = tf.nn.softmax(l2)
  valid_prediction = tf.nn.softmax(
    tf.matmul(tf.nn.relu(tf.matmul(tf_valid_dataset, weights) + biases), wt_hidden) + b_hidden)
  test_prediction = tf.nn.softmax(
    tf.matmul(tf.nn.relu(tf.matmul(tf_test_dataset, weights) + biases), wt_hidden) + b_hidden)

num_steps = 3001

with tf.Session(graph=graph) as session:
  tf.initialize_all_variables().run()
  print("Initialized")
  for step in range(num_steps):
    # Pick an offset within the training data, which has been randomized.
    # Note: we could use better randomization across epochs.
    offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
    # Generate a minibatch.
    batch_data = train_dataset[offset:(offset + batch_size), :]
    batch_labels = train_labels[offset:(offset + batch_size), :]
    # Prepare a dictionary telling the session where to feed the minibatch.
    # The key of the dictionary is the placeholder node of the graph to be fed,
    # and the value is the numpy array to feed to it.
    feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
    _, l, predictions = session.run(
      [optimizer, loss, train_prediction], feed_dict=feed_dict)
    if (step % 500 == 0):
      print("Minibatch loss at step %d: %f" % (step, l))
      print("Minibatch accuracy: %.1f%%" % accuracy(predictions, batch_labels))
      print("Validation accuracy: %.1f%%" % accuracy(
        valid_prediction.eval(), valid_labels))
  print("Test accuracy: %.1f%%" % accuracy(test_prediction.eval(), test_labels))
```

The output:

```
Initialized
Minibatch loss at step 0: 319.099121
Minibatch accuracy: 8.6%
Validation accuracy: 26.0%
Minibatch loss at step 500: 8.215627
Minibatch accuracy: 83.6%
Validation accuracy: 81.8%
Minibatch loss at step 1000: 11.695193
Minibatch accuracy: 78.1%
Validation accuracy: 80.8%
Minibatch loss at step 1500: 7.294090
Minibatch accuracy: 83.6%
Validation accuracy: 79.1%
Minibatch loss at step 2000: 8.128178
Minibatch accuracy: 77.3%
Validation accuracy: 81.5%
Minibatch loss at step 2500: 3.724820
Minibatch accuracy: 84.4%
Validation accuracy: 82.1%
Minibatch loss at step 3000: 3.041273
Minibatch accuracy: 86.7%
Validation accuracy: 81.0%
Test accuracy: 88.1%
```
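Both training loops call an `accuracy` helper that is defined earlier in the notebook and not shown here. For reference, a minimal sketch of what it looks like in the Udacity assignments, assuming the predictions and one-hot labels arrive as NumPy arrays:

```python
import numpy as np

# Percentage of rows where the argmax of the predicted softmax
# distribution matches the argmax of the one-hot label.
def accuracy(predictions, labels):
    return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))
            / predictions.shape[0])
```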
I then tried a ReLU net with two hidden layers of 1024 and 512 units, trained with AdamOptimizer; that seems to squeeze out a bit more accuracy. Iterations = 5000; the mini-batch size is unchanged.

```python
def getPrediction(dataSet, wt0, wt1, wt2, b0, b1, b2):
    # Forward pass for evaluation: two ReLU hidden layers, then softmax.
    l1 = tf.nn.relu(tf.matmul(dataSet, wt0) + b0)
    l2 = tf.nn.relu(tf.matmul(l1, wt1) + b1)
    l3 = tf.matmul(l2, wt2) + b2
    return tf.nn.softmax(l3)

batch_size = 128
num_relu_units = 1024
num_relu_units_l3 = 512
dropout_keep_prob = 0.75

graph = tf.Graph()
with graph.as_default():
  # Input data. For the training data, we use a placeholder that will be fed
  # at run time with a training minibatch.
  tf_train_dataset = tf.placeholder(tf.float32,
                                    shape=(batch_size, image_size * image_size))
  tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
  tf_valid_dataset = tf.constant(valid_dataset)
  tf_test_dataset = tf.constant(test_dataset)

  # Variables: 28*28 -> 1024 -> 512 -> 10.
  weights = tf.Variable(
    tf.truncated_normal([image_size * image_size, num_relu_units]))
  biases = tf.Variable(tf.zeros([num_relu_units]))
  wt_hidden = tf.Variable(tf.truncated_normal([num_relu_units, num_relu_units_l3]))
  b_hidden = tf.Variable(tf.zeros([num_relu_units_l3]))
  wt_h_l3 = tf.Variable(tf.truncated_normal([num_relu_units_l3, num_labels]))
  b_h_l3 = tf.Variable(tf.zeros([num_labels]))

  # Training computation.
  l1 = tf.matmul(tf_train_dataset, weights) + biases
  l1 = tf.nn.relu(l1)
  #l1 = tf.nn.dropout(l1, dropout_keep_prob)
  l2 = tf.matmul(l1, wt_hidden) + b_hidden
  l2 = tf.nn.relu(l2)
  #l2 = tf.nn.dropout(l2, dropout_keep_prob)
  l3 = tf.matmul(l2, wt_h_l3) + b_h_l3
  loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(l3, tf_train_labels))

  # Optimizer.
  optimizer = tf.train.AdamOptimizer().minimize(loss)

  # Predictions for the training, validation, and test data.
  train_prediction = tf.nn.softmax(l3)
  valid_prediction = getPrediction(tf_valid_dataset, weights, wt_hidden, wt_h_l3,
                                   biases, b_hidden, b_h_l3)
  test_prediction = getPrediction(tf_test_dataset, weights, wt_hidden, wt_h_l3,
                                  biases, b_hidden, b_h_l3)

num_steps = 5000

with tf.Session(graph=graph) as session:
  tf.initialize_all_variables().run()
  print("Initialized")
  for step in range(num_steps):
    # Pick an offset within the training data, which has been randomized.
    # Note: we could use better randomization across epochs.
    offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
    # Generate a minibatch.
    batch_data = train_dataset[offset:(offset + batch_size), :]
    batch_labels = train_labels[offset:(offset + batch_size), :]
    # Prepare a dictionary telling the session where to feed the minibatch.
    feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
    _, l, predictions = session.run(
      [optimizer, loss, train_prediction], feed_dict=feed_dict)
    if (step % 500 == 0):
      print("Minibatch loss at step %d: %f" % (step, l))
      print("Minibatch accuracy: %.1f%%" % accuracy(predictions, batch_labels))
      print("Validation accuracy: %.1f%%" % accuracy(
        valid_prediction.eval(), valid_labels))
  print("Test accuracy: %.1f%%" % accuracy(test_prediction.eval(), test_labels))
```

The output:

```
Initialized
Minibatch loss at step 0: 3763.713867
Minibatch accuracy: 10.2%
Validation accuracy: 11.6%
Minibatch loss at step 500: 218.475220
Minibatch accuracy: 78.9%
Validation accuracy: 79.3%
Minibatch loss at step 1000: 289.750031
Minibatch accuracy: 78.1%
Validation accuracy: 80.2%
Minibatch loss at step 1500: 171.686737
Minibatch accuracy: 84.4%
Validation accuracy: 81.1%
Minibatch loss at step 2000: 123.215240
Minibatch accuracy: 85.2%
Validation accuracy: 82.1%
Minibatch loss at step 2500: 57.080734
Minibatch accuracy: 89.8%
Validation accuracy: 82.4%
Minibatch loss at step 3000: 63.220982
Minibatch accuracy: 85.9%
Validation accuracy: 83.0%
Minibatch loss at step 3500: 95.992943
Minibatch accuracy: 82.0%
Validation accuracy: 83.1%
Minibatch loss at step 4000: 69.324394
Minibatch accuracy: 86.7%
Validation accuracy: 83.2%
Minibatch loss at step 4500: 75.464554
Minibatch accuracy: 82.0%
Validation accuracy: 83.7%
Test accuracy: 90.4%
```
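A note on the dropout lines left commented out above: because the training computation and the `getPrediction` evaluation paths are built separately here, simply un-commenting them would apply dropout only during training, which is the desired behavior. In graphs that share a single forward pass, the usual pattern is to feed the keep probability through a placeholder instead; a minimal sketch (the `keep_prob` placeholder is my addition, not part of the original notebook code):

```python
# Sketch: gate dropout through a placeholder so one forward pass can
# serve both training and evaluation. keep_prob is hypothetical here.
keep_prob = tf.placeholder(tf.float32)

l1 = tf.nn.relu(tf.matmul(tf_train_dataset, weights) + biases)
l1 = tf.nn.dropout(l1, keep_prob)  # active only when keep_prob < 1.0

# Training step: feed keep_prob = dropout_keep_prob (0.75), e.g.
# feed_dict = {tf_train_dataset: batch_data,
#              tf_train_labels: batch_labels,
#              keep_prob: dropout_keep_prob}
# Evaluation: feed keep_prob = 1.0 to disable dropout.
```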