neural-network - que - tensorflow redes neuronales

¿Cómo aplicar Drop Out en Tensorflow para mejorar la precisión de la red neuronal? (2)

El punto clave aquí es que:

layer_1 = tf.nn.relu(tf.add(tf.matmul(x, weights_hiden), biases_hidden))
# apply DropOut to hidden layer
keep_prob = tf.placeholder(tf.float32)  # DROP-OUT here
drop_out = tf.nn.dropout(layer_1, keep_prob)  # DROP-OUT here
# output layer with linear activation
# NOTE: the output is built from layer_1, so drop_out above is never used.
out_layer = tf.matmul(layer_1, weights_out) + biases_out

Se convierte en:

layer_1 = tf.nn.relu(tf.add(tf.matmul(x, weights_hiden), biases_hidden))
# apply DropOut to hidden layer
drop_out = tf.nn.dropout(layer_1, keep_prob)  # DROP-OUT here
# output layer with linear activation, fed by the dropped-out activations
out_layer = tf.matmul(drop_out, weights_out) + biases_out

Es decir, en la última línea se usa drop_out en lugar de layer_1. De lo contrario, el resultado de tf.nn.dropout se ignoraría y el dropout nunca se aplicaría a la red.

Dropout es una técnica de regularización. Quiero aplicarla a los datos de notMNIST para reducir el sobreajuste y completar mi tarea del curso Deep Learning de Udacity. He leído la documentación de TensorFlow sobre cómo llamar a tf.nn.dropout, y este es mi código:

# before proceeding further.
from __future__ import print_function
import numpy as np
import tensorflow as tf
from six.moves import cPickle as pickle

pickle_file = 'notMNIST.pickle'

# NOTE: pickle.load can execute arbitrary code; only load trusted files.
with open(pickle_file, 'rb') as f:
    save = pickle.load(f)

train_dataset = save['train_dataset']
train_labels = save['train_labels']
valid_dataset = save['valid_dataset']
valid_labels = save['valid_labels']
test_dataset = save['test_dataset']
test_labels = save['test_labels']
del save  # hint to help gc free up memory
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)

image_size = 28
num_labels = 10


def reformat(dataset, labels):
    """Flatten each image to a vector and one-hot encode the labels."""
    dataset = dataset.reshape((-1, image_size * image_size)).astype(np.float32)
    # Map 1 to [0.0, 1.0, 0.0 ...], 2 to [0.0, 0.0, 1.0 ...]
    labels = (np.arange(num_labels) == labels[:, None]).astype(np.float32)
    return dataset, labels


train_dataset, train_labels = reformat(train_dataset, train_labels)
valid_dataset, valid_labels = reformat(valid_dataset, valid_labels)
test_dataset, test_labels = reformat(test_dataset, test_labels)
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)


def accuracy(predictions, labels):
    """Return the percentage of rows whose argmax matches the label argmax."""
    return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))
            / predictions.shape[0])


# ReLU neuron
# param
training_epochs = 30
batch_size = 521
display_step = 1
n_input = 784  # img shape: 28*28
n_classes = 10  # MNIST total classes (0-9 digits)
# hyper-parameter
n_hidden_1 = 256
learning_rate = 0.05
lambda_term = 0.01

graph = tf.Graph()
with graph.as_default():
    # init weights
    weights_hiden = tf.Variable(
        tf.random_normal([n_input, n_hidden_1], stddev=np.sqrt(n_input)))
    weights_out = tf.Variable(
        tf.random_normal([n_hidden_1, n_classes], stddev=np.sqrt(n_hidden_1)))
    biases_hidden = tf.Variable(tf.random_normal([n_hidden_1]))
    biases_out = tf.Variable(tf.random_normal([n_classes]))
    x = tf.placeholder("float", [None, n_input])
    y = tf.placeholder("float", [None, n_classes])

    def model(x, weights_hiden, weights_out, biases_hidden, biases_out):
        """Build the one-hidden-layer network and return its output logits."""
        # hidden layer with RELU activation
        layer_1 = tf.nn.relu(tf.add(tf.matmul(x, weights_hiden), biases_hidden))
        # apply DropOut to hidden layer
        keep_prob = tf.placeholder(tf.float32)  # DROP-OUT here
        drop_out = tf.nn.dropout(layer_1, keep_prob)  # DROP-OUT here
        # output layer with linear activation
        # NOTE: the output is built from layer_1, so drop_out above is never
        # used and dropout has no effect on the network.
        out_layer = tf.matmul(layer_1, weights_out) + biases_out
        return out_layer

    # Construct model
    pred = model(x, weights_hiden, weights_out, biases_hidden, biases_out)

    # Define loss and optimizer
    cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(pred, y)
        + lambda_term * tf.nn.l2_loss(weights_hiden)
        + lambda_term * tf.nn.l2_loss(weights_out)
        + lambda_term * tf.nn.l2_loss(biases_hidden)
        + lambda_term * tf.nn.l2_loss(biases_out))
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# run the graph
with tf.Session(graph=graph) as sess:
    tf.initialize_all_variables().run()
    print('Initialized')
    # Training cycle
    for epoch in range(training_epochs):
        avg_cost = 0.
        total_batch = int(train_dataset.shape[0]/batch_size)
        # Loop over all batches
        for i in range(total_batch):
            batch_x = train_dataset[(i*batch_size):((i*batch_size) + batch_size), :]
            batch_y = train_labels[(i*batch_size):((i*batch_size) + batch_size), :]
            # Run optimization op (backprop) and cost op (to get loss value)
            _, c = sess.run([optimizer, cost], feed_dict={x: batch_x, y: batch_y})
            # Compute average loss
            avg_cost += c / total_batch
        # Display logs per epoch step
        if epoch % display_step == 0:
            print("Epoch:", '%04d' % (epoch+1), "cost=", "{:.9f}".format(avg_cost))
    print("Optimization Finished!")
    # Test model
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    # Calculate accuracy
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    print("Test data accuracy:", accuracy.eval({x: test_dataset, y: test_labels}))
    print("Valid data accuracy:", accuracy.eval({x: valid_dataset, y: valid_labels}))

tf.nn.dropout se llama en la función model(), pero después de aplicar la técnica de dropout a la red neuronal, la precisión no parecía haber cambiado. Este es el resultado:

Epoch: 0001 cost= 579980.086977807
Epoch: 0002 cost= 238859.802382506
Epoch: 0003 cost= 90672.733752856
Epoch: 0004 cost= 32649.040985028
Epoch: 0005 cost= 11325.878361874
Epoch: 0006 cost= 3866.805511076
Epoch: 0007 cost= 1357.785540469
Epoch: 0008 cost= 519.381747333
Epoch: 0009 cost= 225.359804119
Epoch: 0010 cost= 110.099476707
Epoch: 0011 cost= 55.212384386
Epoch: 0012 cost= 28.469241683
Epoch: 0013 cost= 14.511494627
Epoch: 0014 cost= 6.567228943
Epoch: 0015 cost= 3.186372240
Epoch: 0016 cost= 1.701917576
Epoch: 0017 cost= 1.041632473
Epoch: 0018 cost= 0.843376874
Epoch: 0019 cost= 0.786183911
Epoch: 0020 cost= 0.775412846
Epoch: 0021 cost= 0.782965020
Epoch: 0022 cost= 0.796788171
Epoch: 0023 cost= 0.814522117
Epoch: 0024 cost= 0.832090579
Epoch: 0025 cost= 0.849197715
Epoch: 0026 cost= 0.867473578
Epoch: 0027 cost= 0.889561496
Epoch: 0028 cost= 0.921837020
Epoch: 0029 cost= 16.655304543
Epoch: 0030 cost= 1.421570476
Optimization Finished!
Test data accuracy: 0.8775
Valid data accuracy: 0.8069

¿Cómo puedo aplicar DropOut de Tensorflow para mejorar la precisión de la red? ¡Gracias!

En el grafo, sugiero mover keep_prob = tf.placeholder(tf.float32) fuera de la función model para que sea global.

with graph.as_default():
    ...
    x = tf.placeholder("float", [None, n_input])
    y = tf.placeholder("float", [None, n_classes])
    # Defined at graph level so it can be fed through feed_dict.
    keep_prob = tf.placeholder(tf.float32)

    def model(x, weights_hiden, weights_out, biases_hidden, biases_out, keep_prob):
        """Build the one-hidden-layer network with dropout; return the logits."""
        # hidden layer with RELU activation
        layer_1 = tf.nn.relu(tf.add(tf.matmul(x, weights_hiden), biases_hidden))
        # apply DropOut to hidden layer
        drop_out = tf.nn.dropout(layer_1, keep_prob)  # DROP-OUT here
        # output layer with linear activation
        out_layer = tf.matmul(drop_out, weights_out) + biases_out
        return out_layer
    ...

Al ejecutar la sesión, proporcione el valor de keep_prob deseado durante el entrenamiento y proporcione 1.0 a keep_prob durante la inferencia (validación y/o prueba).

# run the graph
with tf.Session(graph=graph) as sess:
    tf.initialize_all_variables().run()
    ...
    for epoch in range(training_epochs):
        ...
        for i in range(total_batch):
            batch_x = ...
            batch_y = ...
            # Run optimization op (backprop) and cost op (to get loss value)
            # Feed a value < 1.0 for keep prob during training
            _, c = sess.run([optimizer, cost],
                            feed_dict={x: batch_x, y: batch_y, keep_prob: 0.5})
    ...
    # Feed 1.0 for keep prob during testing
    print("Test data accuracy:",
          accuracy.eval({x: test_dataset, y: test_labels, keep_prob: 1.0}))
    print("Valid data accuracy:",
          accuracy.eval({x: valid_dataset, y: valid_labels, keep_prob: 1.0}))