Full-matrix approach to backpropagation in an artificial neural network
Here is my code. The time taken to iterate 30 epochs drops from 800+ seconds to a little over 200 seconds on my machine.
Since I am new to Python, I used what was available; this snippet only requires NumPy to run.
Give it a try.
def feedforward2(self, a):
    # Feed the whole mini-batch forward at once: `a` holds one example per column.
    zs = []
    activations = [a]
    activation = a
    for b, w in zip(self.biases, self.weights):
        z = np.dot(w, activation) + b  # the (n x 1) bias broadcasts across the batch columns
        zs.append(z)
        activation = sigmoid(z)
        activations.append(activation)
    return (zs, activations)
def update_mini_batch2(self, mini_batch, eta):
    batch_size = len(mini_batch)
    # transform to (input x batch_size) matrix
    x = np.asarray([_x.ravel() for _x, _y in mini_batch]).transpose()
    # transform to (output x batch_size) matrix
    y = np.asarray([_y.ravel() for _x, _y in mini_batch]).transpose()
    nabla_b, nabla_w = self.backprop2(x, y)
    self.weights = [w - (eta / batch_size) * nw for w, nw in zip(self.weights, nabla_w)]
    self.biases = [b - (eta / batch_size) * nb for b, nb in zip(self.biases, nabla_b)]
    return
def backprop2(self, x, y):
    nabla_b = [0 for i in self.biases]
    nabla_w = [0 for i in self.weights]
    # feedforward
    zs, activations = self.feedforward2(x)
    # backward pass
    delta = self.cost_derivative(activations[-1], y) * sigmoid_prime(zs[-1])
    nabla_b[-1] = delta.sum(1).reshape([len(delta), 1])  # sum over the batch, reshape to (n x 1) matrix
    nabla_w[-1] = np.dot(delta, activations[-2].transpose())
    for l in xrange(2, self.num_layers):
        z = zs[-l]
        sp = sigmoid_prime(z)
        delta = np.dot(self.weights[-l + 1].transpose(), delta) * sp
        nabla_b[-l] = delta.sum(1).reshape([len(delta), 1])  # sum over the batch, reshape to (n x 1) matrix
        nabla_w[-l] = np.dot(delta, activations[-l - 1].transpose())
    return (nabla_b, nabla_w)
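To try it out, this is roughly how it can be called (a minimal sketch, not part of the original answer: it assumes the three methods above have been added to the Network class from Nielsen's book, and that training_data is the usual list of (x, y) column-vector pairs produced by the book's mnist_loader):

net = Network([784, 30, 10])             # layer sizes as in the book's example (assumption)
mini_batch = training_data[:10]          # any list of (x, y) pairs works
net.update_mini_batch2(mini_batch, 3.0)  # one whole-batch gradient step with eta = 3.0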
I have recently been learning about Artificial Neural Networks (ANN) and have working Python code for mini-batch training. I followed Michael Nielsen's book Neural Networks and Deep Learning, which gives a step-by-step, beginner-friendly explanation of each algorithm, and its fully working code for handwritten digit recognition also runs fine for me.
However, I am trying to modify that code so that the whole mini-batch is passed through backpropagation together, in matrix form. I have developed working code for that as well, but it runs very slowly. Is there a way to implement a full matrix-based approach to mini-batch learning of the network with the backpropagation algorithm?
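For reference, the whole-batch matrix form I am aiming for should, if I understand the book's equations BP1-BP4 correctly, look like this, with one training example per column of X and Y and the per-example gradients summed over the batch (the eta/n factor in the update then does the averaging):

Z^l = W^l A^{l-1} + b^l \mathbf{1}^T, \qquad A^l = \sigma(Z^l), \qquad A^0 = X
\delta^L = (A^L - Y) \odot \sigma'(Z^L)
\delta^l = \big((W^{l+1})^T \delta^{l+1}\big) \odot \sigma'(Z^l)
\nabla_{b^l} C = \delta^l \mathbf{1}, \qquad \nabla_{W^l} C = \delta^l (A^{l-1})^T

My attempt below instead builds block-diagonal matrices for the whole mini-batch: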
import numpy as np
import pandas as pd
class Network:

    def __init__(self, sizes):
        self.layers = len(sizes)
        self.sizes = sizes
        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
        self.weights = [np.random.randn(y, x) for y, x in zip(sizes[1:], sizes[:-1])]

    def feed_forward(self, a):
        for w, b in zip(self.weights, self.biases):
            a = sigmoid(np.dot(w, a) + b)
        return a

    # Calculate the cost derivative (gradient of C w.r.t. 'a' - nabla C(a))
    def cost_derivative(self, output_activation, y):
        return (output_activation - y)
    def update_mini_batch(self, mini_batch, eta):
        from scipy.linalg import block_diag
        n = len(mini_batch)
        xs = [x for x, y in mini_batch]
        features = block_diag(*xs)
        ys = [y for x, y in mini_batch]
        responses = block_diag(*ys)
        ws = [a for a in self.weights for i in xrange(n)]
        new_list = []
        k = 0
        while (k < len(ws)):
            new_list.append(ws[k: k + n])
            k += n
        weights = [block_diag(*elems) for elems in new_list]
        bs = [b for b in self.biases for i in xrange(n)]
        new_list2 = []
        j = 0
        while (j < len(bs)):
            new_list2.append(bs[j: j + n])
            j += n
        biases = [block_diag(*elems) for elems in new_list2]
        biases_dim_1 = [np.dot(np.ones((n * b.shape[0], b.shape[0])), b) for b in self.biases]
        biases_dim_2 = [np.dot(b, np.ones((b.shape[1], n * b.shape[1]))) for b in biases_dim_1]
        weights_dim_1 = [np.dot(np.ones((n * w.shape[0], w.shape[0])), w) for w in self.weights]
        weights_dim_2 = [np.dot(w, np.ones((w.shape[1], n * w.shape[1]))) for w in weights_dim_1]
        nabla_b = [np.zeros(b.shape) for b in biases_dim_2]
        nabla_w = [np.zeros(w.shape) for w in weights_dim_2]
        delta_b = [np.zeros(b.shape) for b in self.biases]
        delta_w = [np.zeros(w.shape) for w in self.weights]
        zs = []
        activation = features
        activations = [features]
        for w, b in zip(weights, biases):
            z = np.dot(w, activation) + b
            zs.append(z)
            activation = sigmoid(z)
            activations.append(activation)
        delta = self.cost_derivative(activations[-1], responses) * sigmoid_prime(zs[-1])
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, activations[-2].transpose())
        for l in xrange(2, self.layers):
            z = zs[-l]  # the weighted input for that layer
            activation_prime = sigmoid_prime(z)  # the derivative of the activation for that layer
            delta = np.dot(weights[-l + 1].transpose(), delta) * activation_prime  # the adjustment term (delta) for that layer
            nabla_b[-l] = delta  # bias adjustments - by means of eq. BP3
            nabla_w[-l] = np.dot(delta, activations[-l - 1].transpose())  # weight adjustments - by means of eq. BP4
        delta_b = [self.split_cases(b, n) for b in nabla_b]
        delta_w = [self.split_cases(w, n) for w in nabla_w]
        self.weights = [w - (eta / n) * nw for w, nw in zip(self.weights, delta_w)]
        self.biases = [b - (eta / n) * nb for b, nb in zip(self.biases, delta_b)]
    def split_cases(self, mat, mini_batch_size):
        # Sum the per-example blocks along the diagonal back into a single gradient matrix.
        i = 0
        j = 0
        dim1 = mat.shape[0] / mini_batch_size
        dim2 = mat.shape[1] / mini_batch_size
        sum_samples = np.zeros((dim1, dim2))
        while i < len(mat):
            sum_samples = sum_samples + mat[i: i + dim1, j: j + dim2]
            i += dim1
            j += dim2
        return sum_samples
"""Stochastic Gradient Descent for training in epochs"""
def SGD(self, training_data, epochs, mini_batch_size, eta, test_data = None):
n = len(training_data)
if test_data:
n_test = len(test_data)
for j in xrange(epochs):
np.random.shuffle(training_data) # for each epochs the mini-batches are selected randomly
mini_batches = [training_data[k: k+mini_batch_size] for k in xrange(0, n, mini_batch_size)] # select equal sizes of mini-batches for the epochs (last mini_batch size might differ however)
c = 1
for mini_batch in mini_batches:
print "Updating mini-batch {0}".format(c)
self.update_mini_batch(mini_batch, eta)
c += 1
if test_data:
print "Epoch {0}: {1}/{2}".format(j, self.evaluate(test_data), n_test)
else:
print "Epoch {0} completed.".format(j)
    def evaluate(self, test_data):
        test_results = [(np.argmax(self.feed_forward(x)), y) for (x, y) in test_data]
        return sum(int(x == y) for x, y in test_results)

    def export_results(self, test_data):
        results = [(np.argmax(self.feed_forward(x)), y) for (x, y) in test_data]
        k = pd.DataFrame(results)
        k.to_csv('net_results.csv')
# Global functions
## Activation function (sigmoid)
@np.vectorize
def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

## Activation derivative (sigmoid_prime)
@np.vectorize
def sigmoid_prime(z):
    return sigmoid(z) * (1 - sigmoid(z))
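For completeness, this is roughly how I train and evaluate the network above (a sketch with assumed names: training_data and test_data are the lists produced by the book's mnist_loader, and the layer sizes are just the ones used in the book's example):

net = Network([784, 30, 10])
net.SGD(training_data, 30, 10, 3.0, test_data=test_data)  # 30 epochs, mini-batch size 10, eta = 3.0
net.export_results(test_data)                              # writes net_results.csv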