11 条回复
对示例输入,我的输出跟示例不一样,用TF得到的结果也跟示例不一样。我的代码和TF的结果是一样的,所以估计理解错了题意?
做题的时候的代码:
# coding=utf-8
#!/bin/python
import sys
import os
import math

# 请完成下面的函数,实现题目要求的功能
# 当然,你也可以不按照下面这个模板来作答,完全按照自己的想法来 ^-^
# ******************************开始写代码******************************


def sigmoid(inputs):
    """Element-wise logistic sigmoid of a list of floats."""
    return [1.0 / (1.0 + math.exp(-x)) for x in inputs]


def dsigmoid(outputs, grad_outputs):
    """Backprop through sigmoid: grad_in = out * (1 - out) * grad_out.

    `outputs` are the already-activated values, so the derivative is
    expressed in terms of them rather than of the pre-activations.
    """
    return [x * (1.0 - x) * y for x, y in zip(outputs, grad_outputs)]


def loss(y, label):
    """Half sum-of-squared-errors (same quantity as tf.nn.l2_loss)."""
    return sum((a - b) ** 2 for a, b in zip(y, label)) / 2.


def dloss(y, label):
    """Gradient of `loss` with respect to y."""
    return [a - b for a, b in zip(y, label)]


def matmul(inputs, M, n, m):
    """Row vector (length n) times matrix M (n x m) -> list of length m."""
    output = [0.0] * m
    for i in range(n):
        for j in range(m):
            output[j] += inputs[i] * M[i][j]
    return output


def dmatmul(inputs, M, n, m, grad_outputs):
    """Backprop through `matmul`.

    Returns (grad_input, grad_M) where
      grad_M[i][j]  = grad_outputs[j] * inputs[i]
      grad_input[i] = sum_j grad_outputs[j] * M[i][j]
    """
    grad_M = [[0.0] * m for _ in range(n)]
    grad_input = [0.0] * n
    for i in range(n):
        for j in range(m):
            grad_M[i][j] += grad_outputs[j] * inputs[i]
            grad_input[i] += grad_outputs[j] * M[i][j]
    return grad_input, grad_M


def update(M, grad_M, n, m):
    """In-place SGD step on M with learning rate 0.5 (matches the TF code)."""
    for i in range(n):
        for j in range(m):
            M[i][j] -= 0.5 * grad_M[i][j]


def bpnn(N, I, H, O, inputs, targets):
    """Train a one-hidden-layer network, one SGD step per sample.

    N: number of samples; I/H/O: input/hidden/output layer sizes.
    Weights start at zero; a constant 1.0 bias feature is appended to
    each sample (hence I + 1 input units; the output layer has no bias).
    Returns the per-sample loss measured BEFORE that sample's update,
    as a list of N floats.
    """
    I += 1  # extra input unit for the bias term
    W_1 = [[0.0] * H for _ in range(I)]
    W_2 = [[0.0] * O for _ in range(H)]
    res = []
    for _input, _target in zip(inputs, targets):
        # Build a new list: the original `_input += [1.0]` mutated the
        # caller's `inputs` rows in place.
        _input = _input + [1.0]
        # Forward pass.
        _hidden = matmul(_input, W_1, I, H)
        _actived_hidden = sigmoid(_hidden)
        _output = matmul(_actived_hidden, W_2, H, O)
        _actived_ouput = sigmoid(_output)
        _loss = loss(_actived_ouput, _target)
        # Backward pass: gradients flow output -> hidden -> input.
        _grad_actived_output = dloss(_actived_ouput, _target)
        _grad_output = dsigmoid(_actived_ouput, _grad_actived_output)
        _grad_actived_hidden, _grad_W_2 = dmatmul(
            _actived_hidden, W_2, H, O, _grad_output)
        _grad_hidden = dsigmoid(_actived_hidden, _grad_actived_hidden)
        _, _grad_W_1 = dmatmul(_input, W_1, I, H, _grad_hidden)
        # Apply both updates only after all gradients are computed, so
        # W_2's gradient uses the pre-update weights.
        update(W_1, _grad_W_1, I, H)
        update(W_2, _grad_W_2, H, O)
        res.append(_loss)
    return res


# ******************************结束写代码******************************

if __name__ == "__main__":
    # Guarding the I/O driver lets the functions above be imported
    # without blocking on stdin.  (Python 2 script: raw_input/xrange
    # as in the original; print(...) works on both 2 and 3.)
    _input = raw_input()
    _N, _I, _H, _O = _input.split()
    _N = int(_N)
    _I = int(_I)
    _H = int(_H)
    _O = int(_O)
    _inputs = []
    _targets = []
    for _inputs_i in xrange(_N):
        _inputs_temp = map(int, raw_input().strip().split(' '))
        _inputs.append(_inputs_temp)
        _targets_temp = map(int, raw_input().strip().split(' '))
        _targets.append(_targets_temp)
    res = bpnn(_N, _I, _H, _O, _inputs, _targets)
    for res_cur in res:
        print("%.3f" % res_cur)
TensorFlow的代码:
import tensorflow as tf

# Layer sizes.  I already counts the appended bias input (2 features + 1).
I, H, O = 3, 2, 1

_input = tf.placeholder(tf.float32, [1, I])
_target = tf.placeholder(tf.float32, [1, O])

# Use the size constants instead of the hard-coded [3, 2] / [2, 1]
# literals, so changing I/H/O above keeps the graph consistent.
W_1 = tf.Variable(tf.zeros([I, H]))
W_2 = tf.Variable(tf.zeros([H, O]))

_hidden = tf.sigmoid(tf.matmul(_input, W_1))
_output = tf.sigmoid(tf.matmul(_hidden, W_2))
# l2_loss(x) = sum(x ** 2) / 2, i.e. half sum-of-squared-errors.
_loss = tf.nn.l2_loss(_output - _target)
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(_loss)

inputs = [[0, 0], [0, 1], [1, 0], [1, 1]]
targets = [[1], [1], [1], [0]]

sess = tf.Session()
sess.run(tf.global_variables_initializer())
for x, y in zip(inputs, targets):
    # Build a new list: `x += [1.]` would mutate the rows of `inputs`.
    x = x + [1.]
    x = [x]
    y = [y]
    # Fetching _loss together with train_step reports the loss computed
    # from the pre-update weights of this step.
    _, loss, w1, w2 = sess.run([train_step, _loss, W_1, W_2],
                               feed_dict={_input: x, _target: y})
    print('%.3f' % loss)
double lr = 0.5; vector < double > bpnn(int N, int I, int H, int O, vector < vector < int > > inputs, vector < vector < int > > targets, vector<vector<double>>& W1, vector<vector<double>>& W2) { vector<double> diff; for (int i = 0; i < N; ++i){ //前向传播 vector<double> deltaO; vector<double> AH(H, 0.0); vector<double> AO(O, 0.0); for (int j = 0; j < H; ++j){ double a = 0; int k = 0; for (; k < I; ++k){ a += inputs[i][k] * W1[k][j]; } a += W1[k][j]; AH[j] = sigmoid(a); } for (int j = 0; j < O; ++j){ double a = 0; for (int k = 0; k < H; ++k){ a += AH[k] * W2[k][j]; } AO[j] = sigmoid(a); } double error = 0; for (int j = 0; j < O; ++j){ deltaO.push_back(AO[j] - targets[i][j]); error += (AO[j] - targets[i][j])*(AO[j] - targets[i][j]); } error = 0.5*error; diff.push_back(error); //求偏导W2 vector<vector<double>> dW2; for (int j = 0; j < H; ++j){ dW2.push_back(vector<double>(O, 0.0)); } vector<double> dAH(H, 0.0); for (int j = 0; j < H; ++j){ double dah = 0.0; for (int k = 0; k < O; ++k){ dW2[j][k] = deltaO[k] * dsigmoid(AO[k]) * AH[j]; dah += deltaO[k] * dsigmoid(AO[k]) * W2[j][k]; } dAH[j] = dah; } //求偏导W1 vector<vector<double>> dW1; for (int j = 0; j <=I; ++j){ dW1.push_back(vector<double>(H, 0.0)); } for (int j = 0; j < I; ++j){ for (int k = 0; k < H; ++k){ dW1[j][k] = dAH[k] * dsigmoid(AH[k])*inputs[i][j]; } } //求偏导偏置 for (int k = 0; k < H; ++k){ dW1[I][k] = dAH[k] * dsigmoid(AH[k]); } //更新权值 for (int j = 0; j <= I; ++j){ for (int k = 0; k < H; ++k){ W1[j][k] -= lr*dW1[j][k]; } } for (int j = 0; j < H; ++j){ for (int k = 0; k < O; ++k){ W2[j][k] -= lr*dW2[j][k]; } } } return diff; }
添加回复