lrCostFunction.m (logistic regression cost function)
% Regularized logistic-regression cost J and its gradient grad.
% Reads X, y, theta, lambda and m from the enclosing scope
% (m is presumably the number of training examples -- TODO confirm against the function header).
h = sigmoid(X*theta); % hypothesis, evaluated once and reused for both cost and gradient

% Unregularized cross-entropy cost.
J = 1./m*(-y'*log(h)-(1-y')*log(1-h));

% L2 penalty over theta(2:end) only: the first (constant-term) coefficient is
% not regularized. Summing the penalized entries directly avoids the rounding
% error of computing sum(theta.^2) and then subtracting theta(1).^2.
J = J + lambda/(2*m)*sum(theta(2:end).^2);

% Unregularized gradient.
grad = 1./m*X'*(h-y);

% Regularization term of the gradient with the constant-term entry zeroed up
% front, so grad(1) is never perturbed by an add-then-subtract round trip.
reg = lambda/m*theta;
reg(1) = 0;
grad = grad + reg;
oneVsAll.m
% Train one regularized logistic-regression classifier per label and store
% each fitted parameter vector as a row of all_theta.
% n, num_labels, X, y and lambda come from the enclosing scope.
initial_theta = zeros(n + 1, 1);                      % every classifier starts from all zeros
options = optimset('GradObj', 'on', 'MaxIter', 50);   % cost function supplies the gradient; cap at 50 iterations
for c = 1:num_labels
    % Binary target for this label: (y == c) marks the examples of class c.
    costFn = @(t)(lrCostFunction(t, X, (y == c), lambda));
    theta = fmincg(costFn, initial_theta, options);
    all_theta(c, :) = theta';                         % row c holds the parameters for label c
end
predictOneVsAll.m
% Score every example (row of X) against every classifier (row of all_theta),
% then take the row-wise maximum: c is the largest score in each row and p is
% the column index where it occurs, used as the predicted label.
scores = sigmoid(X * all_theta');
[c, p] = max(scores, [], 2);
predict.m
% Forward pass through a network with weight matrices Theta1 and Theta2,
% followed by a row-wise arg-max to pick the prediction.
% m, X, Theta1 and Theta2 come from the enclosing scope.
X = horzcat(ones(m, 1), X);                          % prepend the constant (bias) column
z2 = X * Theta1';                                    % pre-activation of the hidden layer
a2 = horzcat(ones(size(z2, 1), 1), sigmoid(z2));     % hidden activations with a bias column prepended
a3 = sigmoid(a2 * Theta2');                          % activations of the final layer
[c, p] = max(a3, [], 2);                             % per row: c = largest activation, p = its column index (the prediction)