using System;
using System.Collections.Generic;
namespace DropoutDemo
{
/// <summary>
/// Console demo: builds a 4-9-3 neural network, trains it with dropout on a
/// hard-coded 120-item Iris training set, and prints the resulting weights and
/// the classification accuracy on the training set and a 30-item test set.
/// </summary>
class DropoutProgram
{
    /// <summary>
    /// Program entry point. Prints a description of the data, trains the network
    /// for 1000 epochs at learning rate 0.05, prints the results, then waits for
    /// a line of console input before returning.
    /// </summary>
    /// <param name="args">Command-line arguments (not used).</param>
    static void Main(string[] args)
    {
        Console.WriteLine("\nBegin neural network dropout demo");
        Console.WriteLine("\nData is the famous Iris flower set.");
        Console.WriteLine("Input is sepal length, width, petal length, width");
        Console.WriteLine("Class to predict is species");
        Console.WriteLine("setosa = 0 0 1, versicolor = 0 1 0, virginica = 1 0 0 ");
        Console.WriteLine("\nRaw data has 150 total items like:\n");
        Console.WriteLine(" 5.1, 3.5, 1.4, 0.2, Iris setosa");
        Console.WriteLine(" 7.0, 3.2, 4.7, 1.4, Iris versicolor");
        Console.WriteLine(" 6.3, 3.3, 6.0, 2.5, Iris virginica");
        Console.WriteLine(" ......\n");

        Console.WriteLine("Loading 80-20% training-test data");
        double[][] trainData = MakeTrainData();
        double[][] testData = MakeTestData();

        Console.WriteLine("\nFirst 5 rows of training data:");
        ShowMatrix(trainData, 5, 1, true);
        Console.WriteLine("First 3 rows of test data:");
        ShowMatrix(testData, 3, 1, true);

        Console.WriteLine("\nCreating a 4-input, 9-hidden, 3-output dropout neural network");
        Console.WriteLine("Using tanh (hidden) and softmax (output) activations");
        const int numInput = 4;
        const int numHidden = 9;
        const int numOutput = 3;
        NeuralNetwork nn = new NeuralNetwork(numInput, numHidden, numOutput);

        int maxEpochs = 1000;
        double learnRate = 0.05;
        Console.WriteLine("Setting maxEpochs = 1000, learnRate = 0.05");
        Console.WriteLine("No momentum or weight decay");
        Console.WriteLine("No early exit on low error condition");

        Console.WriteLine("\nBeginning training using back-propagation with dropout\n");
        nn.Train(trainData, maxEpochs, learnRate);
        Console.WriteLine("Training complete\n");

        double[] weights = nn.GetWeights();
        Console.WriteLine("Final neural network weights and bias values:");
        ShowVector(weights, 10, 3, true);

        double trainAcc = nn.Accuracy(trainData);
        Console.WriteLine("\nAccuracy on training data = " + trainAcc.ToString("F4"));
        double testAcc = nn.Accuracy(testData);
        Console.WriteLine("\nAccuracy on test data = " + testAcc.ToString("F4"));

        Console.WriteLine("\nEnd dropout demo\n");
        Console.ReadLine(); // keep the console window open until Enter is pressed
    } // Main

    // Builds the 120 training items. Each row holds four measurements followed by
    // a three-value one-hot species code (see the encoding printed by Main).
    private static double[][] MakeTrainData()
    {
        double[][] trainData = new double[120][];
        trainData[0] = new double[] { 6.0, 3.4, 4.5, 1.6, 0, 1, 0 };
        trainData[1] = new double[] { 6.7, 2.5, 5.8, 1.8, 1, 0, 0 };
        trainData[2] = new double[] { 4.9, 3.1, 1.5, 0.1, 0, 0, 1 };
        trainData[3] = new double[] { 7.7, 2.8, 6.7, 2.0, 1, 0, 0 };
        trainData[4] = new double[] { 6.4, 3.2, 5.3, 2.3, 1, 0, 0 };
        trainData[5] = new double[] { 6.7, 3.1, 4.7, 1.5, 0, 1, 0 };
        trainData[6] = new double[] { 6.3, 3.4, 5.6, 2.4, 1, 0, 0 };
        trainData[7] = new double[] { 5.9, 3.2, 4.8, 1.8, 0, 1, 0 };
        trainData[8] = new double[] { 6.3, 2.5, 5.0, 1.9, 1, 0, 0 };
        trainData[9] = new double[] { 4.6, 3.2, 1.4, 0.2, 0, 0, 1 };
        trainData[10] = new double[] { 7.0, 3.2, 4.7, 1.4, 0, 1, 0 };
        trainData[11] = new double[] { 6.6, 3.0, 4.4, 1.4, 0, 1, 0 };
        trainData[12] = new double[] { 5.7, 2.8, 4.1, 1.3, 0, 1, 0 };
        trainData[13] = new double[] { 6.7, 3.0, 5.0, 1.7, 0, 1, 0 };
        trainData[14] = new double[] { 6.5, 3.0, 5.2, 2.0, 1, 0, 0 };
        trainData[15] = new double[] { 5.1, 3.8, 1.5, 0.3, 0, 0, 1 };
        trainData[16] = new double[] { 7.9, 3.8, 6.4, 2.0, 1, 0, 0 };
        trainData[17] = new double[] { 5.9, 3.0, 5.1, 1.8, 1, 0, 0 };
        trainData[18] = new double[] { 7.3, 2.9, 6.3, 1.8, 1, 0, 0 };
        trainData[19] = new double[] { 5.0, 2.0, 3.5, 1.0, 0, 1, 0 };
        trainData[20] = new double[] { 6.2, 2.8, 4.8, 1.8, 1, 0, 0 };
        trainData[21] = new double[] { 7.4, 2.8, 6.1, 1.9, 1, 0, 0 };
        trainData[22] = new double[] { 6.2, 3.4, 5.4, 2.3, 1, 0, 0 };
        trainData[23] = new double[] { 5.2, 3.5, 1.5, 0.2, 0, 0, 1 };
        trainData[24] = new double[] { 6.8, 3.0, 5.5, 2.1, 1, 0, 0 };
        trainData[25] = new double[] { 5.5, 2.6, 4.4, 1.2, 0, 1, 0 };
        trainData[26] = new double[] { 6.9, 3.1, 5.1, 2.3, 1, 0, 0 };
        trainData[27] = new double[] { 6.4, 2.7, 5.3, 1.9, 1, 0, 0 };
        trainData[28] = new double[] { 5.6, 2.7, 4.2, 1.3, 0, 1, 0 };
        trainData[29] = new double[] { 4.4, 3.0, 1.3, 0.2, 0, 0, 1 };
        trainData[30] = new double[] { 6.9, 3.1, 4.9, 1.5, 0, 1, 0 };
        trainData[31] = new double[] { 5.4, 3.0, 4.5, 1.5, 0, 1, 0 };
        trainData[32] = new double[] { 5.8, 2.7, 4.1, 1.0, 0, 1, 0 };
        trainData[33] = new double[] { 4.6, 3.6, 1.0, 0.2, 0, 0, 1 };
        trainData[34] = new double[] { 5.1, 3.5, 1.4, 0.2, 0, 0, 1 };
        trainData[35] = new double[] { 4.9, 3.0, 1.4, 0.2, 0, 0, 1 };
        trainData[36] = new double[] { 5.1, 3.4, 1.5, 0.2, 0, 0, 1 };
        trainData[37] = new double[] { 5.5, 2.4, 3.8, 1.1, 0, 1, 0 };
        trainData[38] = new double[] { 6.8, 2.8, 4.8, 1.4, 0, 1, 0 };
        trainData[39] = new double[] { 6.7, 3.0, 5.2, 2.3, 1, 0, 0 };
        trainData[40] = new double[] { 5.7, 3.0, 4.2, 1.2, 0, 1, 0 };
        trainData[41] = new double[] { 6.0, 2.2, 5.0, 1.5, 1, 0, 0 };
        trainData[42] = new double[] { 6.5, 2.8, 4.6, 1.5, 0, 1, 0 };
        trainData[43] = new double[] { 6.3, 2.5, 4.9, 1.5, 0, 1, 0 };
        trainData[44] = new double[] { 6.7, 3.1, 5.6, 2.4, 1, 0, 0 };
        trainData[45] = new double[] { 6.4, 2.8, 5.6, 2.1, 1, 0, 0 };
        trainData[46] = new double[] { 5.5, 2.4, 3.7, 1.0, 0, 1, 0 };
        trainData[47] = new double[] { 5.2, 3.4, 1.4, 0.2, 0, 0, 1 };
        trainData[48] = new double[] { 6.0, 2.2, 4.0, 1.0, 0, 1, 0 };
        trainData[49] = new double[] { 6.1, 2.8, 4.0, 1.3, 0, 1, 0 };
        trainData[50] = new double[] { 6.1, 3.0, 4.6, 1.4, 0, 1, 0 };
        trainData[51] = new double[] { 5.0, 3.2, 1.2, 0.2, 0, 0, 1 };
        trainData[52] = new double[] { 4.8, 3.4, 1.9, 0.2, 0, 0, 1 };
        trainData[53] = new double[] { 6.3, 3.3, 6.0, 2.5, 1, 0, 0 };
        trainData[54] = new double[] { 5.0, 3.5, 1.6, 0.6, 0, 0, 1 };
        trainData[55] = new double[] { 6.0, 3.0, 4.8, 1.8, 1, 0, 0 };
        trainData[56] = new double[] { 6.3, 2.8, 5.1, 1.5, 1, 0, 0 };
        trainData[57] = new double[] { 7.2, 3.2, 6.0, 1.8, 1, 0, 0 };
        trainData[58] = new double[] { 4.6, 3.4, 1.4, 0.3, 0, 0, 1 };
        trainData[59] = new double[] { 6.9, 3.2, 5.7, 2.3, 1, 0, 0 };
        trainData[60] = new double[] { 6.5, 3.0, 5.5, 1.8, 1, 0, 0 };
        trainData[61] = new double[] { 4.9, 3.1, 1.5, 0.1, 0, 0, 1 };
        trainData[62] = new double[] { 5.1, 3.8, 1.9, 0.4, 0, 0, 1 };
        trainData[63] = new double[] { 5.3, 3.7, 1.5, 0.2, 0, 0, 1 };
        trainData[64] = new double[] { 5.0, 3.3, 1.4, 0.2, 0, 0, 1 };
        trainData[65] = new double[] { 6.6, 2.9, 4.6, 1.3, 0, 1, 0 };
        trainData[66] = new double[] { 5.9, 3.0, 4.2, 1.5, 0, 1, 0 };
        trainData[67] = new double[] { 5.2, 2.7, 3.9, 1.4, 0, 1, 0 };
        trainData[68] = new double[] { 7.7, 3.0, 6.1, 2.3, 1, 0, 0 };
        trainData[69] = new double[] { 5.4, 3.9, 1.7, 0.4, 0, 0, 1 };
        trainData[70] = new double[] { 5.1, 3.5, 1.4, 0.3, 0, 0, 1 };
        trainData[71] = new double[] { 7.2, 3.6, 6.1, 2.5, 1, 0, 0 };
        trainData[72] = new double[] { 6.4, 3.2, 4.5, 1.5, 0, 1, 0 };
        trainData[73] = new double[] { 4.8, 3.0, 1.4, 0.3, 0, 0, 1 };
        trainData[74] = new double[] { 6.2, 2.2, 4.5, 1.5, 0, 1, 0 };
        trainData[75] = new double[] { 5.8, 2.7, 3.9, 1.2, 0, 1, 0 };
        trainData[76] = new double[] { 7.2, 3.0, 5.8, 1.6, 1, 0, 0 };
        trainData[77] = new double[] { 4.7, 3.2, 1.6, 0.2, 0, 0, 1 };
        trainData[78] = new double[] { 6.3, 2.3, 4.4, 1.3, 0, 1, 0 };
        trainData[79] = new double[] { 6.8, 3.2, 5.9, 2.3, 1, 0, 0 };
        trainData[80] = new double[] { 5.0, 2.3, 3.3, 1.0, 0, 1, 0 };
        trainData[81] = new double[] { 5.7, 2.5, 5.0, 2.0, 1, 0, 0 };
        trainData[82] = new double[] { 7.7, 2.6, 6.9, 2.3, 1, 0, 0 };
        trainData[83] = new double[] { 6.5, 3.0, 5.8, 2.2, 1, 0, 0 };
        trainData[84] = new double[] { 6.1, 2.8, 4.7, 1.2, 0, 1, 0 };
        trainData[85] = new double[] { 4.7, 3.2, 1.3, 0.2, 0, 0, 1 };
        trainData[86] = new double[] { 6.9, 3.1, 5.4, 2.1, 1, 0, 0 };
        trainData[87] = new double[] { 6.4, 3.1, 5.5, 1.8, 1, 0, 0 };
        trainData[88] = new double[] { 6.0, 2.9, 4.5, 1.5, 0, 1, 0 };
        trainData[89] = new double[] { 6.4, 2.9, 4.3, 1.3, 0, 1, 0 };
        trainData[90] = new double[] { 4.4, 2.9, 1.4, 0.2, 0, 0, 1 };
        trainData[91] = new double[] { 5.0, 3.6, 1.4, 0.2, 0, 0, 1 };
        trainData[92] = new double[] { 4.4, 3.2, 1.3, 0.2, 0, 0, 1 };
        trainData[93] = new double[] { 5.1, 3.7, 1.5, 0.4, 0, 0, 1 };
        trainData[94] = new double[] { 4.8, 3.1, 1.6, 0.2, 0, 0, 1 };
        trainData[95] = new double[] { 6.5, 3.2, 5.1, 2.0, 1, 0, 0 };
        trainData[96] = new double[] { 6.1, 2.9, 4.7, 1.4, 0, 1, 0 };
        trainData[97] = new double[] { 5.4, 3.7, 1.5, 0.2, 0, 0, 1 };
        trainData[98] = new double[] { 5.7, 3.8, 1.7, 0.3, 0, 0, 1 };
        trainData[99] = new double[] { 7.1, 3.0, 5.9, 2.1, 1, 0, 0 };
        trainData[100] = new double[] { 5.4, 3.9, 1.3, 0.4, 0, 0, 1 };
        trainData[101] = new double[] { 6.1, 2.6, 5.6, 1.4, 1, 0, 0 };
        trainData[102] = new double[] { 6.4, 2.8, 5.6, 2.2, 1, 0, 0 };
        trainData[103] = new double[] { 5.0, 3.0, 1.6, 0.2, 0, 0, 1 };
        trainData[104] = new double[] { 5.8, 2.8, 5.1, 2.4, 1, 0, 0 };
        trainData[105] = new double[] { 6.3, 2.9, 5.6, 1.8, 1, 0, 0 };
        trainData[106] = new double[] { 6.2, 2.9, 4.3, 1.3, 0, 1, 0 };
        trainData[107] = new double[] { 5.5, 3.5, 1.3, 0.2, 0, 0, 1 };
        trainData[108] = new double[] { 6.7, 3.1, 4.4, 1.4, 0, 1, 0 };
        trainData[109] = new double[] { 4.9, 3.1, 1.5, 0.1, 0, 0, 1 };
        trainData[110] = new double[] { 4.6, 3.1, 1.5, 0.2, 0, 0, 1 };
        trainData[111] = new double[] { 5.0, 3.5, 1.3, 0.3, 0, 0, 1 };
        trainData[112] = new double[] { 5.5, 2.5, 4.0, 1.3, 0, 1, 0 };
        trainData[113] = new double[] { 5.5, 4.2, 1.4, 0.2, 0, 0, 1 };
        trainData[114] = new double[] { 5.5, 2.3, 4.0, 1.3, 0, 1, 0 };
        trainData[115] = new double[] { 5.2, 4.1, 1.5, 0.1, 0, 0, 1 };
        trainData[116] = new double[] { 5.6, 2.5, 3.9, 1.1, 0, 1, 0 };
        trainData[117] = new double[] { 5.6, 2.9, 3.6, 1.3, 0, 1, 0 };
        trainData[118] = new double[] { 4.9, 2.4, 3.3, 1.0, 0, 1, 0 };
        trainData[119] = new double[] { 5.7, 2.8, 4.5, 1.3, 0, 1, 0 };
        return trainData;
    }

    // Builds the 30 held-out test items, in the same row layout as the training data.
    private static double[][] MakeTestData()
    {
        double[][] testData = new double[30][];
        testData[0] = new double[] { 6.0, 2.7, 5.1, 1.6, 0, 1, 0 };
        testData[1] = new double[] { 5.1, 3.3, 1.7, 0.5, 0, 0, 1 };
        testData[2] = new double[] { 6.7, 3.3, 5.7, 2.1, 1, 0, 0 };
        testData[3] = new double[] { 5.1, 2.5, 3.0, 1.1, 0, 1, 0 };
        testData[4] = new double[] { 5.6, 2.8, 4.9, 2.0, 1, 0, 0 };
        testData[5] = new double[] { 5.8, 2.7, 5.1, 1.9, 1, 0, 0 };
        testData[6] = new double[] { 5.0, 3.4, 1.5, 0.2, 0, 0, 1 };
        testData[7] = new double[] { 5.4, 3.4, 1.7, 0.2, 0, 0, 1 };
        testData[8] = new double[] { 4.9, 2.5, 4.5, 1.7, 1, 0, 0 };
        testData[9] = new double[] { 5.7, 4.4, 1.5, 0.4, 0, 0, 1 };
        testData[10] = new double[] { 7.7, 3.8, 6.7, 2.2, 1, 0, 0 };
        testData[11] = new double[] { 5.7, 2.9, 4.2, 1.3, 0, 1, 0 };
        testData[12] = new double[] { 5.0, 3.4, 1.6, 0.4, 0, 0, 1 };
        testData[13] = new double[] { 6.3, 3.3, 4.7, 1.6, 0, 1, 0 };
        testData[14] = new double[] { 4.5, 2.3, 1.3, 0.3, 0, 0, 1 };
        testData[15] = new double[] { 4.8, 3.4, 1.6, 0.2, 0, 0, 1 };
        testData[16] = new double[] { 5.8, 4.0, 1.2, 0.2, 0, 0, 1 };
        testData[17] = new double[] { 6.7, 3.3, 5.7, 2.5, 1, 0, 0 };
        testData[18] = new double[] { 4.3, 3.0, 1.1, 0.1, 0, 0, 1 };
        testData[19] = new double[] { 5.4, 3.4, 1.5, 0.4, 0, 0, 1 };
        testData[20] = new double[] { 5.6, 3.0, 4.1, 1.3, 0, 1, 0 };
        testData[21] = new double[] { 6.1, 3.0, 4.9, 1.8, 1, 0, 0 };
        testData[22] = new double[] { 5.7, 2.6, 3.5, 1.0, 0, 1, 0 };
        testData[23] = new double[] { 5.8, 2.7, 5.1, 1.9, 1, 0, 0 };
        testData[24] = new double[] { 5.6, 3.0, 4.5, 1.5, 0, 1, 0 };
        testData[25] = new double[] { 4.8, 3.0, 1.4, 0.1, 0, 0, 1 };
        testData[26] = new double[] { 5.1, 3.8, 1.6, 0.2, 0, 0, 1 };
        testData[27] = new double[] { 7.6, 3.0, 6.6, 2.1, 1, 0, 0 };
        testData[28] = new double[] { 6.3, 2.7, 4.9, 1.8, 1, 0, 0 };
        testData[29] = new double[] { 5.8, 2.6, 4.0, 1.2, 0, 1, 0 };
        return testData;
    }

    /// <summary>
    /// Writes a vector to the console in fixed-point notation, starting a new
    /// line before every group of <paramref name="valsPerRow"/> values.
    /// </summary>
    /// <param name="vector">Values to print.</param>
    /// <param name="valsPerRow">Number of values per console line; must be at least 1.</param>
    /// <param name="decimals">Number of digits after the decimal point.</param>
    /// <param name="newLine">When true, a line break is written after the last value.</param>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="valsPerRow"/> is less than 1 (zero would otherwise cause a
    /// divide-by-zero in the row-break test).
    /// </exception>
    static void ShowVector(double[] vector, int valsPerRow, int decimals, bool newLine)
    {
        if (valsPerRow < 1)
            throw new ArgumentOutOfRangeException("valsPerRow", "valsPerRow must be at least 1");

        string format = "F" + decimals;
        for (int i = 0; i < vector.Length; ++i)
        {
            if (i % valsPerRow == 0) Console.WriteLine("");
            Console.Write(vector[i].ToString(format).PadLeft(decimals + 4) + " ");
        }
        if (newLine) Console.WriteLine("");
    }

    /// <summary>
    /// Writes the leading rows of a jagged matrix to the console, one row per
    /// line, each prefixed with its row index.
    /// </summary>
    /// <param name="matrix">Matrix to print.</param>
    /// <param name="numRows">
    /// Number of leading rows to print; if larger than the matrix, every row is
    /// printed instead of indexing past the end.
    /// </param>
    /// <param name="decimals">Number of digits after the decimal point.</param>
    /// <param name="newLine">When true, a blank line is written after the last row.</param>
    static void ShowMatrix(double[][] matrix, int numRows, int decimals, bool newLine)
    {
        int rowsToShow = Math.Min(numRows, matrix.Length);
        string format = "F" + decimals;
        for (int i = 0; i < rowsToShow; ++i)
        {
            Console.Write(i.ToString().PadLeft(3) + ": ");
            for (int j = 0; j < matrix[i].Length; ++j)
            {
                // The sign is written separately so positive and negative values line up.
                Console.Write(matrix[i][j] >= 0.0 ? " " : "-");
                Console.Write(Math.Abs(matrix[i][j]).ToString(format) + " ");
            }
            Console.WriteLine("");
        }
        if (newLine) Console.WriteLine("");
    }

    // Softmax over all output sums at once, so the scaling factor is computed a
    // single time. The largest sum is subtracted before exponentiating so that
    // Math.Exp never receives a large positive argument.
    // NOTE: nothing in this class calls this method.
    static double[] MySoftmax(double[] oSums)
    {
        // largest output sum
        double max = oSums[0];
        for (int i = 1; i < oSums.Length; ++i)
            if (oSums[i] > max) max = oSums[i];

        // scaling factor: sum of exp(value - max)
        double scale = 0.0;
        for (int i = 0; i < oSums.Length; ++i)
            scale += Math.Exp(oSums[i] - max);

        double[] result = new double[oSums.Length];
        for (int i = 0; i < oSums.Length; ++i)
            result[i] = Math.Exp(oSums[i] - max) / scale;
        return result; // values now sum to 1.0
    }
} // class DropoutProgram
/// <summary>
/// Fully connected input-hidden-output classifier with tanh hidden activation and
/// softmax output activation, trained by online back-propagation with dropout
/// applied to the hidden layer.
/// </summary>
/// <remarks>
/// Data rows passed to <see cref="Train"/> and <see cref="Accuracy"/> hold the
/// input values first, immediately followed by the target values.
/// </remarks>
public class NeuralNetwork
{
    // Probability that a given hidden node is dropped for one training item.
    // The post-training rescale in Train is derived from this same constant.
    private const double DropProbability = 0.50;

    // Per-instance generator with a fixed seed so runs are reproducible. It is
    // used for weight initialization, visit-order shuffling and drop-node
    // selection. Being per-instance (not static) means constructing another
    // network cannot reset this network's random sequence.
    private readonly Random rnd;

    private readonly int numInput;
    private readonly int numHidden;
    private readonly int numOutput;

    private readonly double[] inputs;
    private readonly double[][] ihWeights; // input-hidden
    private readonly double[] hBiases;
    private readonly double[] hOutputs;
    private readonly double[][] hoWeights; // hidden-output
    private readonly double[] oBiases;
    private readonly double[] outputs;

    /// <summary>
    /// Creates a network of the given shape and initializes every weight and
    /// bias to a small random value in [-0.01, 0.01).
    /// </summary>
    /// <param name="numInput">Number of input nodes; at least 1.</param>
    /// <param name="numHidden">Number of hidden nodes; at least 1.</param>
    /// <param name="numOutput">Number of output nodes; at least 1.</param>
    /// <exception cref="ArgumentOutOfRangeException">Any node count is less than 1.</exception>
    public NeuralNetwork(int numInput, int numHidden, int numOutput)
    {
        if (numInput < 1) throw new ArgumentOutOfRangeException("numInput");
        if (numHidden < 1) throw new ArgumentOutOfRangeException("numHidden");
        if (numOutput < 1) throw new ArgumentOutOfRangeException("numOutput");

        this.rnd = new Random(0); // multi-purpose, inc. dropout

        this.numInput = numInput;
        this.numHidden = numHidden;
        this.numOutput = numOutput;

        this.inputs = new double[numInput];
        this.ihWeights = MakeMatrix(numInput, numHidden);
        this.hBiases = new double[numHidden];
        this.hOutputs = new double[numHidden];
        this.hoWeights = MakeMatrix(numHidden, numOutput);
        this.oBiases = new double[numOutput];
        this.outputs = new double[numOutput];

        InitializeWeights();
    } // ctor

    // Allocates a rows-by-cols jagged array filled with zeros.
    private static double[][] MakeMatrix(int rows, int cols)
    {
        double[][] result = new double[rows][];
        for (int r = 0; r < rows; ++r)
            result[r] = new double[cols];
        return result;
    }

    // Total number of weights and biases, i.e. the length of the flat array used
    // by SetWeights and GetWeights.
    private int WeightCount()
    {
        return (numInput * numHidden) + (numHidden * numOutput) + numHidden + numOutput;
    }

    // ----------------------------------------------------------------------------------------

    /// <summary>
    /// Loads all weights and biases from a flat array laid out as: input-hidden
    /// weights (row by row), hidden biases, hidden-output weights (row by row),
    /// output biases.
    /// </summary>
    /// <param name="weights">Flat array of exactly the required length.</param>
    /// <exception cref="ArgumentNullException"><paramref name="weights"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="weights"/> has the wrong length.</exception>
    public void SetWeights(double[] weights)
    {
        if (weights == null)
            throw new ArgumentNullException("weights");
        int expected = WeightCount();
        if (weights.Length != expected)
            throw new ArgumentException(
                "Bad weights array length: expected " + expected + ", got " + weights.Length, "weights");

        int k = 0; // read position in weights
        for (int i = 0; i < numInput; ++i)
            for (int j = 0; j < numHidden; ++j)
                ihWeights[i][j] = weights[k++];
        for (int j = 0; j < numHidden; ++j)
            hBiases[j] = weights[k++];
        for (int j = 0; j < numHidden; ++j)
            for (int o = 0; o < numOutput; ++o)
                hoWeights[j][o] = weights[k++];
        for (int o = 0; o < numOutput; ++o)
            oBiases[o] = weights[k++];
    }

    // Sets every weight and bias to a random value in [-0.01, 0.01).
    private void InitializeWeights()
    {
        double[] initialWeights = new double[WeightCount()];
        double lo = -0.01;
        double hi = 0.01;
        for (int i = 0; i < initialWeights.Length; ++i)
            initialWeights[i] = (hi - lo) * rnd.NextDouble() + lo;
        SetWeights(initialWeights);
    }

    /// <summary>
    /// Returns a copy of all current weights and biases as a flat array, in the
    /// same layout that <see cref="SetWeights"/> accepts.
    /// </summary>
    /// <returns>A newly allocated array; modifying it does not affect the network.</returns>
    public double[] GetWeights()
    {
        double[] result = new double[WeightCount()];
        int k = 0; // write position in result
        for (int i = 0; i < numInput; ++i)
            for (int j = 0; j < numHidden; ++j)
                result[k++] = ihWeights[i][j];
        for (int j = 0; j < numHidden; ++j)
            result[k++] = hBiases[j];
        for (int j = 0; j < numHidden; ++j)
            for (int o = 0; o < numOutput; ++o)
                result[k++] = hoWeights[j][o];
        for (int o = 0; o < numOutput; ++o)
            result[k++] = oBiases[o];
        return result;
    }

    // ----------------------------------------------------------------------------------------

    // Picks the hidden nodes to drop for one training item. Each node is selected
    // independently with probability DropProbability. The result is in ascending
    // order, which IsDropNode's binary search depends on.
    private int[] MakeDropNodes()
    {
        List<int> resultList = new List<int>();
        for (int i = 0; i < numHidden; ++i)
        {
            if (rnd.NextDouble() < DropProbability)
                resultList.Add(i);
        }

        if (resultList.Count == 0)
        {
            // Force at least one drop, but never drop the only hidden node: with a
            // single hidden node that would leave nothing active.
            if (numHidden > 1)
                resultList.Add(rnd.Next(0, numHidden));
        }
        else if (resultList.Count == numHidden)
        {
            // Everything was selected: keep one node active. The list holds
            // 0..numHidden-1 here, so removing by position keeps it sorted.
            resultList.RemoveAt(rnd.Next(0, numHidden));
        }
        return resultList.ToArray();
    }

    // True when node is listed in dropNodes. A null array means "drop nothing".
    // dropNodes must be sorted ascending because a binary search is used.
    private bool IsDropNode(int node, int[] dropNodes)
    {
        return dropNodes != null && Array.BinarySearch(dropNodes, node) >= 0;
    }

    // Feed-forward pass. Hidden nodes listed in dropNodes are skipped entirely
    // (null drops none). The result is stored in this.outputs and a copy is
    // returned. Index convention: i = input, j = hidden, k = output.
    private double[] ComputeOutputs(double[] xValues, int[] dropNodes)
    {
        if (xValues.Length != numInput)
            throw new ArgumentException("Bad xValues array length");

        Array.Copy(xValues, inputs, numInput);

        // hidden layer: weighted sum of inputs, then bias, then tanh
        for (int j = 0; j < numHidden; ++j)
        {
            if (IsDropNode(j, dropNodes)) continue;
            double sum = 0.0;
            for (int i = 0; i < numInput; ++i)
                sum += inputs[i] * ihWeights[i][j];
            sum += hBiases[j];
            hOutputs[j] = HyperTanFunction(sum);
        }

        // output layer: weighted sum of the active hidden outputs, then bias
        double[] oSums = new double[numOutput];
        for (int k = 0; k < numOutput; ++k)
        {
            double sum = 0.0;
            for (int j = 0; j < numHidden; ++j)
            {
                if (IsDropNode(j, dropNodes)) continue;
                sum += hOutputs[j] * hoWeights[j][k];
            }
            oSums[k] = sum + oBiases[k];
        }

        // Softmax needs every output sum, so it runs once, after the loop.
        double[] softOut = Softmax(oSums);
        Array.Copy(softOut, outputs, numOutput);

        // Softmax allocated a fresh array, so it can be handed back directly
        // without exposing the internal outputs buffer.
        return softOut;
    } // ComputeOutputs

    // tanh with explicit saturation: for |x| > 20 the true value differs from
    // +/-1 by far less than double precision can represent.
    private static double HyperTanFunction(double x)
    {
        if (x < -20.0) return -1.0;
        if (x > 20.0) return 1.0;
        return Math.Tanh(x);
    }

    // Softmax over all output sums at once. The largest sum is subtracted before
    // exponentiating so Math.Exp never receives a large positive argument.
    private static double[] Softmax(double[] oSums)
    {
        double max = oSums[0];
        for (int i = 1; i < oSums.Length; ++i)
            if (oSums[i] > max) max = oSums[i];

        double scale = 0.0;
        for (int i = 0; i < oSums.Length; ++i)
            scale += Math.Exp(oSums[i] - max);

        double[] result = new double[oSums.Length];
        for (int i = 0; i < oSums.Length; ++i)
            result[i] = Math.Exp(oSums[i] - max) / scale;
        return result; // values now sum to 1.0
    }

    // ----------------------------------------------------------------------------------------

    // One back-propagation step for the item most recently passed to
    // ComputeOutputs, which must have been called with the same dropNodes.
    // Weights and biases attached to dropped hidden nodes are left untouched.
    private void UpdateWeights(double[] tValues, double learnRate, int[] dropNodes)
    {
        if (tValues.Length != numOutput)
            throw new ArgumentException("target values not same Length as output in UpdateWeights");

        double[] hGrads = new double[numHidden];
        double[] oGrads = new double[numOutput];

        // 1. output gradients: squared-error form, so the (1 - y) * y
        //    activation derivative is included
        for (int k = 0; k < numOutput; ++k)
        {
            double derivative = (1 - outputs[k]) * outputs[k];
            oGrads[k] = derivative * (tValues[k] - outputs[k]);
        }

        // 2. hidden gradients; these must use the hidden-output weights as they
        //    were during the forward pass, i.e. before step 4 changes them
        for (int j = 0; j < numHidden; ++j)
        {
            if (IsDropNode(j, dropNodes)) continue;
            double derivative = (1 - hOutputs[j]) * (1 + hOutputs[j]); // tanh derivative
            double sum = 0.0;
            for (int k = 0; k < numOutput; ++k)
                sum += oGrads[k] * hoWeights[j][k];
            hGrads[j] = derivative * sum;
        }

        // 3. input-hidden weights and hidden biases. Deltas are added (not
        //    subtracted) because the gradients were formed from (target - output).
        for (int j = 0; j < numHidden; ++j)
        {
            if (IsDropNode(j, dropNodes)) continue;
            for (int i = 0; i < numInput; ++i)
                ihWeights[i][j] += learnRate * hGrads[j] * inputs[i];
            hBiases[j] += learnRate * hGrads[j]; // a bias has a constant input of 1
        }

        // 4. hidden-output weights and output biases
        for (int k = 0; k < numOutput; ++k)
        {
            for (int j = 0; j < numHidden; ++j)
            {
                if (IsDropNode(j, dropNodes)) continue;
                hoWeights[j][k] += learnRate * oGrads[k] * hOutputs[j];
            }
            oBiases[k] += learnRate * oGrads[k];
        }
    } // UpdateWeights

    // ----------------------------------------------------------------------------------------

    /// <summary>
    /// Trains the network with online back-propagation and hidden-layer dropout.
    /// No momentum, weight decay or early exit is used. In every epoch all items
    /// are visited once in a freshly shuffled order, each with its own random set
    /// of dropped hidden nodes.
    /// </summary>
    /// <param name="trainData">Rows of input values followed by target values.</param>
    /// <param name="maxEpochs">Number of passes over the data.</param>
    /// <param name="learnRate">Step size applied to every weight delta.</param>
    /// <exception cref="ArgumentNullException"><paramref name="trainData"/> is null.</exception>
    /// <remarks>
    /// When the last epoch finishes, the hidden-output weights are scaled down by
    /// the probability of a node being kept, to account for all hidden nodes
    /// being active outside training. This rescale happens on every call.
    /// </remarks>
    public void Train(double[][] trainData, int maxEpochs, double learnRate)
    {
        if (trainData == null)
            throw new ArgumentNullException("trainData");

        double[] xValues = new double[numInput]; // inputs
        double[] tValues = new double[numOutput]; // target values

        int[] sequence = new int[trainData.Length];
        for (int i = 0; i < sequence.Length; ++i)
            sequence[i] = i;

        for (int epoch = 0; epoch < maxEpochs; ++epoch)
        {
            // An early exit on MeanSquaredError(trainData) could be added here;
            // it is expensive, so checking only every few epochs would be wise.
            Shuffle(sequence); // visit each training item in random order
            for (int i = 0; i < trainData.Length; ++i)
            {
                double[] row = trainData[sequence[i]];
                Array.Copy(row, xValues, numInput);
                Array.Copy(row, numInput, tValues, 0, numOutput);

                int[] dropNodes = MakeDropNodes();
                ComputeOutputs(xValues, dropNodes); // stores outputs internally
                UpdateWeights(tValues, learnRate, dropNodes);
            }
        }

        // With a drop probability of 0.5 this halves every hidden-output weight.
        double keepProbability = 1.0 - DropProbability;
        for (int j = 0; j < numHidden; ++j)
            for (int k = 0; k < numOutput; ++k)
                hoWeights[j][k] *= keepProbability;
    } // Train

    // In-place Fisher-Yates shuffle driven by this network's generator.
    private void Shuffle(int[] sequence)
    {
        for (int i = 0; i < sequence.Length; ++i)
        {
            int r = rnd.Next(i, sequence.Length);
            int tmp = sequence[r];
            sequence[r] = sequence[i];
            sequence[i] = tmp;
        }
    }

    // Sum of squared output errors per item, averaged over all items, computed
    // with no nodes dropped. Intended as a training stopping condition; nothing
    // in this class currently calls it.
    private double MeanSquaredError(double[][] trainData)
    {
        double sumSquaredError = 0.0;
        double[] xValues = new double[numInput]; // first numInput values of a row
        double[] tValues = new double[numOutput]; // the numOutput values after them
        for (int i = 0; i < trainData.Length; ++i)
        {
            Array.Copy(trainData[i], xValues, numInput);
            Array.Copy(trainData[i], numInput, tValues, 0, numOutput);
            double[] yValues = ComputeOutputs(xValues, null);
            for (int j = 0; j < numOutput; ++j)
            {
                double err = tValues[j] - yValues[j];
                sumSquaredError += err * err;
            }
        }
        return sumSquaredError / trainData.Length;
    }

    // ----------------------------------------------------------------------------------------

    /// <summary>
    /// Fraction of items classified correctly, using winner-takes-all on the
    /// network outputs with no hidden nodes dropped.
    /// </summary>
    /// <param name="testData">Rows of input values followed by target values.</param>
    /// <returns>
    /// A value in [0, 1]; 0.0 when <paramref name="testData"/> is empty (instead
    /// of the NaN a 0/0 division would give).
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="testData"/> is null.</exception>
    public double Accuracy(double[][] testData)
    {
        if (testData == null)
            throw new ArgumentNullException("testData");
        if (testData.Length == 0)
            return 0.0;

        int numCorrect = 0;
        double[] xValues = new double[numInput]; // inputs
        double[] tValues = new double[numOutput]; // targets
        for (int i = 0; i < testData.Length; ++i)
        {
            Array.Copy(testData[i], xValues, numInput);
            Array.Copy(testData[i], numInput, tValues, 0, numOutput);
            double[] yValues = ComputeOutputs(xValues, null); // null == drop nothing
            int predicted = MaxIndex(yValues);
            // Exact comparison: an item counts as correct only when the target
            // at the winning index is exactly 1.0.
            if (tValues[predicted] == 1.0)
                ++numCorrect;
        }
        return (double)numCorrect / testData.Length;
    }

    // Index of the largest value; the first such index on ties.
    private static int MaxIndex(double[] vector)
    {
        int bigIndex = 0;
        double biggestVal = vector[0];
        for (int i = 1; i < vector.Length; ++i)
        {
            if (vector[i] > biggestVal)
            {
                biggestVal = vector[i];
                bigIndex = i;
            }
        }
        return bigIndex;
    }
} // class NeuralNetwork
} // ns
using System.Collections.Generic;
namespace DropoutDemo
{
class DropoutProgram
{
static void Main(string[] args)
{
Console.WriteLine("\nBegin neural network dropout demo");
Console.WriteLine("\nData is the famous Iris flower set.");
Console.WriteLine("Input is sepal length, width, petal length, width");
Console.WriteLine("Class to predict is species");
Console.WriteLine("setosa = 0 0 1, versicolor = 0 1 0, virginica = 1 0 0 ");
Console.WriteLine("\nRaw data has 150 total items like:\n");
Console.WriteLine(" 5.1, 3.5, 1.4, 0.2, Iris setosa");
Console.WriteLine(" 7.0, 3.2, 4.7, 1.4, Iris versicolor");
Console.WriteLine(" 6.3, 3.3, 6.0, 2.5, Iris virginica");
Console.WriteLine(" ......\n");
Console.WriteLine("Loading 80-20% training-test data");
double[][] trainData = new double[120][];
trainData[0] = new double[] { 6.0, 3.4, 4.5, 1.6, 0, 1, 0 };
trainData[1] = new double[] { 6.7, 2.5, 5.8, 1.8, 1, 0, 0 };
trainData[2] = new double[] { 4.9, 3.1, 1.5, 0.1, 0, 0, 1 };
trainData[3] = new double[] { 7.7, 2.8, 6.7, 2.0, 1, 0, 0 };
trainData[4] = new double[] { 6.4, 3.2, 5.3, 2.3, 1, 0, 0 };
trainData[5] = new double[] { 6.7, 3.1, 4.7, 1.5, 0, 1, 0 };
trainData[6] = new double[] { 6.3, 3.4, 5.6, 2.4, 1, 0, 0 };
trainData[7] = new double[] { 5.9, 3.2, 4.8, 1.8, 0, 1, 0 };
trainData[8] = new double[] { 6.3, 2.5, 5.0, 1.9, 1, 0, 0 };
trainData[9] = new double[] { 4.6, 3.2, 1.4, 0.2, 0, 0, 1 };
trainData[10] = new double[] { 7.0, 3.2, 4.7, 1.4, 0, 1, 0 };
trainData[11] = new double[] { 6.6, 3.0, 4.4, 1.4, 0, 1, 0 };
trainData[12] = new double[] { 5.7, 2.8, 4.1, 1.3, 0, 1, 0 };
trainData[13] = new double[] { 6.7, 3.0, 5.0, 1.7, 0, 1, 0 };
trainData[14] = new double[] { 6.5, 3.0, 5.2, 2.0, 1, 0, 0 };
trainData[15] = new double[] { 5.1, 3.8, 1.5, 0.3, 0, 0, 1 };
trainData[16] = new double[] { 7.9, 3.8, 6.4, 2.0, 1, 0, 0 };
trainData[17] = new double[] { 5.9, 3.0, 5.1, 1.8, 1, 0, 0 };
trainData[18] = new double[] { 7.3, 2.9, 6.3, 1.8, 1, 0, 0 };
trainData[19] = new double[] { 5.0, 2.0, 3.5, 1.0, 0, 1, 0 };
trainData[20] = new double[] { 6.2, 2.8, 4.8, 1.8, 1, 0, 0 };
trainData[21] = new double[] { 7.4, 2.8, 6.1, 1.9, 1, 0, 0 };
trainData[22] = new double[] { 6.2, 3.4, 5.4, 2.3, 1, 0, 0 };
trainData[23] = new double[] { 5.2, 3.5, 1.5, 0.2, 0, 0, 1 };
trainData[24] = new double[] { 6.8, 3.0, 5.5, 2.1, 1, 0, 0 };
trainData[25] = new double[] { 5.5, 2.6, 4.4, 1.2, 0, 1, 0 };
trainData[26] = new double[] { 6.9, 3.1, 5.1, 2.3, 1, 0, 0 };
trainData[27] = new double[] { 6.4, 2.7, 5.3, 1.9, 1, 0, 0 };
trainData[28] = new double[] { 5.6, 2.7, 4.2, 1.3, 0, 1, 0 };
trainData[29] = new double[] { 4.4, 3.0, 1.3, 0.2, 0, 0, 1 };
trainData[30] = new double[] { 6.9, 3.1, 4.9, 1.5, 0, 1, 0 };
trainData[31] = new double[] { 5.4, 3.0, 4.5, 1.5, 0, 1, 0 };
trainData[32] = new double[] { 5.8, 2.7, 4.1, 1.0, 0, 1, 0 };
trainData[33] = new double[] { 4.6, 3.6, 1.0, 0.2, 0, 0, 1 };
trainData[34] = new double[] { 5.1, 3.5, 1.4, 0.2, 0, 0, 1 };
trainData[35] = new double[] { 4.9, 3.0, 1.4, 0.2, 0, 0, 1 };
trainData[36] = new double[] { 5.1, 3.4, 1.5, 0.2, 0, 0, 1 };
trainData[37] = new double[] { 5.5, 2.4, 3.8, 1.1, 0, 1, 0 };
trainData[38] = new double[] { 6.8, 2.8, 4.8, 1.4, 0, 1, 0 };
trainData[39] = new double[] { 6.7, 3.0, 5.2, 2.3, 1, 0, 0 };
trainData[40] = new double[] { 5.7, 3.0, 4.2, 1.2, 0, 1, 0 };
trainData[41] = new double[] { 6.0, 2.2, 5.0, 1.5, 1, 0, 0 };
trainData[42] = new double[] { 6.5, 2.8, 4.6, 1.5, 0, 1, 0 };
trainData[43] = new double[] { 6.3, 2.5, 4.9, 1.5, 0, 1, 0 };
trainData[44] = new double[] { 6.7, 3.1, 5.6, 2.4, 1, 0, 0 };
trainData[45] = new double[] { 6.4, 2.8, 5.6, 2.1, 1, 0, 0 };
trainData[46] = new double[] { 5.5, 2.4, 3.7, 1.0, 0, 1, 0 };
trainData[47] = new double[] { 5.2, 3.4, 1.4, 0.2, 0, 0, 1 };
trainData[48] = new double[] { 6.0, 2.2, 4.0, 1.0, 0, 1, 0 };
trainData[49] = new double[] { 6.1, 2.8, 4.0, 1.3, 0, 1, 0 };
trainData[50] = new double[] { 6.1, 3.0, 4.6, 1.4, 0, 1, 0 };
trainData[51] = new double[] { 5.0, 3.2, 1.2, 0.2, 0, 0, 1 };
trainData[52] = new double[] { 4.8, 3.4, 1.9, 0.2, 0, 0, 1 };
trainData[53] = new double[] { 6.3, 3.3, 6.0, 2.5, 1, 0, 0 };
trainData[54] = new double[] { 5.0, 3.5, 1.6, 0.6, 0, 0, 1 };
trainData[55] = new double[] { 6.0, 3.0, 4.8, 1.8, 1, 0, 0 };
trainData[56] = new double[] { 6.3, 2.8, 5.1, 1.5, 1, 0, 0 };
trainData[57] = new double[] { 7.2, 3.2, 6.0, 1.8, 1, 0, 0 };
trainData[58] = new double[] { 4.6, 3.4, 1.4, 0.3, 0, 0, 1 };
trainData[59] = new double[] { 6.9, 3.2, 5.7, 2.3, 1, 0, 0 };
trainData[60] = new double[] { 6.5, 3.0, 5.5, 1.8, 1, 0, 0 };
trainData[61] = new double[] { 4.9, 3.1, 1.5, 0.1, 0, 0, 1 };
trainData[62] = new double[] { 5.1, 3.8, 1.9, 0.4, 0, 0, 1 };
trainData[63] = new double[] { 5.3, 3.7, 1.5, 0.2, 0, 0, 1 };
trainData[64] = new double[] { 5.0, 3.3, 1.4, 0.2, 0, 0, 1 };
trainData[65] = new double[] { 6.6, 2.9, 4.6, 1.3, 0, 1, 0 };
trainData[66] = new double[] { 5.9, 3.0, 4.2, 1.5, 0, 1, 0 };
trainData[67] = new double[] { 5.2, 2.7, 3.9, 1.4, 0, 1, 0 };
trainData[68] = new double[] { 7.7, 3.0, 6.1, 2.3, 1, 0, 0 };
trainData[69] = new double[] { 5.4, 3.9, 1.7, 0.4, 0, 0, 1 };
trainData[70] = new double[] { 5.1, 3.5, 1.4, 0.3, 0, 0, 1 };
trainData[71] = new double[] { 7.2, 3.6, 6.1, 2.5, 1, 0, 0 };
trainData[72] = new double[] { 6.4, 3.2, 4.5, 1.5, 0, 1, 0 };
trainData[73] = new double[] { 4.8, 3.0, 1.4, 0.3, 0, 0, 1 };
trainData[74] = new double[] { 6.2, 2.2, 4.5, 1.5, 0, 1, 0 };
trainData[75] = new double[] { 5.8, 2.7, 3.9, 1.2, 0, 1, 0 };
trainData[76] = new double[] { 7.2, 3.0, 5.8, 1.6, 1, 0, 0 };
trainData[77] = new double[] { 4.7, 3.2, 1.6, 0.2, 0, 0, 1 };
trainData[78] = new double[] { 6.3, 2.3, 4.4, 1.3, 0, 1, 0 };
trainData[79] = new double[] { 6.8, 3.2, 5.9, 2.3, 1, 0, 0 };
trainData[80] = new double[] { 5.0, 2.3, 3.3, 1.0, 0, 1, 0 };
trainData[81] = new double[] { 5.7, 2.5, 5.0, 2.0, 1, 0, 0 };
trainData[82] = new double[] { 7.7, 2.6, 6.9, 2.3, 1, 0, 0 };
trainData[83] = new double[] { 6.5, 3.0, 5.8, 2.2, 1, 0, 0 };
trainData[84] = new double[] { 6.1, 2.8, 4.7, 1.2, 0, 1, 0 };
trainData[85] = new double[] { 4.7, 3.2, 1.3, 0.2, 0, 0, 1 };
trainData[86] = new double[] { 6.9, 3.1, 5.4, 2.1, 1, 0, 0 };
trainData[87] = new double[] { 6.4, 3.1, 5.5, 1.8, 1, 0, 0 };
trainData[88] = new double[] { 6.0, 2.9, 4.5, 1.5, 0, 1, 0 };
trainData[89] = new double[] { 6.4, 2.9, 4.3, 1.3, 0, 1, 0 };
trainData[90] = new double[] { 4.4, 2.9, 1.4, 0.2, 0, 0, 1 };
trainData[91] = new double[] { 5.0, 3.6, 1.4, 0.2, 0, 0, 1 };
trainData[92] = new double[] { 4.4, 3.2, 1.3, 0.2, 0, 0, 1 };
trainData[93] = new double[] { 5.1, 3.7, 1.5, 0.4, 0, 0, 1 };
trainData[94] = new double[] { 4.8, 3.1, 1.6, 0.2, 0, 0, 1 };
trainData[95] = new double[] { 6.5, 3.2, 5.1, 2.0, 1, 0, 0 };
trainData[96] = new double[] { 6.1, 2.9, 4.7, 1.4, 0, 1, 0 };
trainData[97] = new double[] { 5.4, 3.7, 1.5, 0.2, 0, 0, 1 };
trainData[98] = new double[] { 5.7, 3.8, 1.7, 0.3, 0, 0, 1 };
trainData[99] = new double[] { 7.1, 3.0, 5.9, 2.1, 1, 0, 0 };
trainData[100] = new double[] { 5.4, 3.9, 1.3, 0.4, 0, 0, 1 };
trainData[101] = new double[] { 6.1, 2.6, 5.6, 1.4, 1, 0, 0 };
trainData[102] = new double[] { 6.4, 2.8, 5.6, 2.2, 1, 0, 0 };
trainData[103] = new double[] { 5.0, 3.0, 1.6, 0.2, 0, 0, 1 };
trainData[104] = new double[] { 5.8, 2.8, 5.1, 2.4, 1, 0, 0 };
trainData[105] = new double[] { 6.3, 2.9, 5.6, 1.8, 1, 0, 0 };
trainData[106] = new double[] { 6.2, 2.9, 4.3, 1.3, 0, 1, 0 };
trainData[107] = new double[] { 5.5, 3.5, 1.3, 0.2, 0, 0, 1 };
trainData[108] = new double[] { 6.7, 3.1, 4.4, 1.4, 0, 1, 0 };
trainData[109] = new double[] { 4.9, 3.1, 1.5, 0.1, 0, 0, 1 };
trainData[110] = new double[] { 4.6, 3.1, 1.5, 0.2, 0, 0, 1 };
trainData[111] = new double[] { 5.0, 3.5, 1.3, 0.3, 0, 0, 1 };
trainData[112] = new double[] { 5.5, 2.5, 4.0, 1.3, 0, 1, 0 };
trainData[113] = new double[] { 5.5, 4.2, 1.4, 0.2, 0, 0, 1 };
trainData[114] = new double[] { 5.5, 2.3, 4.0, 1.3, 0, 1, 0 };
trainData[115] = new double[] { 5.2, 4.1, 1.5, 0.1, 0, 0, 1 };
trainData[116] = new double[] { 5.6, 2.5, 3.9, 1.1, 0, 1, 0 };
trainData[117] = new double[] { 5.6, 2.9, 3.6, 1.3, 0, 1, 0 };
trainData[118] = new double[] { 4.9, 2.4, 3.3, 1.0, 0, 1, 0 };
trainData[119] = new double[] { 5.7, 2.8, 4.5, 1.3, 0, 1, 0 };
double[][] testData = new double[30][];
testData[0] = new double[] { 6.0, 2.7, 5.1, 1.6, 0, 1, 0 };
testData[1] = new double[] { 5.1, 3.3, 1.7, 0.5, 0, 0, 1 };
testData[2] = new double[] { 6.7, 3.3, 5.7, 2.1, 1, 0, 0 };
testData[3] = new double[] { 5.1, 2.5, 3.0, 1.1, 0, 1, 0 };
testData[4] = new double[] { 5.6, 2.8, 4.9, 2.0, 1, 0, 0 };
testData[5] = new double[] { 5.8, 2.7, 5.1, 1.9, 1, 0, 0 };
testData[6] = new double[] { 5.0, 3.4, 1.5, 0.2, 0, 0, 1 };
testData[7] = new double[] { 5.4, 3.4, 1.7, 0.2, 0, 0, 1 };
testData[8] = new double[] { 4.9, 2.5, 4.5, 1.7, 1, 0, 0 };
testData[9] = new double[] { 5.7, 4.4, 1.5, 0.4, 0, 0, 1 };
testData[10] = new double[] { 7.7, 3.8, 6.7, 2.2, 1, 0, 0 };
testData[11] = new double[] { 5.7, 2.9, 4.2, 1.3, 0, 1, 0 };
testData[12] = new double[] { 5.0, 3.4, 1.6, 0.4, 0, 0, 1 };
testData[13] = new double[] { 6.3, 3.3, 4.7, 1.6, 0, 1, 0 };
testData[14] = new double[] { 4.5, 2.3, 1.3, 0.3, 0, 0, 1 };
testData[15] = new double[] { 4.8, 3.4, 1.6, 0.2, 0, 0, 1 };
testData[16] = new double[] { 5.8, 4.0, 1.2, 0.2, 0, 0, 1 };
testData[17] = new double[] { 6.7, 3.3, 5.7, 2.5, 1, 0, 0 };
testData[18] = new double[] { 4.3, 3.0, 1.1, 0.1, 0, 0, 1 };
testData[19] = new double[] { 5.4, 3.4, 1.5, 0.4, 0, 0, 1 };
testData[20] = new double[] { 5.6, 3.0, 4.1, 1.3, 0, 1, 0 };
testData[21] = new double[] { 6.1, 3.0, 4.9, 1.8, 1, 0, 0 };
testData[22] = new double[] { 5.7, 2.6, 3.5, 1.0, 0, 1, 0 };
testData[23] = new double[] { 5.8, 2.7, 5.1, 1.9, 1, 0, 0 };
testData[24] = new double[] { 5.6, 3.0, 4.5, 1.5, 0, 1, 0 };
testData[25] = new double[] { 4.8, 3.0, 1.4, 0.1, 0, 0, 1 };
testData[26] = new double[] { 5.1, 3.8, 1.6, 0.2, 0, 0, 1 };
testData[27] = new double[] { 7.6, 3.0, 6.6, 2.1, 1, 0, 0 };
testData[28] = new double[] { 6.3, 2.7, 4.9, 1.8, 1, 0, 0 };
testData[29] = new double[] { 5.8, 2.6, 4.0, 1.2, 0, 1, 0 };
Console.WriteLine("\nFirst 5 rows of training data:");
ShowMatrix(trainData, 5, 1, true);
Console.WriteLine("First 3 rows of test data:");
ShowMatrix(testData, 3, 1, true);
Console.WriteLine("\nCreating a 4-input, 9-hidden, 3-output dropout neural network");
Console.WriteLine("Using tanh (hidden) and softmax (output) activations");
const int numInput = 4;
const int numHidden = 9;
const int numOutput = 3;
NeuralNetwork nn = new NeuralNetwork(numInput, numHidden, numOutput);
int maxEpochs = 1000;
double learnRate = 0.05;
Console.WriteLine("Setting maxEpochs = 1000, learnRate = 0.05");
Console.WriteLine("No momentum or weight decay");
Console.WriteLine("No early exit on low error condition");
Console.WriteLine("\nBeginning training using back-propagation with dropout\n");
nn.Train(trainData, maxEpochs, learnRate);
Console.WriteLine("Training complete\n");
double[] weights = nn.GetWeights();
Console.WriteLine("Final neural network weights and bias values:");
ShowVector(weights, 10, 3, true);
double trainAcc = nn.Accuracy(trainData);
Console.WriteLine("\nAccuracy on training data = " + trainAcc.ToString("F4"));
double testAcc = nn.Accuracy(testData);
Console.WriteLine("\nAccuracy on test data = " + testAcc.ToString("F4"));
Console.WriteLine("\nEnd dropout demo\n");
Console.ReadLine();
} // Main
/// <summary>
/// Writes a vector to the console in fixed-point notation, several values per row.
/// </summary>
/// <param name="vector">Values to print.</param>
/// <param name="valsPerRow">A line break is written before every group of this many values
/// (including before the very first value).</param>
/// <param name="decimals">Digits after the decimal point; each cell is left-padded to decimals + 4 characters.</param>
/// <param name="newLine">When true, a final line break is written after the last value.</param>
static void ShowVector(double[] vector, int valsPerRow, int decimals, bool newLine)
{
    string format = "F" + decimals;
    int cellWidth = decimals + 4;
    int position = 0;
    foreach (double value in vector)
    {
        // start a fresh row every valsPerRow cells
        if (position % valsPerRow == 0)
        {
            Console.WriteLine("");
        }
        string cell = value.ToString(format).PadLeft(cellWidth);
        Console.Write(cell + " ");
        ++position;
    }
    if (newLine)
    {
        Console.WriteLine("");
    }
}
/// <summary>
/// Writes the leading rows of a jagged matrix to the console, one row per line,
/// each line prefixed with its row index.
/// </summary>
/// <param name="matrix">Matrix to print.</param>
/// <param name="numRows">Maximum number of rows to print; if it exceeds the number of rows
/// available, only the available rows are printed.</param>
/// <param name="decimals">Digits after the decimal point for every value.</param>
/// <param name="newLine">When true, an extra blank line is written after the last row.</param>
static void ShowMatrix(double[][] matrix, int numRows, int decimals, bool newLine)
{
    // Clamp so that asking for more rows than exist does not index past the end.
    int rowsToShow = Math.Min(numRows, matrix.Length);
    string format = "F" + decimals;
    for (int i = 0; i < rowsToShow; ++i)
    {
        Console.Write(i.ToString().PadLeft(3) + ": ");
        foreach (double value in matrix[i])
        {
            // The sign is written separately so positive and negative values line up in columns.
            Console.Write(value >= 0.0 ? " " : "-");
            Console.Write(Math.Abs(value).ToString(format) + " ");
        }
        Console.WriteLine("");
    }
    if (newLine)
    {
        Console.WriteLine("");
    }
}
/// <summary>
/// Softmax over a whole vector at once: each result is exp(x - max) divided by the
/// sum of exp(x - max) over all elements, so the results sum to 1.0.
/// </summary>
/// <param name="oSums">Raw values; must contain at least one element.</param>
/// <returns>A new array of the same length holding the softmax values.</returns>
static double[] MySoftmax(double[] oSums)
{
    // largest input; subtracting it keeps every exponent at or below zero
    double peak = oSums[0];
    foreach (double candidate in oSums)
    {
        if (candidate > peak)
        {
            peak = candidate;
        }
    }

    // shifted exponentials and their running total (the scaling factor)
    double[] result = new double[oSums.Length];
    double total = 0.0;
    for (int idx = 0; idx < oSums.Length; ++idx)
    {
        result[idx] = Math.Exp(oSums[idx] - peak);
        total += result[idx];
    }

    // normalize in place
    for (int idx = 0; idx < result.Length; ++idx)
    {
        result[idx] = result[idx] / total;
    }
    return result;
}
} // class Program
/// <summary>
/// Single-hidden-layer feed-forward classifier (tanh hidden activation, softmax output)
/// trained with online back-propagation and dropout applied to the hidden nodes.
/// Data rows are laid out as numInput feature values followed by numOutput target values.
/// </summary>
public class NeuralNetwork
{
    // Probability that any one hidden node is dropped while processing a training item.
    // Train() rescales the hidden-to-output weights by (1 - DropProbability) when it finishes,
    // so the two values stay consistent.
    private const double DropProbability = 0.50;

    // Per-instance generator (fixed seed 0) used for weight initialization, shuffling and
    // dropout. It is deliberately NOT static: a shared generator would be reset by every
    // constructor call and interleaved between networks.
    private readonly Random rnd;

    private readonly int numInput;
    private readonly int numHidden;
    private readonly int numOutput;

    private readonly double[] inputs;
    private readonly double[][] ihWeights; // input-hidden
    private readonly double[] hBiases;
    private readonly double[] hOutputs;
    private readonly double[][] hoWeights; // hidden-output
    private readonly double[] oBiases;
    private readonly double[] outputs;

    /// <summary>
    /// Creates a network of the given shape and sets all weights and biases to small random values.
    /// </summary>
    /// <param name="numInput">Number of input nodes.</param>
    /// <param name="numHidden">Number of hidden nodes.</param>
    /// <param name="numOutput">Number of output nodes.</param>
    public NeuralNetwork(int numInput, int numHidden, int numOutput)
    {
        this.rnd = new Random(0); // multi-purpose, inc. dropout

        this.numInput = numInput;
        this.numHidden = numHidden;
        this.numOutput = numOutput;

        this.inputs = new double[numInput];
        this.ihWeights = MakeMatrix(numInput, numHidden);
        this.hBiases = new double[numHidden];
        this.hOutputs = new double[numHidden];
        this.hoWeights = MakeMatrix(numHidden, numOutput);
        this.oBiases = new double[numOutput];
        this.outputs = new double[numOutput];

        InitializeWeights();
    }

    // Allocates a rows x cols jagged array filled with zeros.
    private static double[][] MakeMatrix(int rows, int cols)
    {
        double[][] result = new double[rows][];
        for (int r = 0; r < result.Length; ++r)
            result[r] = new double[cols];
        return result;
    }

    // Total number of weights and biases in the network.
    private int WeightCount()
    {
        return (numInput * numHidden) + (numHidden * numOutput) + numHidden + numOutput;
    }

    // ----------------------------------------------------------------------------------------

    /// <summary>
    /// Copies a flat array into the network in this order: input-hidden weights,
    /// hidden biases, hidden-output weights, output biases.
    /// </summary>
    /// <param name="weights">Array whose length equals the total number of weights and biases.</param>
    /// <exception cref="ArgumentNullException">weights is null.</exception>
    /// <exception cref="ArgumentException">weights has the wrong length.</exception>
    public void SetWeights(double[] weights)
    {
        if (weights == null)
            throw new ArgumentNullException("weights");
        int numWeights = WeightCount();
        if (weights.Length != numWeights)
            throw new ArgumentException(
                "Bad weights array length: " + weights.Length + " (expected " + numWeights + ")",
                "weights");

        int k = 0; // points into weights param
        for (int i = 0; i < numInput; ++i)
            for (int j = 0; j < numHidden; ++j)
                ihWeights[i][j] = weights[k++];
        for (int i = 0; i < numHidden; ++i)
            hBiases[i] = weights[k++];
        for (int i = 0; i < numHidden; ++i)
            for (int j = 0; j < numOutput; ++j)
                hoWeights[i][j] = weights[k++];
        for (int i = 0; i < numOutput; ++i)
            oBiases[i] = weights[k++];
    }

    // Sets every weight and bias to a random value in [-0.01, +0.01).
    private void InitializeWeights()
    {
        double[] initialWeights = new double[WeightCount()];
        double lo = -0.01;
        double hi = 0.01;
        for (int i = 0; i < initialWeights.Length; ++i)
            initialWeights[i] = (hi - lo) * rnd.NextDouble() + lo;
        this.SetWeights(initialWeights);
    }

    /// <summary>
    /// Returns a copy of all weights and biases, in the same order SetWeights expects.
    /// </summary>
    public double[] GetWeights()
    {
        double[] result = new double[WeightCount()];
        int k = 0;
        for (int i = 0; i < numInput; ++i)
            for (int j = 0; j < numHidden; ++j)
                result[k++] = ihWeights[i][j];
        for (int i = 0; i < numHidden; ++i)
            result[k++] = hBiases[i];
        for (int i = 0; i < numHidden; ++i)
            for (int j = 0; j < numOutput; ++j)
                result[k++] = hoWeights[i][j];
        for (int i = 0; i < numOutput; ++i)
            result[k++] = oBiases[i];
        return result;
    }

    // ----------------------------------------------------------------------------------------

    // Picks the hidden nodes to drop for one training item. Each node is selected with
    // probability DropProbability; the result is then adjusted so that at least one node is
    // dropped and at least one is kept. Indices are produced in ascending order, which
    // IsDropNode's binary search relies on.
    private int[] MakeDropNodes()
    {
        // With fewer than two hidden nodes "one dropped, one kept" is impossible;
        // drop nothing rather than silence the whole hidden layer.
        if (numHidden < 2)
            return new int[0];

        List<int> resultList = new List<int>();
        for (int i = 0; i < this.numHidden; ++i)
        {
            if (rnd.NextDouble() < DropProbability)
                resultList.Add(i);
        }
        if (resultList.Count == 0)
            resultList.Add(rnd.Next(0, numHidden));
        else if (resultList.Count == numHidden)
            resultList.RemoveAt(rnd.Next(0, numHidden));
        return resultList.ToArray();
    }

    // True when node appears in dropNodes. A null dropNodes means "drop nothing".
    // dropNodes must be sorted ascending (binary search).
    private bool IsDropNode(int node, int[] dropNodes)
    {
        return dropNodes != null && Array.BinarySearch(dropNodes, node) >= 0;
    }

    // Feed-forward pass. Hidden nodes listed in dropNodes are skipped entirely (their
    // stored hOutputs are left untouched and they contribute nothing to the output sums);
    // null dropNodes means no node is dropped. Stores the result in this.outputs and
    // returns a copy of it.
    // i = input index, j = hidden, k = output
    private double[] ComputeOutputs(double[] xValues, int[] dropNodes)
    {
        if (xValues.Length != numInput)
            throw new ArgumentException("Bad xValues array length", "xValues");

        Array.Copy(xValues, this.inputs, numInput); // copy x-values to inputs

        for (int j = 0; j < numHidden; ++j) // each hidden node
        {
            if (IsDropNode(j, dropNodes)) continue; // skip
            double hSum = 0.0;
            for (int i = 0; i < numInput; ++i)
                hSum += this.inputs[i] * this.ihWeights[i][j];
            hSum += this.hBiases[j]; // add bias
            this.hOutputs[j] = HyperTanFunction(hSum); // apply activation
        }

        double[] oSums = new double[numOutput]; // output nodes sums
        for (int k = 0; k < numOutput; ++k) // each output node
        {
            for (int j = 0; j < numHidden; ++j)
            {
                if (IsDropNode(j, dropNodes)) continue; // skip
                oSums[k] += hOutputs[j] * hoWeights[j][k];
            }
            oSums[k] += oBiases[k]; // add bias
        }

        // Softmax needs every output sum, so it runs once after all sums are complete.
        double[] softOut = Softmax(oSums);
        Array.Copy(softOut, this.outputs, softOut.Length);

        // return a copy so callers cannot modify the internal state
        double[] retResult = new double[numOutput];
        Array.Copy(this.outputs, retResult, retResult.Length);
        return retResult;
    }

    // tanh with the extremes short-circuited; beyond |x| = 20 the double result is already +/-1.
    private static double HyperTanFunction(double x)
    {
        if (x < -20.0) return -1.0;
        else if (x > 20.0) return 1.0;
        else return Math.Tanh(x);
    }

    // Softmax over all output sums at once so the scaling factor is computed a single time.
    // The maximum is subtracted before exponentiating to avoid overflow.
    private static double[] Softmax(double[] oSums)
    {
        double max = oSums[0];
        for (int i = 1; i < oSums.Length; ++i)
            if (oSums[i] > max) max = oSums[i];

        double[] result = new double[oSums.Length];
        double scale = 0.0;
        for (int i = 0; i < oSums.Length; ++i)
        {
            result[i] = Math.Exp(oSums[i] - max);
            scale += result[i];
        }
        for (int i = 0; i < result.Length; ++i)
            result[i] /= scale;
        return result; // now scaled so that values sum to 1.0
    }

    // ----------------------------------------------------------------------------------------

    // One back-propagation step for the item most recently passed to ComputeOutputs.
    // Dropped hidden nodes get no gradient and none of their weights or biases change.
    private void UpdateWeights(double[] tValues, double learnRate, int[] dropNodes)
    {
        if (tValues.Length != numOutput)
            throw new ArgumentException("target values not same Length as output in UpdateWeights", "tValues");

        double[] hGrads = new double[numHidden];
        double[] oGrads = new double[numOutput];

        // 1. output gradients ('mean squared error version': includes the (1 - y) * y derivative)
        for (int k = 0; k < numOutput; ++k)
        {
            double derivative = (1 - outputs[k]) * outputs[k];
            oGrads[k] = derivative * (tValues[k] - outputs[k]);
        }

        // 2. hidden gradients; must use the hidden-output weights from BEFORE step 4 changes them
        for (int j = 0; j < numHidden; ++j)
        {
            if (IsDropNode(j, dropNodes)) continue;
            double derivative = (1 - hOutputs[j]) * (1 + hOutputs[j]); // derivative of tanh
            double sum = 0.0;
            for (int k = 0; k < numOutput; ++k)
                sum += oGrads[k] * hoWeights[j][k];
            hGrads[j] = derivative * sum;
        }

        // 3. input-hidden weights and hidden biases
        //    Deltas are ADDED because the gradients were built from (target - output).
        for (int j = 0; j < numHidden; ++j)
        {
            if (IsDropNode(j, dropNodes)) continue;
            for (int i = 0; i < numInput; ++i)
                ihWeights[i][j] += learnRate * hGrads[j] * inputs[i];
            hBiases[j] += learnRate * hGrads[j]; // a bias has a constant input of 1.0
        }

        // 4. hidden-output weights and output biases
        for (int k = 0; k < numOutput; ++k)
        {
            for (int j = 0; j < numHidden; ++j)
            {
                if (IsDropNode(j, dropNodes)) continue;
                hoWeights[j][k] += learnRate * oGrads[k] * hOutputs[j];
            }
            oBiases[k] += learnRate * oGrads[k];
        }
    }

    // ----------------------------------------------------------------------------------------

    /// <summary>
    /// Trains the network with online back-propagation and dropout (no momentum, no weight
    /// decay, no early exit). Every epoch visits all rows in a fresh random order and draws a
    /// new set of dropped hidden nodes for each row. When training ends, the hidden-output
    /// weights are scaled by (1 - drop probability) to account for dropout; note that this
    /// scaling is applied on every call, so the method is intended to be called once.
    /// </summary>
    /// <param name="trainData">Rows of numInput feature values followed by numOutput target values.</param>
    /// <param name="maxEpochs">Number of passes over the training data.</param>
    /// <param name="learnRate">Step size applied to every weight and bias update.</param>
    /// <exception cref="ArgumentNullException">trainData is null.</exception>
    public void Train(double[][] trainData, int maxEpochs, double learnRate)
    {
        if (trainData == null)
            throw new ArgumentNullException("trainData");

        double[] xValues = new double[numInput]; // inputs
        double[] tValues = new double[numOutput]; // target values

        int[] sequence = new int[trainData.Length];
        for (int i = 0; i < sequence.Length; ++i)
            sequence[i] = i;

        for (int epoch = 0; epoch < maxEpochs; ++epoch)
        {
            // An early exit could be added here using MeanSquaredError(trainData);
            // it is expensive, so consider checking only every few epochs.
            Shuffle(sequence); // visit each training item in random order
            for (int i = 0; i < trainData.Length; ++i)
            {
                int idx = sequence[i];
                Array.Copy(trainData[idx], xValues, numInput);
                Array.Copy(trainData[idx], numInput, tValues, 0, numOutput);

                int[] dropNodes = MakeDropNodes();
                ComputeOutputs(xValues, dropNodes); // stores outputs internally
                UpdateWeights(tValues, learnRate, dropNodes);
            }
        }

        // Scale hidden-output weights by the keep probability to account for dropout.
        double keepProbability = 1.0 - DropProbability;
        for (int j = 0; j < numHidden; ++j)
            for (int k = 0; k < numOutput; ++k)
                hoWeights[j][k] *= keepProbability;
    }

    // In-place Fisher-Yates shuffle driven by this network's generator.
    private void Shuffle(int[] sequence)
    {
        for (int i = 0; i < sequence.Length; ++i)
        {
            int r = rnd.Next(i, sequence.Length);
            int tmp = sequence[r];
            sequence[r] = sequence[i];
            sequence[i] = tmp;
        }
    }

    // Average squared error per row using the current weights and no dropped nodes.
    // Currently unused; intended as an optional training stopping condition.
    private double MeanSquaredError(double[][] trainData)
    {
        double sumSquaredError = 0.0;
        double[] xValues = new double[numInput]; // first numInput values in each row
        double[] tValues = new double[numOutput]; // last numOutput values
        for (int i = 0; i < trainData.Length; ++i)
        {
            Array.Copy(trainData[i], xValues, numInput);
            Array.Copy(trainData[i], numInput, tValues, 0, numOutput);
            double[] yValues = this.ComputeOutputs(xValues, null); // no drop-nodes
            for (int j = 0; j < numOutput; ++j)
            {
                double err = tValues[j] - yValues[j];
                sumSquaredError += err * err;
            }
        }
        return sumSquaredError / trainData.Length;
    }

    // ----------------------------------------------------------------------------------------

    /// <summary>
    /// Fraction of rows classified correctly using winner-takes-all: a row counts as correct
    /// when the target value at the index of the largest computed output is 1. No hidden
    /// nodes are dropped during evaluation.
    /// </summary>
    /// <param name="testData">Rows of numInput feature values followed by numOutput target values.</param>
    /// <returns>A value in [0, 1]; 0.0 when testData contains no rows.</returns>
    /// <exception cref="ArgumentNullException">testData is null.</exception>
    public double Accuracy(double[][] testData)
    {
        if (testData == null)
            throw new ArgumentNullException("testData");
        if (testData.Length == 0)
            return 0.0; // avoid 0/0 (NaN) on an empty data set

        int numCorrect = 0;
        double[] xValues = new double[numInput]; // inputs
        double[] tValues = new double[numOutput]; // targets
        for (int i = 0; i < testData.Length; ++i)
        {
            Array.Copy(testData[i], xValues, numInput);
            Array.Copy(testData[i], numInput, tValues, 0, numOutput);
            double[] yValues = this.ComputeOutputs(xValues, null); // null == no drop-nodes
            int maxIndex = MaxIndex(yValues);
            // tolerance instead of == on doubles
            if (Math.Abs(tValues[maxIndex] - 1.0) < 1.0e-9)
                ++numCorrect;
        }
        return (double)numCorrect / testData.Length;
    }

    // Index of the largest value; the first one wins on ties.
    private static int MaxIndex(double[] vector)
    {
        int bigIndex = 0;
        double biggestVal = vector[0];
        for (int i = 1; i < vector.Length; ++i)
        {
            if (vector[i] > biggestVal)
            {
                biggestVal = vector[i];
                bigIndex = i;
            }
        }
        return bigIndex;
    }
} // class NeuralNetwork
} // ns
// (Web page residue, translated from Russian:) No comments:
// Post a comment