{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "deletable": true,
    "editable": true,
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "# Implementing a single-layer, fully connected neural network"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "deletable": true,
    "editable": true
   },
   "source": [
    "First, we will import the libraries that we need..."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "deletable": true,
    "editable": true
   },
   "outputs": [],
   "source": [
    "import tensorflow as tf\n",
    "import numpy as np\n",
    "\n",
    "# taking real data from one of the built-in tutorials:\n",
    "from sklearn.datasets import fetch_mldata\n",
    "from sklearn.preprocessing import OneHotEncoder\n",
    "from random import shuffle"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "deletable": true,
    "editable": true
   },
   "source": [
    "Some tools that we need, nothing interesting here..."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true,
    "deletable": true,
    "editable": true
   },
   "outputs": [],
   "source": [
    "def load_data(train_fraction=0.8, seed=None):\n",
    "    '''\n",
    "        Load MNIST, scale the features, one-hot encode the labels and split\n",
    "        the examples randomly into a train part and a test part.\n",
    "\n",
    "        train_fraction: share of the examples that goes to the train set; the\n",
    "            default 0.8 reproduces the previously hard-coded split.\n",
    "        seed: optional seed for the shuffling. With the default None the global\n",
    "            numpy random state is used; pass an integer for a repeatable split.\n",
    "\n",
    "        Returns two dictionaries (train and test), each has the keys \"X\" and\n",
    "        \"y\" for the examples and the labels accordingly.\n",
    "        X is a matrix where the columns are different features.\n",
    "\n",
    "        Raises ValueError when train_fraction is not strictly between 0 and 1.\n",
    "    '''\n",
    "    if not 0.0 < train_fraction < 1.0:\n",
    "        raise ValueError('train_fraction must be strictly between 0 and 1')\n",
    "\n",
    "    print('loading data... (first time might take some time)')\n",
    "    # NOTE(review): fetch_mldata and OneHotEncoder's n_values argument were removed in\n",
    "    # scikit-learn 0.22, so this function needs an older release - confirm the environment.\n",
    "    mnist = fetch_mldata('MNIST original')\n",
    "    X = mnist[\"data\"] / 255.0\n",
    "    y = OneHotEncoder(n_values=10, sparse=False).fit_transform(mnist[\"target\"].reshape([-1, 1]))\n",
    "    split_index = int(len(y) * train_fraction)\n",
    "    # a private RandomState makes a seeded split independent of other numpy draws;\n",
    "    # without a seed we keep using the global numpy state, exactly as before\n",
    "    rng = np.random if seed is None else np.random.RandomState(seed)\n",
    "    indices = rng.permutation(len(y))\n",
    "    train_part, test_part = indices[:split_index], indices[split_index:]\n",
    "    train = {'X': X[train_part, :], 'y': y[train_part, :]}\n",
    "    test = {'X': X[test_part, :], 'y': y[test_part, :]}\n",
    "    print('Data loaded successfully.\\n')\n",
    "    return train, test\n",
    "    \n",
    "    \n",
    "def calc_accurecy(predictions, true_classes):\n",
    "    '''\n",
    "        The average count of correct predictions in precentages \n",
    "    '''    \n",
    "    true_labels_as_integers = np.argmax(true_classes, 1)\n",
    "    return np.mean(np.equal(predictions, true_labels_as_integers).astype(np.float32)) * 100\n",
    "    \n",
    "    \n",
    "class CyclicDataIter(object):\n",
    "    '''\n",
    "        Construct an iterator which accepts dictionary of the data\n",
    "        with keys: \"X\" and \"y\", and a batch size. Then this object behaves as an infinite\n",
    "        cyclic iterator over the data (with wrap around). \n",
    "        Each iteration returns subset of the rows of X and subset of corresponding coordinates in y,\n",
    "        those subsets are of batch_size each.\n",
    "    '''\n",
    "    \n",
    "    def __init__(self, data_dict, batch_size):\n",
    "        \n",
    "        self._X, self._y = data_dict['X'], data_dict['y']\n",
    "        self._batch_size = batch_size\n",
    "        self._i = 0\n",
    "        self._max_i = self._X.shape[0]\n",
    "        \n",
    "    def __iter__(self):\n",
    "        return self\n",
    "       \n",
    "    def next(self):\n",
    "        end_i = self._i + self._batch_size\n",
    "                \n",
    "        if end_i <= self._max_i:\n",
    "            locations = range(self._i, end_i)\n",
    "            self._i = end_i\n",
    "            \n",
    "        else:            \n",
    "            locations = list(range(self._i, self._max_i))\n",
    "            self._i = end_i % self._max_i\n",
    "            locations.extend(range(self._i))\n",
    "        self._i = end_i % self._max_i\n",
    "        return self._X[locations, :], self._y[locations, :]\n",
    "\n",
    "    def __next__(self):\n",
    "        return self.next()\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "deletable": true,
    "editable": true
   },
   "source": [
    "# Construction Phase\n",
    "\n",
    "We start by loading the data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "deletable": true,
    "editable": true,
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "# load_data() returns two dictionaries (train and test), each with the keys \"X\" and \"y\"\n",
    "train, test = load_data()  \n",
    "\n",
    "# sanity check: show the keys and the shapes of the examples / labels arrays\n",
    "print('The keys in each dictionary are:', train.keys(),'and', test.keys())\n",
    "print('The dimenstions of the training sets are', train['X'].shape, train['y'].shape)\n",
    "print('The dimenstions of the testing sets are', test['X'].shape, test['y'].shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "deletable": true,
    "editable": true
   },
   "outputs": [],
   "source": [
    "# rows of X are examples, columns are features\n",
    "train_size, num_features = train['X'].shape\n",
    "# number of target classes; load_data one-hot encodes the labels with n_values=10\n",
    "num_classes = 10"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "deletable": true,
    "editable": true
   },
   "source": [
    "Now constructing the inputs to the network: \n",
    "\n",
    "Consider the _shape_ property of the tensor that will denote the inputs.\n",
    "Obviously it has to be two-dimensional, but what will be the size of the first dimension? \n",
    "\n",
    "The problem is that we have a different number of __rows__ in the training set and in the test set. Therefore, the number of rows in our placeholder has to be flexible!\n",
    "\n",
    "Introducing the \"None\" value into the tensor's shape."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "deletable": true,
    "editable": true
   },
   "outputs": [],
   "source": [
    "# placeholder for the examples: None as the first dimension leaves the number of rows\n",
    "# unspecified, so the same placeholder can be fed with any number of examples\n",
    "data_input = tf.placeholder(shape=[None, num_features],\n",
    "                            dtype=tf.float32,\n",
    "                            name='inputs_placeholder')\n",
    "\n",
    "# placeholder for the labels: one row of num_classes values per example\n",
    "true_label = tf.placeholder(shape=[None, num_classes],\n",
    "                            dtype=tf.float32,\n",
    "                            name='true_labels_placeholder')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "deletable": true,
    "editable": true
   },
   "outputs": [],
   "source": [
    "# get_shape() gives the static shape known at graph-construction time;\n",
    "# as_list() turns it into a plain Python list where the unknown dimension is None\n",
    "print('The shape of the data is ', data_input.get_shape())\n",
    "print('The actual shape values are:', data_input.get_shape().as_list())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "deletable": true,
    "editable": true
   },
   "source": [
    "Create the _parameters_ that the network will try to learn:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true,
    "deletable": true,
    "editable": true
   },
   "outputs": [],
   "source": [
    "# weight matrix of shape [num_features, num_classes], initialised with random normal\n",
    "# values (mean 0.0, stddev 0.1)\n",
    "weights = tf.Variable(initial_value=tf.random_normal(shape=[num_features, num_classes], mean=0.0, stddev=0.1),\n",
    "                      name='weights')\n",
    "\n",
    "# one bias per class, initialised the same way\n",
    "biases = tf.Variable(initial_value=tf.random_normal(shape=[num_classes], mean=0.0, stddev=0.1),\n",
    "                     name='biases')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "deletable": true,
    "editable": true
   },
   "source": [
    "Define the computations and keep the resulting tensors, we will use them later."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "deletable": true,
    "editable": true
   },
   "outputs": [],
   "source": [
    "# linear part of the layer: inputs times weights plus biases, one row of\n",
    "# num_classes scores per example\n",
    "layer_value = tf.nn.bias_add(tf.matmul(data_input, weights, name='mult_by_weights'), \n",
    "                          biases, \n",
    "                          name='adding_bias')\n",
    "\n",
    "# Alternative:\n",
    "#layer_value = tf.matmul(data_input, weights, name='mult_by_weights') + biases\n",
    "\n",
    "# softmax turns every row of scores into a probability vector\n",
    "probabilities = tf.nn.softmax(layer_value, name='applying_activation')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "deletable": true,
    "editable": true
   },
   "outputs": [],
   "source": [
    "# check the dimensions of the outputs!\n",
    "# (an unknown number of rows, one column per class)\n",
    "print(probabilities.get_shape())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "deletable": true,
    "editable": true
   },
   "source": [
    "Observe that for each example, the network outputs a **probability** vector of length 10 (due to the _softmax_ activation). These probabilities reflect what the network believes is the distribution over the classes for the given example. In order to get an actual prediction, we will return the _index_ of the coordinate with the largest probability."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "deletable": true,
    "editable": true
   },
   "outputs": [],
   "source": [
    "# index of the largest probability in every row, i.e. the predicted class per example\n",
    "predictions = tf.argmax(probabilities, axis=1, name='predictions')\n",
    "# argmax removes axis 1, so the result has one entry per example\n",
    "print(predictions.get_shape())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "deletable": true,
    "editable": true
   },
   "source": [
    "Here is a simple trick to check which operations the graph contains (note that TF breaks some operations into sub operations so the list can be a bit longer than expected)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "deletable": true,
    "editable": true
   },
   "outputs": [],
   "source": [
    "# print the name of every operation in the default graph, one per line\n",
    "print('\\n'.join(op.name for op in tf.get_default_graph().get_operations()))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "deletable": true,
    "editable": true
   },
   "source": [
    "#### Important side note about _reshaping_:\n",
    "\n",
    "Say that we want to multiply the _predictions_ with a matrix. We will fail to apply <code>tf.matmul</code> because both inputs have to be matrices (whereas our predictions tensor is a vector). The solution is to reshape the predictions into a new container - a matrix with a single column. It can be done as follows:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "deletable": true,
    "editable": true
   },
   "outputs": [],
   "source": [
    "# two toy tensors holding 100 elements each\n",
    "vector = tf.constant(1, shape=[100])\n",
    "matrix = tf.constant(0, shape=[10, 10])\n",
    "\n",
    "# reshape keeps the elements and only changes the shape; a single -1 entry asks TF\n",
    "# to infer that dimension from the total number of elements\n",
    "vector_to_matrix = tf.reshape(vector, [100, 1])\n",
    "matrix_to_vector = tf.reshape(matrix, [-1])\n",
    "# here the -1 resolves to 2, because 100 / (2 * 5 * 5) = 2\n",
    "high_order_tensor = tf.reshape(matrix, [2, 5, 5, -1])\n",
    "\n",
    "print('\"vector\" had shape', vector.get_shape(),'and now:', vector_to_matrix.get_shape())\n",
    "print('\"matrix\" had shape', matrix.get_shape(),'and now:', matrix_to_vector.get_shape())\n",
    "\n",
    "print('high order tensor has shape:', high_order_tensor.get_shape())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "deletable": true,
    "editable": true
   },
   "source": [
    "## Trying to run the network!\n",
    "\n",
    "We need to add one more operation to the graph:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true,
    "deletable": true,
    "editable": true
   },
   "outputs": [],
   "source": [
    "# operation that assigns the initial values to the variables defined above;\n",
    "# it only takes effect when it is run inside a session\n",
    "init_all_variables_op = tf.global_variables_initializer()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "deletable": true,
    "editable": true
   },
   "outputs": [],
   "source": [
    "# the with-block installs sess as the default session, which is what lets\n",
    "# Operation.run() below work without an explicit session argument\n",
    "with tf.Session() as sess:\n",
    "    init_all_variables_op.run()  # why don't we pass the session object?\n",
    "\n",
    "    # performance on test set:\n",
    "    # (no training has happened yet, so this scores the randomly initialised parameters;\n",
    "    # true_label is not fed because predictions does not depend on it)\n",
    "    preds = sess.run(predictions, feed_dict={data_input: test['X']}) # recall that preds is a np.ndarray object\n",
    "    score = calc_accurecy(preds, test['y'])\n",
    "\n",
    "    print('Accuracy of the network on test set (in %%): %s' % score)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "deletable": true,
    "editable": true
   },
   "source": [
    "# What is left now is to _train_ the network"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "deletable": true,
    "editable": true
   },
   "source": [
    "In order to _learn_ we need to define the loss function and a learning algorithm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true,
    "deletable": true,
    "editable": true
   },
   "outputs": [],
   "source": [
    "# mean cross entropy between the true labels and the network output.\n",
    "# It is computed from the raw scores (layer_value) instead of tf.log(probabilities):\n",
    "# the fused op is numerically stable, whereas log(softmax) yields inf / NaN as soon as\n",
    "# one probability underflows to 0, which would poison the gradients.\n",
    "loss_tensor = tf.reduce_mean(\n",
    "    tf.nn.softmax_cross_entropy_with_logits(labels=true_label, logits=layer_value))\n",
    "# plain gradient descent with learning rate 0.5; running learn_op performs one update step\n",
    "learn_op = tf.train.GradientDescentOptimizer(0.5).minimize(loss_tensor)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "deletable": true,
    "editable": true
   },
   "source": [
    "Now we will train before predicting on the test set:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "deletable": true,
    "editable": true
   },
   "outputs": [],
   "source": [
    "# a new session: the variables are initialised again before training starts\n",
    "with tf.Session() as sess:\n",
    "    init_all_variables_op.run()  \n",
    "    \n",
    "    # training: 1000 update steps, each on the next mini-batch of 100 training examples\n",
    "    train_iterator = CyclicDataIter(train, batch_size=100)\n",
    "    for i in range(1000):\n",
    "        x_batch, y_batch = next(train_iterator)        \n",
    "        sess.run(learn_op, feed_dict={data_input: x_batch, true_label: y_batch})\n",
    "\n",
    "    # performance on test set (labels are not fed, predictions only needs the inputs):\n",
    "    preds = sess.run(predictions, feed_dict={data_input: test['X']})\n",
    "    score = calc_accurecy(preds, test['y'])\n",
    "\n",
    "    print('Accuracy of the network on test set (in %%): %s' % score)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "keras",
   "language": "python",
   "name": "keras"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}