From 556149e5cd47b239849a196a82e7bd3a20aaaba8 Mon Sep 17 00:00:00 2001
From: Trevor Yu <trevor.c.yu@gmail.com>
Date: Sat, 1 Oct 2022 10:38:36 -0400
Subject: [PATCH 1/8] Add avalon-mlp.jl file

---
 multi-layer-perceptron/avalon-mlp.jl | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 multi-layer-perceptron/avalon-mlp.jl

diff --git a/multi-layer-perceptron/avalon-mlp.jl b/multi-layer-perceptron/avalon-mlp.jl
new file mode 100644
index 0000000..4833830
--- /dev/null
+++ b/multi-layer-perceptron/avalon-mlp.jl
@@ -0,0 +1 @@
+# Avalon.jl implementation of multi-layer perceptron
\ No newline at end of file

From 2e6e41115cd618a30445c319823fbff36eeed7af Mon Sep 17 00:00:00 2001
From: Trevor Yu <trevor.c.yu@gmail.com>
Date: Sun, 16 Oct 2022 23:57:03 -0400
Subject: [PATCH 2/8] Add Avalon MLP notebook

---
 multi-layer-perceptron/avalon-mlp.ipynb | 477 ++++++++++++++++++++++++
 1 file changed, 477 insertions(+)
 create mode 100644 multi-layer-perceptron/avalon-mlp.ipynb

diff --git a/multi-layer-perceptron/avalon-mlp.ipynb b/multi-layer-perceptron/avalon-mlp.ipynb
new file mode 100644
index 0000000..aff56a5
--- /dev/null
+++ b/multi-layer-perceptron/avalon-mlp.ipynb
@@ -0,0 +1,477 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Imports"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "using Yota;\n",
+    "using MLDatasets;\n",
+    "using NNlib;\n",
+    "using Statistics;\n",
+    "using Distributions;\n",
+    "using Functors;\n",
+    "using Optimisers;\n",
+    "using Flux.Data;\n",
+    "using Flux: onehotbatch, @epochs;"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Primitives"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Linear "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "mutable struct Linear\n",
+    "    W::AbstractMatrix{T} where T\n",
+    "    b::AbstractVector{T} where T\n",
+    "end\n",
+    "\n",
+    "@functor Linear\n",
+    "\n",
+    "# Init\n",
+    "function Linear(in_features::Int, out_features::Int)\n",
+    "    k_sqrt = sqrt(1 / in_features)\n",
+    "    d = Uniform(-k_sqrt, k_sqrt)\n",
+    "    return Linear(rand(d, out_features, in_features), rand(d, out_features))\n",
+    "end\n",
+    "Linear(in_out::Pair{Int, Int}) = Linear(in_out[1], in_out[2])\n",
+    "\n",
+    "function Base.show(io::IO, l::Linear)\n",
+    "    o, i = size(l.W)\n",
+    "    print(io, \"Linear($i=>$o)\")\n",
+    "end\n",
+    "\n",
+    "# Forward\n",
+    "(l::Linear)(x::Union{AbstractVector{T}, AbstractMatrix{T}}) where T = l.W * x .+ l.b"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Logit Cross Entropy"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "logitcrossentropy (generic function with 1 method)"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "function logitcrossentropy(ŷ, y; dims = 1, agg = mean)\n",
+    "    agg(.-sum(y .* logsoftmax(ŷ; dims = dims); dims = dims));\n",
+    "  end"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Define the model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "mutable struct Net\n",
+    "    fc1::Linear\n",
+    "    fc2::Linear\n",
+    "end\n",
+    "\n",
+    "@functor Net\n",
+    "\n",
+    "# Init\n",
+    "Net() = Net(\n",
+    "    Linear(28*28, 100),\n",
+    "    Linear(100, 10)\n",
+    ")\n",
+    "\n",
+    "# Forward\n",
+    "function (model::Net)(x::AbstractArray)\n",
+    "    x = reshape(x, 28*28, :)\n",
+    "    x = model.fc1(x)\n",
+    "    x = relu(x)\n",
+    "    x = model.fc2(x)\n",
+    "    return x\n",
+    "end"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "train_dataset = MNIST(dir=\"/Users/trevoryu/Code/data/mnist\", split=:train);\n",
+    "test_dataset = MNIST(dir=\"/Users/trevoryu/Code/data/mnist\", split=:test);\n",
+    "\n",
+    "X_train = train_dataset.features;\n",
+    "Y_train = train_dataset.targets;\n",
+    "\n",
+    "X_test = test_dataset.features;\n",
+    "Y_test = test_dataset.targets;"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(28, 28, 10000)"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "size(X_test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Flatten features to be 784 dim\n",
+    "X_train = reshape(X_train, 784, :);  # (dim x batch)\n",
+    "X_test = reshape(X_test, 784, :);"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Convert targets to one-hot vectors\n",
+    "Y_train = onehotbatch(Y_train, 0:9);\n",
+    "Y_test = onehotbatch(Y_test, 0:9);  # (dim x batch)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "loader = DataLoader((X_train, Y_train), shuffle=true, batchsize=128);"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Training setup"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Net(Linear(784=>100), Linear(100=>10))"
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Make model\n",
+    "mlp = Net()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Setup Adam optimizer\n",
+    "# Default Β is (0.9, 0.999)\n",
+    "state = Optimisers.setup(Optimisers.Adam(1e-3), mlp);"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "loss_function (generic function with 1 method)"
+      ]
+     },
+     "execution_count": 16,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Create objective function\n",
+    "function loss_function(model::Net, x::AbstractArray, y::AbstractArray)\n",
+    "    ŷ = model(x)\n",
+    "    loss = logitcrossentropy(ŷ, y)\n",
+    "    return loss\n",
+    "end"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Training loop"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "  0.774987 seconds (162.82 k allocations: 1.951 GiB, 12.71% gc time)\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "┌ Info: epoch 1 loss = 0.04094025307690705\n",
+      "└ @ Main In[18]:12\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "  0.646125 seconds (163.01 k allocations: 1.951 GiB, 10.26% gc time)\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "┌ Info: epoch 2 loss = 0.03572923218462636\n",
+      "└ @ Main In[18]:12\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "  0.652080 seconds (163.01 k allocations: 1.951 GiB, 9.54% gc time)\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "┌ Info: epoch 3 loss = 0.03166147383022924\n",
+      "└ @ Main In[18]:12\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "  0.682782 seconds (163.01 k allocations: 1.951 GiB, 11.74% gc time)\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "┌ Info: epoch 4 loss = 0.027879119237525167\n",
+      "└ @ Main In[18]:12\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "  0.666789 seconds (163.01 k allocations: 1.951 GiB, 8.82% gc time)\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "┌ Info: epoch 5 loss = 0.02481490266472439\n",
+      "└ @ Main In[18]:12\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "  0.624532 seconds (163.01 k allocations: 1.951 GiB, 8.85% gc time)\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "┌ Info: epoch 6 loss = 0.021184432907923646\n",
+      "└ @ Main In[18]:12\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "  0.604429 seconds (163.01 k allocations: 1.951 GiB, 8.57% gc time)\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "┌ Info: epoch 7 loss = 0.019366974012950174\n",
+      "└ @ Main In[18]:12\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "  0.622498 seconds (163.01 k allocations: 1.951 GiB, 8.82% gc time)\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "┌ Info: epoch 8 loss = 0.016626981172646552\n",
+      "└ @ Main In[18]:12\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "  0.610364 seconds (163.01 k allocations: 1.951 GiB, 8.79% gc time)\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "┌ Info: epoch 9 loss = 0.014004979298361142\n",
+      "└ @ Main In[18]:12\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "  0.661410 seconds (163.01 k allocations: 1.951 GiB, 10.90% gc time)\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "┌ Info: epoch 10 loss = 0.012511318528209223\n",
+      "└ @ Main In[18]:12\n"
+     ]
+    }
+   ],
+   "source": [
+    "for epoch in 1:10\n",
+    "    losses = []\n",
+    "    @time begin\n",
+    "        for (x, y) in loader\n",
+    "            # loss_function does forward pass\n",
+    "            # Yota.jl grad function computes parameter gradients\n",
+    "            loss, g = grad(loss_function, mlp, x, y)\n",
+    "            # Optimiser updates parameters\n",
+    "            Optimisers.update!(state, mlp, g[2])\n",
+    "            push!(losses, loss)\n",
+    "            # TODO: Add accuracy computation\n",
+    "        end\n",
+    "        @info(\"epoch $epoch loss = $(mean(losses))\")\n",
+    "    end\n",
+    "end"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Julia 1.8.0",
+   "language": "julia",
+   "name": "julia-1.8"
+  },
+  "language_info": {
+   "file_extension": ".jl",
+   "mimetype": "application/julia",
+   "name": "julia",
+   "version": "1.8.0"
+  },
+  "orig_nbformat": 4,
+  "vscode": {
+   "interpreter": {
+    "hash": "b0fa6594d8f4cbf19f97940f81e996739fb7646882a419484c72d19e05852a7e"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

From d826c32cf7bcea36a843cb6a0944c3138370d0cf Mon Sep 17 00:00:00 2001
From: Trevor Yu <trevor.c.yu@gmail.com>
Date: Fri, 21 Oct 2022 22:45:46 -0400
Subject: [PATCH 3/8] Add evaluation to notebook

---
 multi-layer-perceptron/avalon-mlp.ipynb | 179 ++++++++++++++++++------
 1 file changed, 134 insertions(+), 45 deletions(-)

diff --git a/multi-layer-perceptron/avalon-mlp.ipynb b/multi-layer-perceptron/avalon-mlp.ipynb
index aff56a5..7bfd691 100644
--- a/multi-layer-perceptron/avalon-mlp.ipynb
+++ b/multi-layer-perceptron/avalon-mlp.ipynb
@@ -20,8 +20,9 @@
     "using Distributions;\n",
     "using Functors;\n",
     "using Optimisers;\n",
-    "using Flux.Data;\n",
-    "using Flux: onehotbatch, @epochs;"
+    "using MLUtils: DataLoader;\n",
+    "using OneHotArrays: onehotbatch\n",
+    "using Metrics;"
    ]
   },
   {
@@ -92,8 +93,10 @@
     }
    ],
    "source": [
-    "function logitcrossentropy(ŷ, y; dims = 1, agg = mean)\n",
-    "    agg(.-sum(y .* logsoftmax(ŷ; dims = dims); dims = dims));\n",
+    "function logitcrossentropy(ŷ, y; dims=1, agg=mean)\n",
+    "  # Compute cross entropy loss from logits\n",
+    "  # Cross entropy computed from NLL loss on logsoftmax of model outputs\n",
+    "    agg(.-sum(y .* logsoftmax(ŷ; dims=dims); dims=dims));\n",
     "  end"
    ]
   },
@@ -115,6 +118,7 @@
     "    fc2::Linear\n",
     "end\n",
     "\n",
+    "# Need to mark functor for Optimizer to work\n",
     "@functor Net\n",
     "\n",
     "# Init\n",
@@ -204,7 +208,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "loader = DataLoader((X_train, Y_train), shuffle=true, batchsize=128);"
+    "train_loader = DataLoader((X_train, Y_train), shuffle=true, batchsize=128);\n",
+    "test_loader = DataLoader((X_test, Y_test), shuffle=false, batchsize=128);"
    ]
   },
   {
@@ -216,7 +221,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [
     {
@@ -225,7 +230,7 @@
        "Net(Linear(784=>100), Linear(100=>10))"
       ]
      },
-     "execution_count": 14,
+     "execution_count": 10,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -237,7 +242,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -248,7 +253,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 12,
    "metadata": {},
    "outputs": [
     {
@@ -257,13 +262,13 @@
        "loss_function (generic function with 1 method)"
       ]
      },
-     "execution_count": 16,
+     "execution_count": 12,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "# Create objective function\n",
+    "# Create objective function to optimize\n",
     "function loss_function(model::Net, x::AbstractArray, y::AbstractArray)\n",
     "    ŷ = model(x)\n",
     "    loss = logitcrossentropy(ŷ, y)\n",
@@ -280,157 +285,157 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 13,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "  0.774987 seconds (162.82 k allocations: 1.951 GiB, 12.71% gc time)\n"
+      " 25.837786 seconds (157.79 M allocations: 10.050 GiB, 3.78% gc time, 91.53% compilation time: 0% of which was recompilation)\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "┌ Info: epoch 1 loss = 0.04094025307690705\n",
-      "└ @ Main In[18]:12\n"
+      "┌ Info: epoch 1 loss = 0.4370590642396691\n",
+      "└ @ Main In[13]:13\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "  0.646125 seconds (163.01 k allocations: 1.951 GiB, 10.26% gc time)\n"
+      "  0.757983 seconds (163.01 k allocations: 1.951 GiB, 11.52% gc time)\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "┌ Info: epoch 2 loss = 0.03572923218462636\n",
-      "└ @ Main In[18]:12\n"
+      "┌ Info: epoch 2 loss = 0.20768273653621058\n",
+      "└ @ Main In[13]:13\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "  0.652080 seconds (163.01 k allocations: 1.951 GiB, 9.54% gc time)\n"
+      "  0.755957 seconds (163.01 k allocations: 1.951 GiB, 9.69% gc time)\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "┌ Info: epoch 3 loss = 0.03166147383022924\n",
-      "└ @ Main In[18]:12\n"
+      "┌ Info: epoch 3 loss = 0.15264513650689454\n",
+      "└ @ Main In[13]:13\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "  0.682782 seconds (163.01 k allocations: 1.951 GiB, 11.74% gc time)\n"
+      "  0.766106 seconds (163.01 k allocations: 1.951 GiB, 11.41% gc time)\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "┌ Info: epoch 4 loss = 0.027879119237525167\n",
-      "└ @ Main In[18]:12\n"
+      "┌ Info: epoch 4 loss = 0.12068533624691201\n",
+      "└ @ Main In[13]:13\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "  0.666789 seconds (163.01 k allocations: 1.951 GiB, 8.82% gc time)\n"
+      "  0.790535 seconds (163.01 k allocations: 1.951 GiB, 11.73% gc time)\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "┌ Info: epoch 5 loss = 0.02481490266472439\n",
-      "└ @ Main In[18]:12\n"
+      "┌ Info: epoch 5 loss = 0.09948631426212774\n",
+      "└ @ Main In[13]:13\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "  0.624532 seconds (163.01 k allocations: 1.951 GiB, 8.85% gc time)\n"
+      "  0.749042 seconds (163.01 k allocations: 1.951 GiB, 10.17% gc time)\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "┌ Info: epoch 6 loss = 0.021184432907923646\n",
-      "└ @ Main In[18]:12\n"
+      "┌ Info: epoch 6 loss = 0.08462848175017493\n",
+      "└ @ Main In[13]:13\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "  0.604429 seconds (163.01 k allocations: 1.951 GiB, 8.57% gc time)\n"
+      "  0.763076 seconds (163.01 k allocations: 1.951 GiB, 11.58% gc time)\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "┌ Info: epoch 7 loss = 0.019366974012950174\n",
-      "└ @ Main In[18]:12\n"
+      "┌ Info: epoch 7 loss = 0.07293157247018839\n",
+      "└ @ Main In[13]:13\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "  0.622498 seconds (163.01 k allocations: 1.951 GiB, 8.82% gc time)\n"
+      "  0.737009 seconds (163.01 k allocations: 1.951 GiB, 8.39% gc time)\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "┌ Info: epoch 8 loss = 0.016626981172646552\n",
-      "└ @ Main In[18]:12\n"
+      "┌ Info: epoch 8 loss = 0.06216332802989073\n",
+      "└ @ Main In[13]:13\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "  0.610364 seconds (163.01 k allocations: 1.951 GiB, 8.79% gc time)\n"
+      "  0.724461 seconds (163.02 k allocations: 1.951 GiB, 8.17% gc time)\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "┌ Info: epoch 9 loss = 0.014004979298361142\n",
-      "└ @ Main In[18]:12\n"
+      "┌ Info: epoch 9 loss = 0.05457949791791655\n",
+      "└ @ Main In[13]:13\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "  0.661410 seconds (163.01 k allocations: 1.951 GiB, 10.90% gc time)\n"
+      "  0.742743 seconds (163.01 k allocations: 1.951 GiB, 9.23% gc time)\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "┌ Info: epoch 10 loss = 0.012511318528209223\n",
-      "└ @ Main In[18]:12\n"
+      "┌ Info: epoch 10 loss = 0.047562408875107556\n",
+      "└ @ Main In[13]:13\n"
      ]
     }
    ],
@@ -438,19 +443,103 @@
     "for epoch in 1:10\n",
     "    losses = []\n",
     "    @time begin\n",
-    "        for (x, y) in loader\n",
+    "        for (x, y) in train_loader\n",
     "            # loss_function does forward pass\n",
-    "            # Yota.jl grad function computes parameter gradients\n",
+    "            # Yota.jl grad function computes model parameter gradients in g[2]\n",
     "            loss, g = grad(loss_function, mlp, x, y)\n",
+    "            \n",
     "            # Optimiser updates parameters\n",
     "            Optimisers.update!(state, mlp, g[2])\n",
     "            push!(losses, loss)\n",
-    "            # TODO: Add accuracy computation\n",
     "        end\n",
     "        @info(\"epoch $epoch loss = $(mean(losses))\")\n",
     "    end\n",
     "end"
    ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Evaluation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "evaluate (generic function with 1 method)"
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "function evaluate(mlp, test_loader)\n",
+    "    preds = []\n",
+    "    targets = []\n",
+    "    @time begin\n",
+    "        for (x, y) in test_loader\n",
+    "            # Get model predictions\n",
+    "            # Note argmax of nd-array gives CartesianIndex\n",
+    "            # Need to grab the first element of each CartesianIndex to get the true index\n",
+    "            logits = mlp(x)\n",
+    "            ŷ = map(i -> i[1], argmax(logits, dims=1))\n",
+    "            append!(preds, ŷ)\n",
+    "\n",
+    "            # Get true labels\n",
+    "            true_label = map(i -> i[1], argmax(y, dims=1))\n",
+    "            append!(targets, true_label)\n",
+    "        end\n",
+    "    end\n",
+    "    accuracy = sum(preds .== targets) / length(targets)\n",
+    "    return accuracy\n",
+    "end"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "  0.068577 seconds (3.42 k allocations: 115.477 MiB, 27.36% gc time)\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "0.9766"
+      ]
+     },
+     "execution_count": 17,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "evaluate(mlp, test_loader)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Avg time per train epoch: 0.753 sec\n",
+    "# Total time for train (with compile): 32.6 sec\n",
+    "# Avg time per eval step: 0.77 ms"
+   ]
   }
  ],
  "metadata": {

From 54ed384be3bb71dc393dac5d0c56544ef6ec12b9 Mon Sep 17 00:00:00 2001
From: Trevor Yu <trevor.c.yu@gmail.com>
Date: Mon, 31 Oct 2022 20:41:23 -0400
Subject: [PATCH 4/8] Update .gitignore to include JSON outputs

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index f33838e..a85dddb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -154,3 +154,4 @@ docs/site/
 # committed for packages, but should be committed for applications that require a static
 # environment.
 Manifest.toml
+*.json

From dcab3459c9d6c2dc384a29f78a07e5c6d8bbc72a Mon Sep 17 00:00:00 2001
From: Trevor Yu <trevor.c.yu@gmail.com>
Date: Mon, 31 Oct 2022 20:41:48 -0400
Subject: [PATCH 5/8] Finish notebook for avalon.jl

---
 multi-layer-perceptron/avalon-mlp.ipynb | 440 +++++++++++++-----------
 1 file changed, 243 insertions(+), 197 deletions(-)

diff --git a/multi-layer-perceptron/avalon-mlp.ipynb b/multi-layer-perceptron/avalon-mlp.ipynb
index 7bfd691..54e6bad 100644
--- a/multi-layer-perceptron/avalon-mlp.ipynb
+++ b/multi-layer-perceptron/avalon-mlp.ipynb
@@ -22,7 +22,8 @@
     "using Optimisers;\n",
     "using MLUtils: DataLoader;\n",
     "using OneHotArrays: onehotbatch\n",
-    "using Metrics;"
+    "using Metrics;\n",
+    "using TimerOutputs;"
    ]
   },
   {
@@ -208,8 +209,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "train_loader = DataLoader((X_train, Y_train), shuffle=true, batchsize=128);\n",
-    "test_loader = DataLoader((X_test, Y_test), shuffle=false, batchsize=128);"
+    "batch_size = 128;\n",
+    "train_loader = DataLoader((X_train, Y_train), shuffle=true, batchsize=batch_size);\n",
+    "test_loader = DataLoader((X_test, Y_test), shuffle=false, batchsize=batch_size);"
    ]
   },
   {
@@ -280,7 +282,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Training loop"
+    "# Evaluation function"
    ]
   },
   {
@@ -289,171 +291,34 @@
    "metadata": {},
    "outputs": [
     {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      " 25.837786 seconds (157.79 M allocations: 10.050 GiB, 3.78% gc time, 91.53% compilation time: 0% of which was recompilation)\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "┌ Info: epoch 1 loss = 0.4370590642396691\n",
-      "└ @ Main In[13]:13\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "  0.757983 seconds (163.01 k allocations: 1.951 GiB, 11.52% gc time)\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "┌ Info: epoch 2 loss = 0.20768273653621058\n",
-      "└ @ Main In[13]:13\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "  0.755957 seconds (163.01 k allocations: 1.951 GiB, 9.69% gc time)\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "┌ Info: epoch 3 loss = 0.15264513650689454\n",
-      "└ @ Main In[13]:13\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "  0.766106 seconds (163.01 k allocations: 1.951 GiB, 11.41% gc time)\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "┌ Info: epoch 4 loss = 0.12068533624691201\n",
-      "└ @ Main In[13]:13\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "  0.790535 seconds (163.01 k allocations: 1.951 GiB, 11.73% gc time)\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "┌ Info: epoch 5 loss = 0.09948631426212774\n",
-      "└ @ Main In[13]:13\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "  0.749042 seconds (163.01 k allocations: 1.951 GiB, 10.17% gc time)\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "┌ Info: epoch 6 loss = 0.08462848175017493\n",
-      "└ @ Main In[13]:13\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "  0.763076 seconds (163.01 k allocations: 1.951 GiB, 11.58% gc time)\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "┌ Info: epoch 7 loss = 0.07293157247018839\n",
-      "└ @ Main In[13]:13\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "  0.737009 seconds (163.01 k allocations: 1.951 GiB, 8.39% gc time)\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "┌ Info: epoch 8 loss = 0.06216332802989073\n",
-      "└ @ Main In[13]:13\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "  0.724461 seconds (163.02 k allocations: 1.951 GiB, 8.17% gc time)\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "┌ Info: epoch 9 loss = 0.05457949791791655\n",
-      "└ @ Main In[13]:13\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "  0.742743 seconds (163.01 k allocations: 1.951 GiB, 9.23% gc time)\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "┌ Info: epoch 10 loss = 0.047562408875107556\n",
-      "└ @ Main In[13]:13\n"
-     ]
+     "data": {
+      "text/plain": [
+       "evaluate (generic function with 1 method)"
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
     }
    ],
    "source": [
-    "for epoch in 1:10\n",
-    "    losses = []\n",
-    "    @time begin\n",
-    "        for (x, y) in train_loader\n",
-    "            # loss_function does forward pass\n",
-    "            # Yota.jl grad function computes model parameter gradients in g[2]\n",
-    "            loss, g = grad(loss_function, mlp, x, y)\n",
-    "            \n",
-    "            # Optimiser updates parameters\n",
-    "            Optimisers.update!(state, mlp, g[2])\n",
-    "            push!(losses, loss)\n",
-    "        end\n",
-    "        @info(\"epoch $epoch loss = $(mean(losses))\")\n",
+    "function evaluate(mlp, test_loader)\n",
+    "    preds = []\n",
+    "    targets = []\n",
+    "    for (x, y) in test_loader\n",
+    "        # Get model predictions\n",
+    "        # Note argmax of nd-array gives CartesianIndex\n",
+    "        # Need to grab the first element of each CartesianIndex to get the true index\n",
+    "        logits = mlp(x)\n",
+    "        ŷ = map(i -> i[1], argmax(logits, dims=1))\n",
+    "        append!(preds, ŷ)\n",
+    "\n",
+    "        # Get true labels\n",
+    "        true_label = map(i -> i[1], argmax(y, dims=1))\n",
+    "        append!(targets, true_label)\n",
     "    end\n",
+    "    accuracy = sum(preds .== targets) / length(targets)\n",
+    "    return accuracy\n",
     "end"
    ]
   },
@@ -461,7 +326,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Evaluation"
+    "# Training loop"
    ]
   },
   {
@@ -472,7 +337,14 @@
     {
      "data": {
       "text/plain": [
-       "evaluate (generic function with 1 method)"
+       "\u001b[0m\u001b[1m ────────────────────────────────────────────────────────────────────\u001b[22m\n",
+       "\u001b[0m\u001b[1m                   \u001b[22m         Time                    Allocations      \n",
+       "                   ───────────────────────   ────────────────────────\n",
+       " Tot / % measured:      345ms /   0.0%           45.3MiB /   0.0%    \n",
+       "\n",
+       " Section   ncalls     time    %tot     avg     alloc    %tot      avg\n",
+       " ────────────────────────────────────────────────────────────────────\n",
+       "\u001b[0m\u001b[1m ────────────────────────────────────────────────────────────────────\u001b[22m"
       ]
      },
      "execution_count": 14,
@@ -481,44 +353,131 @@
     }
    ],
    "source": [
-    "function evaluate(mlp, test_loader)\n",
-    "    preds = []\n",
-    "    targets = []\n",
-    "    @time begin\n",
-    "        for (x, y) in test_loader\n",
-    "            # Get model predictions\n",
-    "            # Note argmax of nd-array gives CartesianIndex\n",
-    "            # Need to grab the first element of each CartesianIndex to get the true index\n",
-    "            logits = mlp(x)\n",
-    "            ŷ = map(i -> i[1], argmax(logits, dims=1))\n",
-    "            append!(preds, ŷ)\n",
-    "\n",
-    "            # Get true labels\n",
-    "            true_label = map(i -> i[1], argmax(y, dims=1))\n",
-    "            append!(targets, true_label)\n",
-    "        end\n",
-    "    end\n",
-    "    accuracy = sum(preds .== targets) / length(targets)\n",
-    "    return accuracy\n",
-    "end"
+    "# Setup timing output\n",
+    "const to = TimerOutput()"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 15,
    "metadata": {},
    "outputs": [
     {
-     "name": "stdout",
+     "name": "stderr",
      "output_type": "stream",
      "text": [
-      "  0.068577 seconds (3.42 k allocations: 115.477 MiB, 27.36% gc time)\n"
+      "┌ Info: epoch 1 loss = 0.4375657584243529\n",
+      "└ @ Main In[15]:17\n",
+      "┌ Info: epoch 1 eval accuracy = 0.9304\n",
+      "└ @ Main In[15]:22\n",
+      "┌ Info: epoch 2 loss = 0.21220267083301655\n",
+      "└ @ Main In[15]:17\n",
+      "┌ Info: epoch 2 eval accuracy = 0.9471\n",
+      "└ @ Main In[15]:22\n",
+      "┌ Info: epoch 3 loss = 0.1592959047496635\n",
+      "└ @ Main In[15]:17\n",
+      "┌ Info: epoch 3 eval accuracy = 0.9596\n",
+      "└ @ Main In[15]:22\n",
+      "┌ Info: epoch 4 loss = 0.12536456292602383\n",
+      "└ @ Main In[15]:17\n",
+      "┌ Info: epoch 4 eval accuracy = 0.9649\n",
+      "└ @ Main In[15]:22\n",
+      "┌ Info: epoch 5 loss = 0.10332849547590327\n",
+      "└ @ Main In[15]:17\n",
+      "┌ Info: epoch 5 eval accuracy = 0.9678\n",
+      "└ @ Main In[15]:22\n",
+      "┌ Info: epoch 6 loss = 0.08698907676456932\n",
+      "└ @ Main In[15]:17\n",
+      "┌ Info: epoch 6 eval accuracy = 0.9693\n",
+      "└ @ Main In[15]:22\n",
+      "┌ Info: epoch 7 loss = 0.0743854635873991\n",
+      "└ @ Main In[15]:17\n",
+      "┌ Info: epoch 7 eval accuracy = 0.9723\n",
+      "└ @ Main In[15]:22\n",
+      "┌ Info: epoch 8 loss = 0.06411696637116769\n",
+      "└ @ Main In[15]:17\n",
+      "┌ Info: epoch 8 eval accuracy = 0.9747\n",
+      "└ @ Main In[15]:22\n",
+      "┌ Info: epoch 9 loss = 0.055365824426301447\n",
+      "└ @ Main In[15]:17\n",
+      "┌ Info: epoch 9 eval accuracy = 0.9742\n",
+      "└ @ Main In[15]:22\n",
+      "┌ Info: epoch 10 loss = 0.04838905479051723\n",
+      "└ @ Main In[15]:17\n",
+      "┌ Info: epoch 10 eval accuracy = 0.9744\n",
+      "└ @ Main In[15]:22\n"
      ]
-    },
+    }
+   ],
+   "source": [
+    "last_loss = 0;\n",
+    "@timeit to \"total_training_time\" begin\n",
+    "    for epoch in 1:10\n",
+    "        timing_name = epoch > 1 ? \"average_epoch_training_time\" : \"train_jit\"\n",
+    "        @timeit to timing_name begin\n",
+    "            losses = []\n",
+    "            for (x, y) in train_loader\n",
+    "                # loss_function does forward pass\n",
+    "                # Yota.jl grad function computes model parameter gradients in g[2]\n",
+    "                loss, g = grad(loss_function, mlp, x, y)\n",
+    "                \n",
+    "                # Optimiser updates parameters\n",
+    "                Optimisers.update!(state, mlp, g[2])\n",
+    "                push!(losses, loss)\n",
+    "            end\n",
+    "            last_loss = mean(losses)\n",
+    "            @info(\"epoch $epoch loss = $(mean(losses))\")\n",
+    "        end\n",
+    "        timing_name = epoch > 1 ? \"average_inference_time\" : \"eval_jit\"\n",
+    "        @timeit to timing_name begin\n",
+    "            acc = evaluate(mlp, test_loader)\n",
+    "            @info(\"epoch $epoch eval accuracy = $(acc)\")\n",
+    "        end\n",
+    "    end\n",
+    "end"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [
     {
      "data": {
       "text/plain": [
-       "0.9766"
+       "\u001b[0m\u001b[1m ────────────────────────────────────────────────────────────────────────────────\u001b[22m\n",
+       "\u001b[0m\u001b[1m                               \u001b[22m         Time                    Allocations      \n",
+       "                               ───────────────────────   ────────────────────────\n",
+       "       Tot / % measured:            33.3s /  98.1%           29.0GiB /  99.7%    \n",
+       "\n",
+       " Section               ncalls     time    %tot     avg     alloc    %tot      avg\n",
+       " ────────────────────────────────────────────────────────────────────────────────\n",
+       " total_training_time        1    32.6s  100.0%   32.6s   28.9GiB  100.0%  28.9GiB\n",
+       "   train_jit                1    26.0s   79.6%   26.0s   10.0GiB   34.8%  10.0GiB\n",
+       "   average_epoch_tr...      9    5.80s   17.8%   645ms   17.6GiB   60.8%  1.95GiB\n",
+       "   eval_jit                 1    559ms    1.7%   559ms    283MiB    1.0%   283MiB\n",
+       "   average_inferenc...      9    296ms    0.9%  32.9ms   1.02GiB    3.5%   116MiB\n",
+       "\u001b[0m\u001b[1m ────────────────────────────────────────────────────────────────────────────────\u001b[22m"
+      ]
+     },
+     "execution_count": 16,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "to"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.644896347"
       ]
      },
      "execution_count": 17,
@@ -527,18 +486,105 @@
     }
    ],
    "source": [
-    "evaluate(mlp, test_loader)"
+    "# Train time\n",
+    "# Exclude jit time\n",
+    "average_epoch_train_time = TimerOutputs.time(to[\"total_training_time\"][\"average_epoch_training_time\"]) / (9 * 1e9)  # Outputs in nanoseconds, convert to seconds"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.41594755133614625"
+      ]
+     },
+     "execution_count": 18,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Eval batch time\n",
+    "# Exclude jit time\n",
+    "num_batches = length(test_loader)\n",
+    "average_eval_batch_time = TimerOutputs.time(to[\"total_training_time\"][\"average_inference_time\"]) / (9 * 1e6 * num_batches)  # Outputs in nanoseconds, convert to milliseconds"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.9744"
+      ]
+     },
+     "execution_count": 19,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "total_train_time = TimerOutputs.time(to[\"total_training_time\"])\n",
+    "final_eval_accuracy = evaluate(mlp, test_loader)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Dict{String, Any} with 9 entries:\n",
+       "  \"task\"                         => \"classification\"\n",
+       "  \"framework_name\"               => \"Avalon.jl\"\n",
+       "  \"final_trianing_loss\"          => 0.0483891\n",
+       "  \"total_training_time\"          => 32647958292\n",
+       "  \"average_epoch_training_time\"  => 0.644896\n",
+       "  \"final_evaluation_accuracy\"    => 0.9744\n",
+       "  \"model_name\"                   => \"MLP\"\n",
+       "  \"dataset\"                      => \"MNIST Digits\"\n",
+       "  \"average_batch_inference_time\" => 0.415948"
+      ]
+     },
+     "execution_count": 20,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "metrics = Dict(\n",
+    "    \"model_name\" => \"MLP\",\n",
+    "    \"dataset\" => \"MNIST Digits\",\n",
+    "    \"framework_name\" => \"Avalon.jl\",\n",
+    "    \"task\" => \"classification\",\n",
+    "    \"total_training_time\" => total_train_time,\n",
+    "    \"average_epoch_training_time\" => average_epoch_train_time,\n",
+    "    \"average_batch_inference_time\" => average_eval_batch_time,\n",
+    "    \"final_trianing_loss\" => last_loss,\n",
+    "    \"final_evaluation_accuracy\" => final_eval_accuracy\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Avg time per train epoch: 0.753 sec\n",
-    "# Total time for train (with compile): 32.6 sec\n",
-    "# Avg time per eval step: 0.77 ms"
+    "using JSON;\n",
+    "\n",
+    "open(\"m1-avalon-mlp.json\",\"w\") do f\n",
+    "    JSON.print(f, metrics)\n",
+    "end"
    ]
   }
  ],

From 2b106762a235a3bd522a6af13cf4112b400cfb66 Mon Sep 17 00:00:00 2001
From: Trevor Yu <trevor.c.yu@gmail.com>
Date: Mon, 31 Oct 2022 20:42:38 -0400
Subject: [PATCH 6/8] Rename files

---
 .../{avalon-mlp.ipynb => avalon_notebook.ipynb}                   | 0
 multi-layer-perceptron/{avalon-mlp.jl => avalon_test.jl}          | 0
 2 files changed, 0 insertions(+), 0 deletions(-)
 rename multi-layer-perceptron/{avalon-mlp.ipynb => avalon_notebook.ipynb} (100%)
 rename multi-layer-perceptron/{avalon-mlp.jl => avalon_test.jl} (100%)

diff --git a/multi-layer-perceptron/avalon-mlp.ipynb b/multi-layer-perceptron/avalon_notebook.ipynb
similarity index 100%
rename from multi-layer-perceptron/avalon-mlp.ipynb
rename to multi-layer-perceptron/avalon_notebook.ipynb
diff --git a/multi-layer-perceptron/avalon-mlp.jl b/multi-layer-perceptron/avalon_test.jl
similarity index 100%
rename from multi-layer-perceptron/avalon-mlp.jl
rename to multi-layer-perceptron/avalon_test.jl

From ec1582a0731b31d069cdd3f75b8f7ee4033d8a4b Mon Sep 17 00:00:00 2001
From: Trevor Yu <trevor.c.yu@gmail.com>
Date: Mon, 31 Oct 2022 20:45:49 -0400
Subject: [PATCH 7/8] Convert total training time to seconds

---
 multi-layer-perceptron/avalon_notebook.ipynb | 72 ++++++++++----------
 1 file changed, 36 insertions(+), 36 deletions(-)

diff --git a/multi-layer-perceptron/avalon_notebook.ipynb b/multi-layer-perceptron/avalon_notebook.ipynb
index 54e6bad..caa2c1d 100644
--- a/multi-layer-perceptron/avalon_notebook.ipynb
+++ b/multi-layer-perceptron/avalon_notebook.ipynb
@@ -340,7 +340,7 @@
        "\u001b[0m\u001b[1m ────────────────────────────────────────────────────────────────────\u001b[22m\n",
        "\u001b[0m\u001b[1m                   \u001b[22m         Time                    Allocations      \n",
        "                   ───────────────────────   ────────────────────────\n",
-       " Tot / % measured:      345ms /   0.0%           45.3MiB /   0.0%    \n",
+       " Tot / % measured:      341ms /   0.0%           45.3MiB /   0.0%    \n",
        "\n",
        " Section   ncalls     time    %tot     avg     alloc    %tot      avg\n",
        " ────────────────────────────────────────────────────────────────────\n",
@@ -366,45 +366,45 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "┌ Info: epoch 1 loss = 0.4375657584243529\n",
+      "┌ Info: epoch 1 loss = 0.43098854388539676\n",
       "└ @ Main In[15]:17\n",
-      "┌ Info: epoch 1 eval accuracy = 0.9304\n",
+      "┌ Info: epoch 1 eval accuracy = 0.9329\n",
       "└ @ Main In[15]:22\n",
-      "┌ Info: epoch 2 loss = 0.21220267083301655\n",
+      "┌ Info: epoch 2 loss = 0.20844910683454007\n",
       "└ @ Main In[15]:17\n",
-      "┌ Info: epoch 2 eval accuracy = 0.9471\n",
+      "┌ Info: epoch 2 eval accuracy = 0.9482\n",
       "└ @ Main In[15]:22\n",
-      "┌ Info: epoch 3 loss = 0.1592959047496635\n",
+      "┌ Info: epoch 3 loss = 0.15448442086061948\n",
       "└ @ Main In[15]:17\n",
-      "┌ Info: epoch 3 eval accuracy = 0.9596\n",
+      "┌ Info: epoch 3 eval accuracy = 0.9593\n",
       "└ @ Main In[15]:22\n",
-      "┌ Info: epoch 4 loss = 0.12536456292602383\n",
+      "┌ Info: epoch 4 loss = 0.12039177602707632\n",
       "└ @ Main In[15]:17\n",
-      "┌ Info: epoch 4 eval accuracy = 0.9649\n",
+      "┌ Info: epoch 4 eval accuracy = 0.966\n",
       "└ @ Main In[15]:22\n",
-      "┌ Info: epoch 5 loss = 0.10332849547590327\n",
+      "┌ Info: epoch 5 loss = 0.0983232690863611\n",
       "└ @ Main In[15]:17\n",
-      "┌ Info: epoch 5 eval accuracy = 0.9678\n",
+      "┌ Info: epoch 5 eval accuracy = 0.9675\n",
       "└ @ Main In[15]:22\n",
-      "┌ Info: epoch 6 loss = 0.08698907676456932\n",
+      "┌ Info: epoch 6 loss = 0.08252257340927549\n",
       "└ @ Main In[15]:17\n",
-      "┌ Info: epoch 6 eval accuracy = 0.9693\n",
+      "┌ Info: epoch 6 eval accuracy = 0.972\n",
       "└ @ Main In[15]:22\n",
-      "┌ Info: epoch 7 loss = 0.0743854635873991\n",
+      "┌ Info: epoch 7 loss = 0.07077896451852372\n",
       "└ @ Main In[15]:17\n",
-      "┌ Info: epoch 7 eval accuracy = 0.9723\n",
+      "┌ Info: epoch 7 eval accuracy = 0.9729\n",
       "└ @ Main In[15]:22\n",
-      "┌ Info: epoch 8 loss = 0.06411696637116769\n",
+      "┌ Info: epoch 8 loss = 0.06109750930723943\n",
       "└ @ Main In[15]:17\n",
-      "┌ Info: epoch 8 eval accuracy = 0.9747\n",
+      "┌ Info: epoch 8 eval accuracy = 0.9755\n",
       "└ @ Main In[15]:22\n",
-      "┌ Info: epoch 9 loss = 0.055365824426301447\n",
+      "┌ Info: epoch 9 loss = 0.05307631371444329\n",
       "└ @ Main In[15]:17\n",
-      "┌ Info: epoch 9 eval accuracy = 0.9742\n",
+      "┌ Info: epoch 9 eval accuracy = 0.976\n",
       "└ @ Main In[15]:22\n",
-      "┌ Info: epoch 10 loss = 0.04838905479051723\n",
+      "┌ Info: epoch 10 loss = 0.04616427247831039\n",
       "└ @ Main In[15]:17\n",
-      "┌ Info: epoch 10 eval accuracy = 0.9744\n",
+      "┌ Info: epoch 10 eval accuracy = 0.9757\n",
       "└ @ Main In[15]:22\n"
      ]
     }
@@ -448,15 +448,15 @@
        "\u001b[0m\u001b[1m ────────────────────────────────────────────────────────────────────────────────\u001b[22m\n",
        "\u001b[0m\u001b[1m                               \u001b[22m         Time                    Allocations      \n",
        "                               ───────────────────────   ────────────────────────\n",
-       "       Tot / % measured:            33.3s /  98.1%           29.0GiB /  99.7%    \n",
+       "       Tot / % measured:            33.6s /  98.1%           29.0GiB /  99.7%    \n",
        "\n",
        " Section               ncalls     time    %tot     avg     alloc    %tot      avg\n",
        " ────────────────────────────────────────────────────────────────────────────────\n",
-       " total_training_time        1    32.6s  100.0%   32.6s   28.9GiB  100.0%  28.9GiB\n",
-       "   train_jit                1    26.0s   79.6%   26.0s   10.0GiB   34.8%  10.0GiB\n",
-       "   average_epoch_tr...      9    5.80s   17.8%   645ms   17.6GiB   60.8%  1.95GiB\n",
+       " total_training_time        1    32.9s  100.0%   32.9s   28.9GiB  100.0%  28.9GiB\n",
+       "   train_jit                1    26.2s   79.7%   26.2s   10.0GiB   34.8%  10.0GiB\n",
+       "   average_epoch_tr...      9    5.82s   17.7%   646ms   17.6GiB   60.8%  1.95GiB\n",
        "   eval_jit                 1    559ms    1.7%   559ms    283MiB    1.0%   283MiB\n",
-       "   average_inferenc...      9    296ms    0.9%  32.9ms   1.02GiB    3.5%   116MiB\n",
+       "   average_inferenc...      9    305ms    0.9%  33.9ms   1.02GiB    3.5%   116MiB\n",
        "\u001b[0m\u001b[1m ────────────────────────────────────────────────────────────────────────────────\u001b[22m"
       ]
      },
@@ -477,7 +477,7 @@
     {
      "data": {
       "text/plain": [
-       "0.644896347"
+       "0.6461179073333333"
       ]
      },
      "execution_count": 17,
@@ -499,7 +499,7 @@
     {
      "data": {
       "text/plain": [
-       "0.41594755133614625"
+       "0.42875644444444444"
       ]
      },
      "execution_count": 18,
@@ -522,7 +522,7 @@
     {
      "data": {
       "text/plain": [
-       "0.9744"
+       "0.9757"
       ]
      },
      "execution_count": 19,
@@ -531,7 +531,7 @@
     }
    ],
    "source": [
-    "total_train_time = TimerOutputs.time(to[\"total_training_time\"])\n",
+    "total_train_time = TimerOutputs.time(to[\"total_training_time\"]) / 1e9  # Convert nanos to seconds\n",
     "final_eval_accuracy = evaluate(mlp, test_loader)"
    ]
   },
@@ -546,13 +546,13 @@
        "Dict{String, Any} with 9 entries:\n",
        "  \"task\"                         => \"classification\"\n",
        "  \"framework_name\"               => \"Avalon.jl\"\n",
-       "  \"final_trianing_loss\"          => 0.0483891\n",
-       "  \"total_training_time\"          => 32647958292\n",
-       "  \"average_epoch_training_time\"  => 0.644896\n",
-       "  \"final_evaluation_accuracy\"    => 0.9744\n",
+       "  \"final_trianing_loss\"          => 0.0461643\n",
+       "  \"total_training_time\"          => 32.9112\n",
+       "  \"average_epoch_training_time\"  => 0.646118\n",
+       "  \"final_evaluation_accuracy\"    => 0.9757\n",
        "  \"model_name\"                   => \"MLP\"\n",
        "  \"dataset\"                      => \"MNIST Digits\"\n",
-       "  \"average_batch_inference_time\" => 0.415948"
+       "  \"average_batch_inference_time\" => 0.428756"
       ]
      },
      "execution_count": 20,
@@ -576,7 +576,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 22,
+   "execution_count": 21,
    "metadata": {},
    "outputs": [],
    "source": [

From 2bd6f7212f48f6a273d997e5475cd3258c82635e Mon Sep 17 00:00:00 2001
From: Trevor Yu <trevor.c.yu@gmail.com>
Date: Mon, 31 Oct 2022 21:05:50 -0400
Subject: [PATCH 8/8] Finish script

---
 multi-layer-perceptron/avalon_test.jl | 203 +++++++++++++++++++++++++-
 1 file changed, 202 insertions(+), 1 deletion(-)

diff --git a/multi-layer-perceptron/avalon_test.jl b/multi-layer-perceptron/avalon_test.jl
index 4833830..af2e756 100644
--- a/multi-layer-perceptron/avalon_test.jl
+++ b/multi-layer-perceptron/avalon_test.jl
@@ -1 +1,202 @@
-# Avalon.jl implementation of multi-layer perceptron
\ No newline at end of file
+# Avalon.jl implementation of multi-layer perceptron
+
+# Imports
+using Yota;
+using MLDatasets;
+using NNlib;
+using Statistics;
+using Distributions;
+using Functors;
+using Optimisers;
+using MLUtils: DataLoader;
+using OneHotArrays: onehotbatch
+using Metrics;
+using TimerOutputs;
+using JSON;
+
+
+# Primitives
+# Linear layer: weight matrix `W` and bias vector `b`, applied as W * x .+ b
+mutable struct Linear
+    W::AbstractMatrix{T} where T  # NOTE(review): `T` is scoped per field, so both
+    b::AbstractVector{T} where T  # fields are abstractly typed and eltypes may differ
+end
+
+@functor Linear
+
+# Init: W (out x in) and b (out) are sampled from Uniform(-k, k), k = sqrt(1 / in_features)
+function Linear(in_features::Int, out_features::Int)
+    bound = sqrt(1 / in_features)
+    W = rand(Uniform(-bound, bound), out_features, in_features)
+    return Linear(W, rand(Uniform(-bound, bound), out_features))
+end
+Linear(in_out::Pair{Int, Int}) = Linear(first(in_out), last(in_out))  # Linear(784 => 100)
+
+function Base.show(io::IO, l::Linear)  # compact form, e.g. Linear(784=>100)
+    n_out, n_in = size(l.W)
+    print(io, "Linear(", n_in, "=>", n_out, ")")
+end
+
+# Forward: affine map; `l.b` is broadcast over the columns when `x` is a matrix
+(l::Linear)(x::AbstractVecOrMat{T}) where {T} = l.W * x .+ l.b
+
+# Cross entropy from logits: NLL of logsoftmax(ŷ) summed over `dims`, reduced by `agg`
+function logitcrossentropy(ŷ, y; dims=1, agg=mean)
+    log_probs = logsoftmax(ŷ; dims=dims)
+    nll = sum(y .* log_probs; dims=dims)
+    return agg(.-nll)
+end
+
+
+# Model definition: a two-layer perceptron made of two Linear layers
+mutable struct Net
+    fc1::Linear  # first layer; the Net() constructor builds it as 28*28 -> 100
+    fc2::Linear  # second layer; the Net() constructor builds it as 100 -> 10
+end
+
+# Need to mark functor for the optimiser (Optimisers.setup / update! in main) to work
+@functor Net
+
+# Init: 28*28 inputs -> 100 hidden units -> 10 outputs
+function Net()
+    fc1 = Linear(28*28, 100)
+    return Net(fc1, Linear(100, 10))
+end
+
+# Forward: flatten the input, then fc1 -> relu -> fc2; returns the fc2 outputs
+function (model::Net)(x::AbstractArray)
+    # Columns hold 28*28 values each; `:` lets reshape infer the number of columns
+    flat = reshape(x, 28*28, :)
+    hidden = relu(model.fc1(flat))
+    # No softmax is applied here; the loss works on these raw outputs
+    return model.fc2(hidden)
+end
+
+# Training objective: cross entropy of the model's outputs for `x` against targets `y`
+function loss_function(model::Net, x::AbstractArray, y::AbstractArray)
+    # model(x) is the forward pass; its outputs are passed on as logits
+    # logitcrossentropy is called with its defaults (dims=1, agg=mean)
+    return logitcrossentropy(model(x), y)
+end
+
+
+# Evaluation function: fraction of samples whose arg-max output matches the label
+function evaluate(mlp::Net, test_loader::DataLoader)::Number
+    # Keep running integer counts rather than storing every prediction
+    n_correct = 0
+    n_total = 0
+    for (x, y) in test_loader
+        # Get model predictions
+        # Note argmax of nd-array gives CartesianIndex
+        # Need to grab the first element of each CartesianIndex to get the true index
+        ŷ = map(i -> i[1], argmax(mlp(x), dims=1))
+
+        # Get true labels (targets are expected to be one-hot along dims=1)
+        true_label = map(i -> i[1], argmax(y, dims=1))
+
+        n_correct += count(ŷ .== true_label)
+        n_total += length(true_label)
+    end
+    return n_correct / n_total
+end
+
+
+# Data loading and processing
+# Builds DataLoaders over the MNIST train and test splits. Features are flattened to
+# 784-element columns and targets are one-hot encoded over 0:9, both (dim x batch).
+# `batch_size` is the batch size used by both loaders.
+function get_data_loaders(; batch_size=128)
+    # Data loading
+    train_dataset = MNIST(split=:train)
+    test_dataset = MNIST(split=:test)
+
+    # Flatten features to be 784 dim
+    X_train = reshape(train_dataset.features, 784, :)
+    X_test = reshape(test_dataset.features, 784, :)
+
+    # Convert targets to one-hot vectors
+    Y_train = onehotbatch(train_dataset.targets, 0:9)
+    Y_test = onehotbatch(test_dataset.targets, 0:9)
+
+    # Only the training loader shuffles its samples;
+    # the test loader iterates in a fixed order
+    train_loader = DataLoader((X_train, Y_train), shuffle=true, batchsize=batch_size)
+    test_loader = DataLoader((X_test, Y_test), shuffle=false, batchsize=batch_size)
+
+    return train_loader, test_loader
+end
+
+# Setup timing: global timer that main() records its @timeit sections into
+const to = TimerOutput()
+
+# Train the MLP for 10 epochs, evaluate after every epoch, and write the metrics to JSON
+function main()
+    train_loader, test_loader = get_data_loaders(batch_size=128)
+
+    # Setup model and optimizer
+    mlp = Net()
+
+    # Default Β is (0.9, 0.999)
+    state = Optimisers.setup(Optimisers.Adam(1e-3), mlp)
+
+    # Training loop
+    last_loss = 0.0
+    @timeit to "total_training_time" begin
+        for epoch in 1:10
+            # Store the timing in the first epoch into a separate timer for jit
+            timing_name = epoch > 1 ? "train_epoch" : "train_jit"
+            @timeit to timing_name begin
+                losses = Float64[]
+                for (x, y) in train_loader
+                    # loss_function does forward pass
+                    # Yota.jl grad function computes model parameter gradients in g[2]
+                    loss, g = grad(loss_function, mlp, x, y)
+                    # Optimiser updates parameters; keep the returned state and model,
+                    # as update! documents that the old ones should not be reused
+                    state, mlp = Optimisers.update!(state, mlp, g[2])
+                    push!(losses, loss)
+                end
+                last_loss = mean(losses)
+                @info("epoch $epoch loss = $(last_loss)")
+            end
+            timing_name = epoch > 1 ? "eval_epoch" : "eval_jit"
+            @timeit to timing_name begin
+                acc = evaluate(mlp, test_loader)
+                @info("epoch $epoch eval accuracy = $(acc)")
+            end
+        end
+    end
+
+    # Compute timing metrics
+    # Epoch 1 is timed separately (*_jit), so the *_epoch timers cover 9 epochs
+    # Outputs in nanoseconds, convert to seconds
+    average_epoch_train_time = TimerOutputs.time(to["total_training_time"]["train_epoch"]) / (9 * 1e9)
+    total_train_time = TimerOutputs.time(to["total_training_time"]) / 1e9
+
+    num_batches = length(test_loader)
+    # Outputs in nanoseconds, convert to milliseconds
+    average_eval_batch_time = TimerOutputs.time(to["total_training_time"]["eval_epoch"]) / (9 * 1e6 * num_batches)
+
+    final_eval_accuracy = evaluate(mlp, test_loader)
+
+    metrics = Dict(
+        "model_name" => "MLP",
+        "dataset" => "MNIST Digits",
+        "framework_name" => "Avalon.jl",
+        "task" => "classification",
+        "total_training_time" => total_train_time,
+        "average_epoch_training_time" => average_epoch_train_time,
+        "average_batch_inference_time" => average_eval_batch_time,
+        "final_trianing_loss" => last_loss,  # NOTE(review): "trianing" typo kept; JSON consumers may rely on this key
+        "final_evaluation_accuracy" => final_eval_accuracy
+    )
+    open("m1-avalon-mlp.json","w") do f
+        JSON.print(f, metrics)
+    end
+end
+
+
+# Run main function only when this file is the program being executed (not when included)
+if abspath(PROGRAM_FILE) == @__FILE__
+    main()
+end
\ No newline at end of file