From 556149e5cd47b239849a196a82e7bd3a20aaaba8 Mon Sep 17 00:00:00 2001 From: Trevor Yu Date: Sat, 1 Oct 2022 10:38:36 -0400 Subject: [PATCH 1/8] Add avalon-mlp.jl file --- multi-layer-perceptron/avalon-mlp.jl | 1 + 1 file changed, 1 insertion(+) create mode 100644 multi-layer-perceptron/avalon-mlp.jl diff --git a/multi-layer-perceptron/avalon-mlp.jl b/multi-layer-perceptron/avalon-mlp.jl new file mode 100644 index 0000000..4833830 --- /dev/null +++ b/multi-layer-perceptron/avalon-mlp.jl @@ -0,0 +1 @@ +# Avalon.jl implementation of multi-layer perceptron \ No newline at end of file From 2e6e41115cd618a30445c319823fbff36eeed7af Mon Sep 17 00:00:00 2001 From: Trevor Yu Date: Sun, 16 Oct 2022 23:57:03 -0400 Subject: [PATCH 2/8] Add Avalon MLP notebook --- multi-layer-perceptron/avalon-mlp.ipynb | 477 ++++++++++++++++++++++++ 1 file changed, 477 insertions(+) create mode 100644 multi-layer-perceptron/avalon-mlp.ipynb diff --git a/multi-layer-perceptron/avalon-mlp.ipynb b/multi-layer-perceptron/avalon-mlp.ipynb new file mode 100644 index 0000000..aff56a5 --- /dev/null +++ b/multi-layer-perceptron/avalon-mlp.ipynb @@ -0,0 +1,477 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Imports" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "using Yota;\n", + "using MLDatasets;\n", + "using NNlib;\n", + "using Statistics;\n", + "using Distributions;\n", + "using Functors;\n", + "using Optimisers;\n", + "using Flux.Data;\n", + "using Flux: onehotbatch, @epochs;" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Primitives" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Linear " + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "mutable struct Linear\n", + " W::AbstractMatrix{T} where T\n", + " b::AbstractVector{T} where T\n", + "end\n", + "\n", + "@functor Linear\n", + "\n", + "# Init\n", + "function Linear(in_features::Int, out_features::Int)\n", + " k_sqrt = sqrt(1 / in_features)\n", + " d = Uniform(-k_sqrt, k_sqrt)\n", + " return Linear(rand(d, out_features, in_features), rand(d, out_features))\n", + "end\n", + "Linear(in_out::Pair{Int, Int}) = Linear(in_out[1], in_out[2])\n", + "\n", + "function Base.show(io::IO, l::Linear)\n", + " o, i = size(l.W)\n", + " print(io, \"Linear($i=>$o)\")\n", + "end\n", + "\n", + "# Forward\n", + "(l::Linear)(x::Union{AbstractVector{T}, AbstractMatrix{T}}) where T = l.W * x .+ l.b" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Logit Cross Entropy" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "logitcrossentropy (generic function with 1 method)" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "function logitcrossentropy(ŷ, y; dims = 1, agg = mean)\n", + " agg(.-sum(y .* logsoftmax(ŷ; dims = dims); dims = dims));\n", + " end" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Define the model" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "mutable struct Net\n", + " fc1::Linear\n", + " fc2::Linear\n", + "end\n", + "\n", + "@functor Net\n", + "\n", + "# Init\n", + "Net() = Net(\n", + " Linear(28*28, 100),\n", + " Linear(100, 10)\n", + ")\n", + "\n", + "# Forward\n", + "function (model::Net)(x::AbstractArray)\n", + " x = reshape(x, 28*28, :)\n", + " x = model.fc1(x)\n", + " x = relu(x)\n", + " x = model.fc2(x)\n", + " return x\n", + "end" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Data" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "train_dataset = MNIST(dir=\"/Users/trevoryu/Code/data/mnist\", split=:train);\n", + "test_dataset = MNIST(dir=\"/Users/trevoryu/Code/data/mnist\", split=:test);\n", + "\n", + "X_train = train_dataset.features;\n", + "Y_train = train_dataset.targets;\n", + "\n", + "X_test = test_dataset.features;\n", + "Y_test = test_dataset.targets;" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(28, 28, 10000)" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "size(X_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# Flatten features to be 784 dim\n", + "X_train = reshape(X_train, 784, :); # (dim x batch)\n", + "X_test = reshape(X_test, 784, :);" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "# Convert targets to one-hot vectors\n", + "Y_train = onehotbatch(Y_train, 0:9);\n", + "Y_test = onehotbatch(Y_test, 0:9); # (dim x batch)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "loader = DataLoader((X_train, Y_train), shuffle=true, batchsize=128);" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Training setup" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Net(Linear(784=>100), Linear(100=>10))" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Make model\n", + "mlp = Net()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "# Setup Adam optimizer\n", + "# Default Β is (0.9, 0.999)\n", + "state = Optimisers.setup(Optimisers.Adam(1e-3), mlp);" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "loss_function (generic function with 1 method)" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Create objective function\n", + "function loss_function(model::Net, x::AbstractArray, y::AbstractArray)\n", + " ŷ = model(x)\n", + " loss = logitcrossentropy(ŷ, y)\n", + " return loss\n", + "end" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Training loop" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " 0.774987 seconds (162.82 k allocations: 1.951 GiB, 12.71% gc time)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "┌ Info: epoch 1 loss = 0.04094025307690705\n", + "└ @ Main In[18]:12\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " 0.646125 seconds (163.01 k allocations: 1.951 GiB, 10.26% gc time)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "┌ Info: epoch 2 loss = 0.03572923218462636\n", + "└ @ Main In[18]:12\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " 0.652080 seconds (163.01 k allocations: 1.951 GiB, 9.54% gc time)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "┌ Info: epoch 3 loss = 0.03166147383022924\n", + "└ @ Main In[18]:12\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " 0.682782 seconds (163.01 k allocations: 1.951 GiB, 11.74% gc time)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "┌ Info: epoch 4 loss = 0.027879119237525167\n", + "└ @ Main In[18]:12\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " 0.666789 seconds (163.01 k allocations: 1.951 GiB, 8.82% gc time)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "┌ Info: epoch 5 loss = 0.02481490266472439\n", + "└ @ Main In[18]:12\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " 0.624532 seconds (163.01 k allocations: 1.951 GiB, 8.85% gc time)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "┌ Info: epoch 6 loss = 0.021184432907923646\n", + "└ @ Main In[18]:12\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " 0.604429 seconds (163.01 k allocations: 1.951 GiB, 8.57% gc time)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "┌ Info: epoch 7 loss = 0.019366974012950174\n", + "└ @ Main In[18]:12\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " 0.622498 seconds (163.01 k allocations: 1.951 GiB, 8.82% gc time)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "┌ Info: epoch 8 loss = 0.016626981172646552\n", + "└ @ Main In[18]:12\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " 0.610364 seconds (163.01 k allocations: 1.951 GiB, 8.79% gc time)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "┌ Info: epoch 9 loss = 0.014004979298361142\n", + "└ @ Main In[18]:12\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " 0.661410 seconds (163.01 k allocations: 1.951 GiB, 10.90% gc time)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "┌ Info: epoch 10 loss = 0.012511318528209223\n", + "└ @ Main In[18]:12\n" + ] + } + ], + "source": [ + "for epoch in 1:10\n", + " losses = []\n", + " @time begin\n", + " for (x, y) in loader\n", + " # loss_function does forward pass\n", + " # Yota.jl grad function computes parameter gradients\n", + " loss, g = grad(loss_function, mlp, x, y)\n", + " # Optimiser updates parameters\n", + " Optimisers.update!(state, mlp, g[2])\n", + " push!(losses, loss)\n", + " # TODO: Add accuracy computation\n", + " end\n", + " @info(\"epoch $epoch loss = $(mean(losses))\")\n", + " end\n", + "end" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Julia 1.8.0", + "language": "julia", + "name": "julia-1.8" + }, + "language_info": { + "file_extension": ".jl", + "mimetype": "application/julia", + "name": "julia", + "version": "1.8.0" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "b0fa6594d8f4cbf19f97940f81e996739fb7646882a419484c72d19e05852a7e" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From d826c32cf7bcea36a843cb6a0944c3138370d0cf Mon Sep 17 00:00:00 2001 From: Trevor Yu Date: Fri, 21 Oct 2022 22:45:46 -0400 Subject: [PATCH 3/8] Add evaluation to notebook --- multi-layer-perceptron/avalon-mlp.ipynb | 179 ++++++++++++++++++------ 1 file changed, 134 insertions(+), 45 deletions(-) diff --git a/multi-layer-perceptron/avalon-mlp.ipynb b/multi-layer-perceptron/avalon-mlp.ipynb index aff56a5..7bfd691 100644 --- a/multi-layer-perceptron/avalon-mlp.ipynb +++ b/multi-layer-perceptron/avalon-mlp.ipynb @@ -20,8 +20,9 @@ "using Distributions;\n", "using Functors;\n", "using Optimisers;\n", - "using Flux.Data;\n", - "using Flux: onehotbatch, @epochs;" + "using MLUtils: DataLoader;\n", + "using OneHotArrays: onehotbatch\n", + "using Metrics;" ] }, { @@ -92,8 +93,10 @@ } ], "source": [ - "function logitcrossentropy(ŷ, y; dims = 1, agg = mean)\n", - " agg(.-sum(y .* logsoftmax(ŷ; dims = dims); dims = dims));\n", + "function logitcrossentropy(ŷ, y; dims=1, agg=mean)\n", + " # Compute cross entropy loss from logits\n", + " # Cross entropy computed from NLL loss on logsoftmax of model outputs\n", + " agg(.-sum(y .* logsoftmax(ŷ; dims=dims); dims=dims));\n", " end" ] }, @@ -115,6 +118,7 @@ " fc2::Linear\n", "end\n", "\n", + "# Need to mark functor for Optimizer to work\n", "@functor Net\n", "\n", "# Init\n", @@ -204,7 +208,8 @@ "metadata": {}, "outputs": [], "source": [ - "loader = DataLoader((X_train, Y_train), shuffle=true, batchsize=128);" + "train_loader = DataLoader((X_train, Y_train), shuffle=true, batchsize=128);\n", + "test_loader = DataLoader((X_test, Y_test), shuffle=false, batchsize=128);" ] }, { @@ -216,7 +221,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -225,7 +230,7 @@ "Net(Linear(784=>100), Linear(100=>10))" ] }, - "execution_count": 14, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -237,7 +242,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -248,7 +253,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -257,13 +262,13 @@ "loss_function (generic function with 1 method)" ] }, - "execution_count": 16, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# Create objective function\n", + "# Create objective function to optimize\n", "function loss_function(model::Net, x::AbstractArray, y::AbstractArray)\n", " ŷ = model(x)\n", " loss = logitcrossentropy(ŷ, y)\n", @@ -280,157 +285,157 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - " 0.774987 seconds (162.82 k allocations: 1.951 GiB, 12.71% gc time)\n" + " 25.837786 seconds (157.79 M allocations: 10.050 GiB, 3.78% gc time, 91.53% compilation time: 0% of which was recompilation)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "┌ Info: epoch 1 loss = 0.04094025307690705\n", - "└ @ Main In[18]:12\n" + "┌ Info: epoch 1 loss = 0.4370590642396691\n", + "└ @ Main In[13]:13\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - " 0.646125 seconds (163.01 k allocations: 1.951 GiB, 10.26% gc time)\n" + " 0.757983 seconds (163.01 k allocations: 1.951 GiB, 11.52% gc time)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "┌ Info: epoch 2 loss = 0.03572923218462636\n", - "└ @ Main In[18]:12\n" + "┌ Info: epoch 2 loss = 0.20768273653621058\n", + "└ @ Main In[13]:13\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - " 0.652080 seconds (163.01 k allocations: 1.951 GiB, 9.54% gc time)\n" + " 0.755957 seconds (163.01 k allocations: 1.951 GiB, 9.69% gc time)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "┌ Info: epoch 3 loss = 0.03166147383022924\n", - "└ @ Main In[18]:12\n" + "┌ Info: epoch 3 loss = 0.15264513650689454\n", + "└ @ Main In[13]:13\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - " 0.682782 seconds (163.01 k allocations: 1.951 GiB, 11.74% gc time)\n" + " 0.766106 seconds (163.01 k allocations: 1.951 GiB, 11.41% gc time)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "┌ Info: epoch 4 loss = 0.027879119237525167\n", - "└ @ Main In[18]:12\n" + "┌ Info: epoch 4 loss = 0.12068533624691201\n", + "└ @ Main In[13]:13\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - " 0.666789 seconds (163.01 k allocations: 1.951 GiB, 8.82% gc time)\n" + " 0.790535 seconds (163.01 k allocations: 1.951 GiB, 11.73% gc time)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "┌ Info: epoch 5 loss = 0.02481490266472439\n", - "└ @ Main In[18]:12\n" + "┌ Info: epoch 5 loss = 0.09948631426212774\n", + "└ @ Main In[13]:13\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - " 0.624532 seconds (163.01 k allocations: 1.951 GiB, 8.85% gc time)\n" + " 0.749042 seconds (163.01 k allocations: 1.951 GiB, 10.17% gc time)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "┌ Info: epoch 6 loss = 0.021184432907923646\n", - "└ @ Main In[18]:12\n" + "┌ Info: epoch 6 loss = 0.08462848175017493\n", + "└ @ Main In[13]:13\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - " 0.604429 seconds (163.01 k allocations: 1.951 GiB, 8.57% gc time)\n" + " 0.763076 seconds (163.01 k allocations: 1.951 GiB, 11.58% gc time)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "┌ Info: epoch 7 loss = 0.019366974012950174\n", - "└ @ Main In[18]:12\n" + "┌ Info: epoch 7 loss = 0.07293157247018839\n", + "└ @ Main In[13]:13\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - " 0.622498 seconds (163.01 k allocations: 1.951 GiB, 8.82% gc time)\n" + " 0.737009 seconds (163.01 k allocations: 1.951 GiB, 8.39% gc time)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "┌ Info: epoch 8 loss = 0.016626981172646552\n", - "└ @ Main In[18]:12\n" + "┌ Info: epoch 8 loss = 0.06216332802989073\n", + "└ @ Main In[13]:13\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - " 0.610364 seconds (163.01 k allocations: 1.951 GiB, 8.79% gc time)\n" + " 0.724461 seconds (163.02 k allocations: 1.951 GiB, 8.17% gc time)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "┌ Info: epoch 9 loss = 0.014004979298361142\n", - "└ @ Main In[18]:12\n" + "┌ Info: epoch 9 loss = 0.05457949791791655\n", + "└ @ Main In[13]:13\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - " 0.661410 seconds (163.01 k allocations: 1.951 GiB, 10.90% gc time)\n" + " 0.742743 seconds (163.01 k allocations: 1.951 GiB, 9.23% gc time)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "┌ Info: epoch 10 loss = 0.012511318528209223\n", - "└ @ Main In[18]:12\n" + "┌ Info: epoch 10 loss = 0.047562408875107556\n", + "└ @ Main In[13]:13\n" ] } ], @@ -438,19 +443,103 @@ "for epoch in 1:10\n", " losses = []\n", " @time begin\n", - " for (x, y) in loader\n", + " for (x, y) in train_loader\n", " # loss_function does forward pass\n", - " # Yota.jl grad function computes parameter gradients\n", + " # Yota.jl grad function computes model parameter gradients in g[2]\n", " loss, g = grad(loss_function, mlp, x, y)\n", + " \n", " # Optimiser updates parameters\n", " Optimisers.update!(state, mlp, g[2])\n", " push!(losses, loss)\n", - " # TODO: Add accuracy computation\n", " end\n", " @info(\"epoch $epoch loss = $(mean(losses))\")\n", " end\n", "end" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Evaluation" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "evaluate (generic function with 1 method)" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "function evaluate(mlp, test_loader)\n", + " preds = []\n", + " targets = []\n", + " @time begin\n", + " for (x, y) in test_loader\n", + " # Get model predictions\n", + " # Note argmax of nd-array gives CartesianIndex\n", + " # Need to grab the first element of each CartesianIndex to get the true index\n", + " logits = mlp(x)\n", + " ŷ = map(i -> i[1], argmax(logits, dims=1))\n", + " append!(preds, ŷ)\n", + "\n", + " # Get true labels\n", + " true_label = map(i -> i[1], argmax(y, dims=1))\n", + " append!(targets, true_label)\n", + " end\n", + " end\n", + " accuracy = sum(preds .== targets) / length(targets)\n", + " return accuracy\n", + "end" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " 0.068577 seconds (3.42 k allocations: 115.477 MiB, 27.36% gc time)\n" + ] + }, + { + "data": { + "text/plain": [ + "0.9766" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "evaluate(mlp, test_loader)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "# Avg time per train epoch: 0.753 sec\n", + "# Total time for train (with compile): 32.6 sec\n", + "# Avg time per eval step: 0.77 ms" + ] } ], "metadata": { From 54ed384be3bb71dc393dac5d0c56544ef6ec12b9 Mon Sep 17 00:00:00 2001 From: Trevor Yu Date: Mon, 31 Oct 2022 20:41:23 -0400 Subject: [PATCH 4/8] Update .gitignore to incluce JSON outputs --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index f33838e..a85dddb 100644 --- a/.gitignore +++ b/.gitignore @@ -154,3 +154,4 @@ docs/site/ # committed for packages, but should be committed for applications that require a static # environment. Manifest.toml +*.json From dcab3459c9d6c2dc384a29f78a07e5c6d8bbc72a Mon Sep 17 00:00:00 2001 From: Trevor Yu Date: Mon, 31 Oct 2022 20:41:48 -0400 Subject: [PATCH 5/8] Finish notebook for avalon.jl --- multi-layer-perceptron/avalon-mlp.ipynb | 440 +++++++++++++----------- 1 file changed, 243 insertions(+), 197 deletions(-) diff --git a/multi-layer-perceptron/avalon-mlp.ipynb b/multi-layer-perceptron/avalon-mlp.ipynb index 7bfd691..54e6bad 100644 --- a/multi-layer-perceptron/avalon-mlp.ipynb +++ b/multi-layer-perceptron/avalon-mlp.ipynb @@ -22,7 +22,8 @@ "using Optimisers;\n", "using MLUtils: DataLoader;\n", "using OneHotArrays: onehotbatch\n", - "using Metrics;" + "using Metrics;\n", + "using TimerOutputs;" ] }, { @@ -208,8 +209,9 @@ "metadata": {}, "outputs": [], "source": [ - "train_loader = DataLoader((X_train, Y_train), shuffle=true, batchsize=128);\n", - "test_loader = DataLoader((X_test, Y_test), shuffle=false, batchsize=128);" + "batch_size = 128;\n", + "train_loader = DataLoader((X_train, Y_train), shuffle=true, batchsize=batch_size);\n", + "test_loader = DataLoader((X_test, Y_test), shuffle=false, batchsize=batch_size);" ] }, { @@ -280,7 +282,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Training loop" + "# Evaluation function" ] }, { @@ -289,171 +291,34 @@ "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - " 25.837786 seconds (157.79 M allocations: 10.050 GiB, 3.78% gc time, 91.53% compilation time: 0% of which was recompilation)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "┌ Info: epoch 1 loss = 0.4370590642396691\n", - "└ @ Main In[13]:13\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " 0.757983 seconds (163.01 k allocations: 1.951 GiB, 11.52% gc time)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "┌ Info: epoch 2 loss = 0.20768273653621058\n", - "└ @ Main In[13]:13\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " 0.755957 seconds (163.01 k allocations: 1.951 GiB, 9.69% gc time)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "┌ Info: epoch 3 loss = 0.15264513650689454\n", - "└ @ Main In[13]:13\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " 0.766106 seconds (163.01 k allocations: 1.951 GiB, 11.41% gc time)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "┌ Info: epoch 4 loss = 0.12068533624691201\n", - "└ @ Main In[13]:13\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " 0.790535 seconds (163.01 k allocations: 1.951 GiB, 11.73% gc time)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "┌ Info: epoch 5 loss = 0.09948631426212774\n", - "└ @ Main In[13]:13\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " 0.749042 seconds (163.01 k allocations: 1.951 GiB, 10.17% gc time)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "┌ Info: epoch 6 loss = 0.08462848175017493\n", - "└ @ Main In[13]:13\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " 0.763076 seconds (163.01 k allocations: 1.951 GiB, 11.58% gc time)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "┌ Info: epoch 7 loss = 0.07293157247018839\n", - "└ @ Main In[13]:13\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " 0.737009 seconds (163.01 k allocations: 1.951 GiB, 8.39% gc time)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "┌ Info: epoch 8 loss = 0.06216332802989073\n", - "└ @ Main In[13]:13\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " 0.724461 seconds (163.02 k allocations: 1.951 GiB, 8.17% gc time)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "┌ Info: epoch 9 loss = 0.05457949791791655\n", - "└ @ Main In[13]:13\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " 0.742743 seconds (163.01 k allocations: 1.951 GiB, 9.23% gc time)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "┌ Info: epoch 10 loss = 0.047562408875107556\n", - "└ @ Main In[13]:13\n" - ] + "data": { + "text/plain": [ + "evaluate (generic function with 1 method)" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "for epoch in 1:10\n", - " losses = []\n", - " @time begin\n", - " for (x, y) in train_loader\n", - " # loss_function does forward pass\n", - " # Yota.jl grad function computes model parameter gradients in g[2]\n", - " loss, g = grad(loss_function, mlp, x, y)\n", - " \n", - " # Optimiser updates parameters\n", - " Optimisers.update!(state, mlp, g[2])\n", - " push!(losses, loss)\n", - " end\n", - " @info(\"epoch $epoch loss = $(mean(losses))\")\n", + "function evaluate(mlp, test_loader)\n", + " preds = []\n", + " targets = []\n", + " for (x, y) in test_loader\n", + " # Get model predictions\n", + " # Note argmax of nd-array gives CartesianIndex\n", + " # Need to grab the first element of each CartesianIndex to get the true index\n", + " logits = mlp(x)\n", + " ŷ = map(i -> i[1], argmax(logits, dims=1))\n", + " append!(preds, ŷ)\n", + "\n", + " # Get true labels\n", + " true_label = map(i -> i[1], argmax(y, dims=1))\n", + " append!(targets, true_label)\n", " end\n", + " accuracy = sum(preds .== targets) / length(targets)\n", + " return accuracy\n", "end" ] }, @@ -461,7 +326,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Evaluation" + "# Training loop" ] }, { @@ -472,7 +337,14 @@ { "data": { "text/plain": [ - "evaluate (generic function with 1 method)" + "\u001b[0m\u001b[1m ────────────────────────────────────────────────────────────────────\u001b[22m\n", + "\u001b[0m\u001b[1m \u001b[22m Time Allocations \n", + " ─────────────────────── ────────────────────────\n", + " Tot / % measured: 345ms / 0.0% 45.3MiB / 0.0% \n", + "\n", + " Section ncalls time %tot avg alloc %tot avg\n", + " ────────────────────────────────────────────────────────────────────\n", + "\u001b[0m\u001b[1m ────────────────────────────────────────────────────────────────────\u001b[22m" ] }, "execution_count": 14, @@ -481,44 +353,131 @@ } ], "source": [ - "function evaluate(mlp, test_loader)\n", - " preds = []\n", - " targets = []\n", - " @time begin\n", - " for (x, y) in test_loader\n", - " # Get model predictions\n", - " # Note argmax of nd-array gives CartesianIndex\n", - " # Need to grab the first element of each CartesianIndex to get the true index\n", - " logits = mlp(x)\n", - " ŷ = map(i -> i[1], argmax(logits, dims=1))\n", - " append!(preds, ŷ)\n", - "\n", - " # Get true labels\n", - " true_label = map(i -> i[1], argmax(y, dims=1))\n", - " append!(targets, true_label)\n", - " end\n", - " end\n", - " accuracy = sum(preds .== targets) / length(targets)\n", - " return accuracy\n", - "end" + "# Setup timing output\n", + "const to = TimerOutput()" ] }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 15, "metadata": {}, "outputs": [ { - "name": "stdout", + "name": "stderr", "output_type": "stream", "text": [ - " 0.068577 seconds (3.42 k allocations: 115.477 MiB, 27.36% gc time)\n" + "┌ Info: epoch 1 loss = 0.4375657584243529\n", + "└ @ Main In[15]:17\n", + "┌ Info: epoch 1 eval accuracy = 0.9304\n", + "└ @ Main In[15]:22\n", + "┌ Info: epoch 2 loss = 0.21220267083301655\n", + "└ @ Main In[15]:17\n", + "┌ Info: epoch 2 eval accuracy = 0.9471\n", + "└ @ Main In[15]:22\n", + "┌ Info: epoch 3 loss = 0.1592959047496635\n", + "└ @ Main In[15]:17\n", + "┌ Info: epoch 3 eval accuracy = 0.9596\n", + "└ @ Main In[15]:22\n", + "┌ Info: epoch 4 loss = 0.12536456292602383\n", + "└ @ Main In[15]:17\n", + "┌ Info: epoch 4 eval accuracy = 0.9649\n", + "└ @ Main In[15]:22\n", + "┌ Info: epoch 5 loss = 0.10332849547590327\n", + "└ @ Main In[15]:17\n", + "┌ Info: epoch 5 eval accuracy = 0.9678\n", + "└ @ Main In[15]:22\n", + "┌ Info: epoch 6 loss = 0.08698907676456932\n", + "└ @ Main In[15]:17\n", + "┌ Info: epoch 6 eval accuracy = 0.9693\n", + "└ @ Main In[15]:22\n", + "┌ Info: epoch 7 loss = 0.0743854635873991\n", + "└ @ Main In[15]:17\n", + "┌ Info: epoch 7 eval accuracy = 0.9723\n", + "└ @ Main In[15]:22\n", + "┌ Info: epoch 8 loss = 0.06411696637116769\n", + "└ @ Main In[15]:17\n", + "┌ Info: epoch 8 eval accuracy = 0.9747\n", + "└ @ Main In[15]:22\n", + "┌ Info: epoch 9 loss = 0.055365824426301447\n", + "└ @ Main In[15]:17\n", + "┌ Info: epoch 9 eval accuracy = 0.9742\n", + "└ @ Main In[15]:22\n", + "┌ Info: epoch 10 loss = 0.04838905479051723\n", + "└ @ Main In[15]:17\n", + "┌ Info: epoch 10 eval accuracy = 0.9744\n", + "└ @ Main In[15]:22\n" ] - }, + } + ], + "source": [ + "last_loss = 0;\n", + "@timeit to \"total_training_time\" begin\n", + " for epoch in 1:10\n", + " timing_name = epoch > 1 ? \"average_epoch_training_time\" : \"train_jit\"\n", + " @timeit to timing_name begin\n", + " losses = []\n", + " for (x, y) in train_loader\n", + " # loss_function does forward pass\n", + " # Yota.jl grad function computes model parameter gradients in g[2]\n", + " loss, g = grad(loss_function, mlp, x, y)\n", + " \n", + " # Optimiser updates parameters\n", + " Optimisers.update!(state, mlp, g[2])\n", + " push!(losses, loss)\n", + " end\n", + " last_loss = mean(losses)\n", + " @info(\"epoch $epoch loss = $(mean(losses))\")\n", + " end\n", + " timing_name = epoch > 1 ? \"average_inference_time\" : \"eval_jit\"\n", + " @timeit to timing_name begin\n", + " acc = evaluate(mlp, test_loader)\n", + " @info(\"epoch $epoch eval accuracy = $(acc)\")\n", + " end\n", + " end\n", + "end" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ { "data": { "text/plain": [ - "0.9766" + "\u001b[0m\u001b[1m ────────────────────────────────────────────────────────────────────────────────\u001b[22m\n", + "\u001b[0m\u001b[1m \u001b[22m Time Allocations \n", + " ─────────────────────── ────────────────────────\n", + " Tot / % measured: 33.3s / 98.1% 29.0GiB / 99.7% \n", + "\n", + " Section ncalls time %tot avg alloc %tot avg\n", + " ────────────────────────────────────────────────────────────────────────────────\n", + " total_training_time 1 32.6s 100.0% 32.6s 28.9GiB 100.0% 28.9GiB\n", + " train_jit 1 26.0s 79.6% 26.0s 10.0GiB 34.8% 10.0GiB\n", + " average_epoch_tr... 9 5.80s 17.8% 645ms 17.6GiB 60.8% 1.95GiB\n", + " eval_jit 1 559ms 1.7% 559ms 283MiB 1.0% 283MiB\n", + " average_inferenc... 9 296ms 0.9% 32.9ms 1.02GiB 3.5% 116MiB\n", + "\u001b[0m\u001b[1m ────────────────────────────────────────────────────────────────────────────────\u001b[22m" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "to" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.644896347" ] }, "execution_count": 17, @@ -527,18 +486,105 @@ } ], "source": [ - "evaluate(mlp, test_loader)" + "# Train time\n", + "# Exclude jit time\n", + "average_epoch_train_time = TimerOutputs.time(to[\"total_training_time\"][\"average_epoch_training_time\"]) / (9 * 1e9) # Outputs in nanoseconds, conver to seconds" ] }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.41594755133614625" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Eval batch time\n", + "# Exclude jit time\n", + "num_batches = length(test_loader)\n", + "average_eval_batch_time = TimerOutputs.time(to[\"total_training_time\"][\"average_inference_time\"]) / (9 * 1e6 * num_batches) # Outputs in nanoseconds, conver to milliseconds" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.9744" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "total_train_time = TimerOutputs.time(to[\"total_training_time\"])\n", + "final_eval_accuracy = evaluate(mlp, test_loader)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Dict{String, Any} with 9 entries:\n", + " \"task\" => \"classification\"\n", + " \"framework_name\" => \"Avalon.jl\"\n", + " \"final_trianing_loss\" => 0.0483891\n", + " \"total_training_time\" => 32647958292\n", + " \"average_epoch_training_time\" => 0.644896\n", + " \"final_evaluation_accuracy\" => 0.9744\n", + " \"model_name\" => \"MLP\"\n", + " \"dataset\" => \"MNIST Digits\"\n", + " \"average_batch_inference_time\" => 0.415948" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "metrics = Dict(\n", + " \"model_name\" => \"MLP\",\n", + " \"dataset\" => \"MNIST Digits\",\n", + " \"framework_name\" => \"Avalon.jl\",\n", + " \"task\" => \"classification\",\n", + " \"total_training_time\" => total_train_time,\n", + " \"average_epoch_training_time\" => average_epoch_train_time,\n", + " \"average_batch_inference_time\" => average_eval_batch_time,\n", + " \"final_trianing_loss\" => last_loss,\n", + " \"final_evaluation_accuracy\" => final_eval_accuracy\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 22, "metadata": {}, "outputs": [], "source": [ - "# Avg time per train epoch: 0.753 sec\n", - "# Total time for train (with compile): 32.6 sec\n", - "# Avg time per eval step: 0.77 ms" + "using JSON;\n", + "\n", + "open(\"m1-avalon-mlp.json\",\"w\") do f\n", + " JSON.print(f, metrics)\n", + "end" ] } ], From 2b106762a235a3bd522a6af13cf4112b400cfb66 Mon Sep 17 00:00:00 2001 From: Trevor Yu Date: Mon, 31 Oct 2022 20:42:38 -0400 Subject: [PATCH 6/8] Rename files --- .../{avalon-mlp.ipynb => avalon_notebook.ipynb} | 0 multi-layer-perceptron/{avalon-mlp.jl => avalon_test.jl} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename multi-layer-perceptron/{avalon-mlp.ipynb => avalon_notebook.ipynb} (100%) rename multi-layer-perceptron/{avalon-mlp.jl => avalon_test.jl} (100%) diff --git a/multi-layer-perceptron/avalon-mlp.ipynb b/multi-layer-perceptron/avalon_notebook.ipynb similarity index 100% rename from multi-layer-perceptron/avalon-mlp.ipynb rename to multi-layer-perceptron/avalon_notebook.ipynb diff --git a/multi-layer-perceptron/avalon-mlp.jl b/multi-layer-perceptron/avalon_test.jl similarity index 100% rename from multi-layer-perceptron/avalon-mlp.jl rename to multi-layer-perceptron/avalon_test.jl From ec1582a0731b31d069cdd3f75b8f7ee4033d8a4b Mon Sep 17 00:00:00 2001 From: Trevor Yu Date: Mon, 31 Oct 2022 20:45:49 -0400 Subject: [PATCH 7/8] Convert total training time to seconds --- multi-layer-perceptron/avalon_notebook.ipynb | 72 ++++++++++---------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/multi-layer-perceptron/avalon_notebook.ipynb b/multi-layer-perceptron/avalon_notebook.ipynb index 54e6bad..caa2c1d 100644 --- a/multi-layer-perceptron/avalon_notebook.ipynb +++ b/multi-layer-perceptron/avalon_notebook.ipynb @@ -340,7 +340,7 @@ "\u001b[0m\u001b[1m ────────────────────────────────────────────────────────────────────\u001b[22m\n", "\u001b[0m\u001b[1m \u001b[22m Time Allocations \n", " ─────────────────────── ────────────────────────\n", - " Tot / % measured: 345ms / 0.0% 45.3MiB / 0.0% \n", + " Tot / % measured: 341ms / 0.0% 45.3MiB / 0.0% \n", "\n", " Section ncalls time %tot avg alloc %tot avg\n", " ────────────────────────────────────────────────────────────────────\n", @@ -366,45 +366,45 @@ "name": "stderr", "output_type": "stream", "text": [ - "┌ Info: epoch 1 loss = 0.4375657584243529\n", + "┌ Info: epoch 1 loss = 0.43098854388539676\n", "└ @ Main In[15]:17\n", - "┌ Info: epoch 1 eval accuracy = 0.9304\n", + "┌ Info: epoch 1 eval accuracy = 0.9329\n", "└ @ Main In[15]:22\n", - "┌ Info: epoch 2 loss = 0.21220267083301655\n", + "┌ Info: epoch 2 loss = 0.20844910683454007\n", "└ @ Main In[15]:17\n", - "┌ Info: epoch 2 eval accuracy = 0.9471\n", + "┌ Info: epoch 2 eval accuracy = 0.9482\n", "└ @ Main In[15]:22\n", - "┌ Info: epoch 3 loss = 0.1592959047496635\n", + "┌ Info: epoch 3 loss = 0.15448442086061948\n", "└ @ Main In[15]:17\n", - "┌ Info: epoch 3 eval accuracy = 0.9596\n", + "┌ Info: epoch 3 eval accuracy = 0.9593\n", "└ @ Main In[15]:22\n", - "┌ Info: epoch 4 loss = 0.12536456292602383\n", + "┌ Info: epoch 4 loss = 0.12039177602707632\n", "└ @ Main In[15]:17\n", - "┌ Info: epoch 4 eval accuracy = 0.9649\n", + "┌ Info: epoch 4 eval accuracy = 0.966\n", "└ @ Main In[15]:22\n", - "┌ Info: epoch 5 loss = 0.10332849547590327\n", + "┌ Info: epoch 5 loss = 0.0983232690863611\n", "└ @ Main In[15]:17\n", - "┌ Info: epoch 5 eval accuracy = 0.9678\n", + "┌ Info: epoch 5 eval accuracy = 0.9675\n", "└ @ Main In[15]:22\n", - "┌ Info: epoch 6 loss = 0.08698907676456932\n", + "┌ Info: epoch 6 loss = 0.08252257340927549\n", "└ @ Main In[15]:17\n", - "┌ Info: epoch 6 eval accuracy = 0.9693\n", + "┌ Info: epoch 6 eval accuracy = 0.972\n", "└ @ Main In[15]:22\n", - "┌ Info: epoch 7 loss = 0.0743854635873991\n", + "┌ Info: epoch 7 loss = 0.07077896451852372\n", "└ @ Main In[15]:17\n", - "┌ Info: epoch 7 eval accuracy = 0.9723\n", + "┌ Info: epoch 7 eval accuracy = 0.9729\n", "└ @ Main In[15]:22\n", - "┌ Info: epoch 8 loss = 0.06411696637116769\n", + "┌ Info: epoch 8 loss = 0.06109750930723943\n", "└ @ Main In[15]:17\n", - "┌ Info: epoch 8 eval accuracy = 0.9747\n", + "┌ Info: epoch 8 eval accuracy = 0.9755\n", "└ @ Main In[15]:22\n", - "┌ Info: epoch 9 loss = 0.055365824426301447\n", + "┌ Info: epoch 9 loss = 0.05307631371444329\n", "└ @ Main In[15]:17\n", - "┌ Info: epoch 9 eval accuracy = 0.9742\n", + "┌ Info: epoch 9 eval accuracy = 0.976\n", "└ @ Main In[15]:22\n", - "┌ Info: epoch 10 loss = 0.04838905479051723\n", + "┌ Info: epoch 10 loss = 0.04616427247831039\n", "└ @ Main In[15]:17\n", - "┌ Info: epoch 10 eval accuracy = 0.9744\n", + "┌ Info: epoch 10 eval accuracy = 0.9757\n", "└ @ Main In[15]:22\n" ] } @@ -448,15 +448,15 @@ "\u001b[0m\u001b[1m ────────────────────────────────────────────────────────────────────────────────\u001b[22m\n", "\u001b[0m\u001b[1m \u001b[22m Time Allocations \n", " ─────────────────────── ────────────────────────\n", - " Tot / % measured: 33.3s / 98.1% 29.0GiB / 99.7% \n", + " Tot / % measured: 33.6s / 98.1% 29.0GiB / 99.7% \n", "\n", " Section ncalls time %tot avg alloc %tot avg\n", " ────────────────────────────────────────────────────────────────────────────────\n", - " total_training_time 1 32.6s 100.0% 32.6s 28.9GiB 100.0% 28.9GiB\n", - " train_jit 1 26.0s 79.6% 26.0s 10.0GiB 34.8% 10.0GiB\n", - " average_epoch_tr... 9 5.80s 17.8% 645ms 17.6GiB 60.8% 1.95GiB\n", + " total_training_time 1 32.9s 100.0% 32.9s 28.9GiB 100.0% 28.9GiB\n", + " train_jit 1 26.2s 79.7% 26.2s 10.0GiB 34.8% 10.0GiB\n", + " average_epoch_tr... 9 5.82s 17.7% 646ms 17.6GiB 60.8% 1.95GiB\n", " eval_jit 1 559ms 1.7% 559ms 283MiB 1.0% 283MiB\n", - " average_inferenc... 9 296ms 0.9% 32.9ms 1.02GiB 3.5% 116MiB\n", + " average_inferenc... 9 305ms 0.9% 33.9ms 1.02GiB 3.5% 116MiB\n", "\u001b[0m\u001b[1m ────────────────────────────────────────────────────────────────────────────────\u001b[22m" ] }, @@ -477,7 +477,7 @@ { "data": { "text/plain": [ - "0.644896347" + "0.6461179073333333" ] }, "execution_count": 17, @@ -499,7 +499,7 @@ { "data": { "text/plain": [ - "0.41594755133614625" + "0.42875644444444444" ] }, "execution_count": 18, @@ -522,7 +522,7 @@ { "data": { "text/plain": [ - "0.9744" + "0.9757" ] }, "execution_count": 19, @@ -531,7 +531,7 @@ } ], "source": [ - "total_train_time = TimerOutputs.time(to[\"total_training_time\"])\n", + "total_train_time = TimerOutputs.time(to[\"total_training_time\"]) / 1e9 # Convert nanos to seconds\n", "final_eval_accuracy = evaluate(mlp, test_loader)" ] }, @@ -546,13 +546,13 @@ "Dict{String, Any} with 9 entries:\n", " \"task\" => \"classification\"\n", " \"framework_name\" => \"Avalon.jl\"\n", - " \"final_trianing_loss\" => 0.0483891\n", - " \"total_training_time\" => 32647958292\n", - " \"average_epoch_training_time\" => 0.644896\n", - " \"final_evaluation_accuracy\" => 0.9744\n", + " \"final_trianing_loss\" => 0.0461643\n", + " \"total_training_time\" => 32.9112\n", + " \"average_epoch_training_time\" => 0.646118\n", + " \"final_evaluation_accuracy\" => 0.9757\n", " \"model_name\" => \"MLP\"\n", " \"dataset\" => \"MNIST Digits\"\n", - " \"average_batch_inference_time\" => 0.415948" + " \"average_batch_inference_time\" => 0.428756" ] }, "execution_count": 20, @@ -576,7 +576,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 21, "metadata": {}, "outputs": [], "source": [ From 2bd6f7212f48f6a273d997e5475cd3258c82635e Mon Sep 17 00:00:00 2001 From: Trevor Yu Date: Mon, 31 Oct 2022 21:05:50 -0400 Subject: [PATCH 8/8] Finish script --- multi-layer-perceptron/avalon_test.jl | 203 +++++++++++++++++++++++++- 1 file changed, 202 insertions(+), 1 deletion(-) diff --git a/multi-layer-perceptron/avalon_test.jl b/multi-layer-perceptron/avalon_test.jl index 4833830..af2e756 100644 --- a/multi-layer-perceptron/avalon_test.jl +++ b/multi-layer-perceptron/avalon_test.jl @@ -1 +1,202 @@ -# Avalon.jl implementation of multi-layer perceptron \ No newline at end of file +# Avalon.jl implementation of multi-layer perceptron + +# Imports +using Yota; +using MLDatasets; +using NNlib; +using Statistics; +using Distributions; +using Functors; +using Optimisers; +using MLUtils: DataLoader; +using OneHotArrays: onehotbatch +using Metrics; +using TimerOutputs; +using JSON; + + +# Primitives +# Linear layer +mutable struct Linear + W::AbstractMatrix{T} where T + b::AbstractVector{T} where T +end + +@functor Linear + +# Init +function Linear(in_features::Int, out_features::Int) + k_sqrt = sqrt(1 / in_features) + d = Uniform(-k_sqrt, k_sqrt) + return Linear(rand(d, out_features, in_features), rand(d, out_features)) +end +Linear(in_out::Pair{Int, Int}) = Linear(in_out[1], in_out[2]) + +function Base.show(io::IO, l::Linear) + o, i = size(l.W) + print(io, "Linear($i=>$o)") +end + +# Forward +(l::Linear)(x::Union{AbstractVector{T}, AbstractMatrix{T}}) where T = l.W * x .+ l.b + +# Cross entropy loss +function logitcrossentropy(ŷ, y; dims=1, agg=mean) + # Compute cross entropy loss from logits + # Cross entropy computed from NLL loss on logsoftmax of model outputs + agg(.-sum(y .* logsoftmax(ŷ; dims=dims); dims=dims)); +end + + +# Model definition +mutable struct Net + fc1::Linear + fc2::Linear +end + +# Need to mark functor for Optimizer to work +@functor Net + +# Init +Net() = Net( + Linear(28*28, 100), + Linear(100, 10) +) + +# Forward +function (model::Net)(x::AbstractArray) + x = reshape(x, 28*28, :) + x = model.fc1(x) + x = relu(x) + x = model.fc2(x) + return x +end + +# Create objective function to optimize +function loss_function(model::Net, x::AbstractArray, y::AbstractArray) + ŷ = model(x) + loss = logitcrossentropy(ŷ, y) + return loss +end + + +# Evaluation function +function evaluate(mlp::Net, test_loader::DataLoader)::Number + preds = [] + targets = [] + for (x, y) in test_loader + # Get model predictions + # Note argmax of nd-array gives CartesianIndex + # Need to grab the first element of each CartesianIndex to get the true index + logits = mlp(x) + ŷ = map(i -> i[1], argmax(logits, dims=1)) + append!(preds, ŷ) + + # Get true labels + true_label = map(i -> i[1], argmax(y, dims=1)) + append!(targets, true_label) + end + accuracy = sum(preds .== targets) / length(targets) + return accuracy +end + + +# Data loading and processing +function get_data_loaders(; batch_size=128) + # Data loading + train_dataset = MNIST(split=:train); + test_dataset = MNIST(split=:test); + + X_train = train_dataset.features; + Y_train = train_dataset.targets; + + X_test = test_dataset.features; + Y_test = test_dataset.targets; + + # Flatten features to be 784 dim + X_train = reshape(X_train, 784, :); # (dim x batch) + X_test = reshape(X_test, 784, :); + + # Convert targets to one-hot vectors + Y_train = onehotbatch(Y_train, 0:9); + Y_test = onehotbatch(Y_test, 0:9); # (dim x batch) + + train_loader = DataLoader((X_train, Y_train), shuffle=true, batchsize=batch_size); + test_loader = DataLoader((X_test, Y_test), shuffle=false, batchsize=batch_size); + return train_loader, test_loader +end + +# Setup timing +const to = TimerOutput() + + +function main() + train_loader, test_loader = get_data_loaders(batch_size=128) + + # Setup model and optimizer + mlp = Net() + + # Default Β is (0.9, 0.999) + state = Optimisers.setup(Optimisers.Adam(1e-3), mlp); + + # Training loop + last_loss = 0; + @timeit to "total_training_time" begin + for epoch in 1:10 + # Store the timing in the first epoch into a separate timer for jit + timing_name = epoch > 1 ? "train_epoch" : "train_jit" + @timeit to timing_name begin + losses = [] + for (x, y) in train_loader + # loss_function does forward pass + # Yota.jl grad function computes model parameter gradients in g[2] + loss, g = grad(loss_function, mlp, x, y) + + # Optimiser updates parameters + Optimisers.update!(state, mlp, g[2]) + push!(losses, loss) + end + last_loss = mean(losses) + @info("epoch $epoch loss = $(mean(losses))") + end + timing_name = epoch > 1 ? "eval_epoch" : "eval_jit" + @timeit to timing_name begin + acc = evaluate(mlp, test_loader) + @info("epoch $epoch eval accuracy = $(acc)") + end + end + end + + # Compute timing metrics + # Outputs in nanoseconds, convert to seconds + average_epoch_train_time = TimerOutputs.time(to["total_training_time"]["train_epoch"]) / (9 * 1e9) + total_train_time = TimerOutputs.time(to["total_training_time"]) / 1e9 + + + num_batches = length(test_loader) + # Outputs in nanoseconds, conver to milliseconds + average_eval_batch_time = TimerOutputs.time(to["total_training_time"]["eval_epoch"]) / (9 * 1e6 * num_batches) + + final_eval_accuracy = evaluate(mlp, test_loader) + + metrics = Dict( + "model_name" => "MLP", + "dataset" => "MNIST Digits", + "framework_name" => "Avalon.jl", + "task" => "classification", + "total_training_time" => total_train_time, + "average_epoch_training_time" => average_epoch_train_time, + "average_batch_inference_time" => average_eval_batch_time, + "final_trianing_loss" => last_loss, + "final_evaluation_accuracy" => final_eval_accuracy + ) + open("m1-avalon-mlp.json","w") do f + JSON.print(f, metrics) + end +end + + +# Run main function +if abspath(PROGRAM_FILE) == @__FILE__ + main() +end \ No newline at end of file