None instances present in the code."
+ ],
+ "metadata": {
+ "id": "91bMnvq8oqbp"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "!cp \"/content/Machine-Learning-Simplified/Day-1/LRTestCases.py\" \"/content/\""
+ ],
+ "metadata": {
+ "id": "6DNQ2HHJHjYZ"
+ },
+ "execution_count": 3,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "from LRTestCases import *"
+ ],
+ "metadata": {
+ "id": "dG5rIH8KHUOT"
+ },
+ "execution_count": 4,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {
+ "id": "1tOdDoSwZeIE"
+ },
+ "outputs": [],
+ "source": [
+ "def error(yhat, y):\n",
+ "    \"\"\"Return the signed prediction error: yhat minus y.\"\"\"\n",
+ "    return yhat - y  # subtract the target y from the prediction yhat"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#TEST YOUR IMPLEMENTATION\n",
+ "test_error_function(error)"
+ ],
+ "metadata": {
+ "id": "D7yM32C_Xz-g",
+ "outputId": "bf6a85c8-c851-4730-849e-19607fe108ff",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ }
+ },
+ "execution_count": 6,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "\u001b[92mTest passed!\u001b[0m\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "def error_square(error):\n",
+ "    \"\"\"Return the square of a single error value.\"\"\"\n",
+ "    return error ** 2  # square the error calculated above"
+ ],
+ "metadata": {
+ "id": "Nnoygph2er5y"
+ },
+ "execution_count": 7,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#TEST YOUR IMPLEMENTATION\n",
+ "test_error_square_function(error_square)"
+ ],
+ "metadata": {
+ "id": "MHGCiEXJX_AR",
+ "outputId": "32226b06-966d-4b90-97ed-853e254d1d52",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ }
+ },
+ "execution_count": 8,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "\u001b[92mTest passed!\u001b[0m\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "def total_squared_error(error, num):\n",
+ "    \"\"\"Add `error` to a running total `num` times and return that total.\"\"\"\n",
+ "    running_total = 0\n",
+ "    for _ in range(num):\n",
+ "        # accumulate the same error value once per iteration\n",
+ "        running_total = running_total + error\n",
+ "    return running_total"
+ ],
+ "metadata": {
+ "id": "wgwOEQK9fMqA"
+ },
+ "execution_count": 9,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#TEST YOUR IMPLEMENTATION\n",
+ "test_total_squared_error_function(total_squared_error)"
+ ],
+ "metadata": {
+ "id": "ilzjasT4YJAh",
+ "outputId": "a12ecff0-686c-4e1b-a0a2-0a8f959c64b9",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ }
+ },
+ "execution_count": 10,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "\u001b[92mTest passed!\u001b[0m\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "$\\text{Mean Squared Error}=\\frac{1}{2m}\\sum\\limits_{i = 0}^{m-1}(\\hat{y}^{(i)}-y^{(i)})^2$"
+ ],
+ "metadata": {
+ "id": "oS7bM8mBjJ-u"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "def mse(total_squared_error, num):\n",
+ "    \"\"\"Return total_squared_error / (2 * num), the halved mean squared error.\n",
+ "\n",
+ "    total_squared_error: sum of the squared errors over all examples.\n",
+ "    num: number of examples (m in the formula above).\n",
+ "    With plain Python numbers, num == 0 raises ZeroDivisionError.\n",
+ "    \"\"\"\n",
+ "    denominator = 2 * num  # the formula divides by 2*m, not by m/2\n",
+ "    return total_squared_error / denominator  # return the result, not num"
+ ],
+ "metadata": {
+ "id": "Q6sFN7i2piR8"
+ },
+ "execution_count": 11,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#TEST YOUR IMPLEMENTATION\n",
+ "test_mse_function(mse)"
+ ],
+ "metadata": {
+ "id": "fZ816KtRYsoj",
+ "outputId": "c9c92b3f-6e66-49be-e771-5c51698e8a6f",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ }
+ },
+ "execution_count": 12,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "\u001b[92mTest passed!\u001b[0m\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "**Finding the predicted value**"
+ ],
+ "metadata": {
+ "id": "3Uqj2E0BlU_U"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "def predicted_value(w, x, b):\n",
+ "    \"\"\"Return the linear prediction w * x + b.\"\"\"\n",
+ "    weighted_input = w * x  # scale the input by the weight\n",
+ "    return weighted_input + b  # then shift by the bias"
+ ],
+ "metadata": {
+ "id": "oiwBU-fSjp5C"
+ },
+ "execution_count": 13,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#TEST YOUR IMPLEMENTATION\n",
+ "test_predicted_value(predicted_value)"
+ ],
+ "metadata": {
+ "id": "MXs2kadnreTF",
+ "outputId": "2782293e-7746-4b85-c5db-eddeaa5191b3",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ }
+ },
+ "execution_count": 14,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "\u001b[92mTest passed!\u001b[0m\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## Cost Function\n",
+ "The equation for cost with one variable is:\n",
+ "$$J(w,b) = \\frac{1}{2m} \\sum\\limits_{i = 0}^{m-1} (\\hat{y}^{(i)} - y^{(i)})^2$$"
+ ],
+ "metadata": {
+ "id": "qW9qAdiUwOrk"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "def compute_cost(x, y, w, b):\n",
+ "    \"\"\"Return the cost J(w, b) = 1/(2m) * sum((yhat_i - y_i) ** 2).\n",
+ "\n",
+ "    x, y: indexable inputs and targets; x must expose .shape[0]\n",
+ "          (e.g. a NumPy array), which is used as the example count m.\n",
+ "    w, b: weight and bias of the model yhat = w * x + b.\n",
+ "    \"\"\"\n",
+ "    # number of training examples\n",
+ "    m = x.shape[0]\n",
+ "    total_squared_error = 0\n",
+ "    for i in range(m):\n",
+ "        yhat = w * x[i] + b\n",
+ "        error = yhat - y[i]  # subtract y[i] from yhat\n",
+ "        squared_error = error ** 2  # square the error\n",
+ "        # Accumulate the SQUARED error; summing signed errors lets them cancel out.\n",
+ "        total_squared_error = total_squared_error + squared_error\n",
+ "    denominator = m * 2  # multiply m by 2\n",
+ "    total_cost = total_squared_error / denominator\n",
+ "    return total_cost"
+ ],
+ "metadata": {
+ "id": "RCCqRXf-wNoI"
+ },
+ "execution_count": 21,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "# Gradient Descent\n",
+ "## 1. Compute Gradient\n",
+ "The gradient is defined as:\n",
+ "$$\n",
+ "\\begin{align}\n",
+ "\\frac{\\partial J(w,b)}{\\partial w} &= \\frac{1}{m} \\sum\\limits_{i = 0}^{m-1} (\\hat{y}^{(i)} - y^{(i)})x^{(i)} \\\\\n",
+ " \\frac{\\partial J(w,b)}{\\partial b} &= \\frac{1}{m} \\sum\\limits_{i = 0}^{m-1} (\\hat{y}^{(i)} - y^{(i)}) \\\\\n",
+ "\\end{align}\n",
+ "$$"
+ ],
+ "metadata": {
+ "id": "OGswOCVFzR7W"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "def compute_gradient(x, y, w, b):\n",
+ "    \"\"\"Return (dj_dw, dj_db), the cost gradients averaged over m = x.shape[0] examples.\"\"\"\n",
+ "    m = x.shape[0]\n",
+ "    grad_w_sum = 0\n",
+ "    grad_b_sum = 0\n",
+ "\n",
+ "    for i in range(m):\n",
+ "        residual = (w * x[i] + b) - y[i]  # prediction minus target\n",
+ "        grad_b_sum += residual\n",
+ "        grad_w_sum += residual * x[i]\n",
+ "\n",
+ "    # average the accumulated per-example gradients\n",
+ "    return grad_w_sum / m, grad_b_sum / m"
+ ],
+ "metadata": {
+ "id": "VMBk0PnA0wK3"
+ },
+ "execution_count": 19,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "test_compute_gradient(compute_gradient)"
+ ],
+ "metadata": {
+ "id": "3U-qZNSFE1QK",
+ "outputId": "62178254-d6c9-4eec-8d22-f6a8ea0fcb63",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ }
+ },
+ "execution_count": 20,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "\u001b[92mTest passed!\u001b[0m\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## 2. Update the parameters num_iterations times\n",
+ "$$\\begin{align*} \\text{repeat}&\\text{ until convergence:} \\; \\lbrace \\newline\n",
+ "\\; w &= w - \\alpha \\frac{\\partial J(w,b)}{\\partial w} \\; \\newline\n",
+ " b &= b - \\alpha \\frac{\\partial J(w,b)}{\\partial b} \\newline \\rbrace\n",
+ "\\end{align*}$$\n",
+ "where the parameters $w$ and $b$ are updated simultaneously."
+ ],
+ "metadata": {
+ "id": "AhWGXBq82p6P"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "def gradient_descent(x, y, learning_rate, num_iterations):\n",
+ "    \"\"\"Fit w and b by gradient descent, starting from w = b = 0.\n",
+ "\n",
+ "    Performs exactly num_iterations updates, prints the cost after each\n",
+ "    update, and returns the final (w, b).\n",
+ "    \"\"\"\n",
+ "    # Initialize weights and bias\n",
+ "    w = 0\n",
+ "    b = 0\n",
+ "    for iteration in range(1, num_iterations + 1):\n",
+ "        # Both gradients come from the current (w, b) before either changes,\n",
+ "        # so the two parameters are updated simultaneously.\n",
+ "        dj_dw, dj_db = compute_gradient(x, y, w, b)\n",
+ "\n",
+ "        # Update weights and bias\n",
+ "        w = w - learning_rate * dj_dw\n",
+ "        b = b - learning_rate * dj_db\n",
+ "        # The cost is computed only for monitoring; it does not affect the updates\n",
+ "        cost = compute_cost(x, y, w, b)\n",
+ "        print(f'Iteration {iteration}/{num_iterations}, Cost: {cost:.6f}')\n",
+ "    return w, b"
+ ],
+ "metadata": {
+ "id": "OlNwBvu52cUv"
+ },
+ "execution_count": 22,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "test_gradient_descent(gradient_descent, compute_cost, compute_gradient)"
+ ],
+ "metadata": {
+ "id": "dpmvXbs4lmHO",
+ "outputId": "b4f4f34b-3661-4365-db7a-ed1b331eac25",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ }
+ },
+ "execution_count": 23,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Iteration 1/10, Cost: 1.782000\n",
+ "Iteration 2/10, Cost: 1.589760\n",
+ "Iteration 3/10, Cost: 1.420232\n",
+ "Iteration 4/10, Cost: 1.270730\n",
+ "Iteration 5/10, Cost: 1.138884\n",
+ "Iteration 6/10, Cost: 1.022606\n",
+ "Iteration 7/10, Cost: 0.920053\n",
+ "Iteration 8/10, Cost: 0.829601\n",
+ "Iteration 9/10, Cost: 0.749820\n",
+ "Iteration 10/10, Cost: 0.679447\n",
+ "Final parameters: w = 0.7955, b = 0.2545\n",
+ "Final cost: 0.679447\n",
+ "\u001b[92mTest passed!\u001b[0m\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ ""
+ ],
+ "metadata": {
+ "id": "NknHm9DGGonf"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "# **Project:** Melanoma Tumor Size Prediction"
+ ],
+ "metadata": {
+ "id": "_Vb4kNxkG_Ml"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "from sklearn.linear_model import LinearRegression\n",
+ "from sklearn.metrics import mean_squared_error, mean_absolute_error\n",
+ "from sklearn.model_selection import train_test_split\n",
+ "import seaborn as sns\n",
+ "import matplotlib.pyplot as plt"
+ ],
+ "metadata": {
+ "id": "ibTpczTtGokE"
+ },
+ "execution_count": 30,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#Read the dataset\n",
+ "data = pd.read_csv('/content/Machine-Learning-Simplified/Day-1/melanoma_dataset.csv')"
+ ],
+ "metadata": {
+ "id": "eqAMMJ9lGohp"
+ },
+ "execution_count": 31,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#Display the dataset\n",
+ "sns.scatterplot(x='mass_npea', y='tumor_size', data=data)"
+ ],
+ "metadata": {
+ "id": "Q14KncK6GofO",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 466
+ },
+ "outputId": "e9d46832-c09c-4d53-a936-92287d869bcf"
+ },
+ "execution_count": 32,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "| \n", + " | mass_npea | \n", + "tumor_size | \n", + "
|---|---|---|
| count | \n", + "1000.000000 | \n", + "1000.000000 | \n", + "
| mean | \n", + "22.709158 | \n", + "9.805131 | \n", + "
| std | \n", + "11.682122 | \n", + "5.842747 | \n", + "
| min | \n", + "1.575483 | \n", + "0.092640 | \n", + "
| 25% | \n", + "12.290811 | \n", + "4.719465 | \n", + "
| 50% | \n", + "22.968280 | \n", + "9.936148 | \n", + "
| 75% | \n", + "32.664439 | \n", + "14.886392 | \n", + "
| max | \n", + "44.255681 | \n", + "19.994353 | \n", + "
LinearRegression()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
LinearRegression()
None instances present in the code."
+ ],
+ "metadata": {
+ "id": "91bMnvq8oqbp"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "!cp \"/content/Machine-Learning-Simplified/Day-1/LRTestCases.py\" \"/content/\""
+ ],
+ "metadata": {
+ "id": "6DNQ2HHJHjYZ"
+ },
+ "execution_count": 3,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "from LRTestCases import *"
+ ],
+ "metadata": {
+ "id": "dG5rIH8KHUOT"
+ },
+ "execution_count": 4,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {
+ "id": "1tOdDoSwZeIE"
+ },
+ "outputs": [],
+ "source": [
+ "def error(yhat, y):\n",
+ "    \"\"\"Return the signed prediction error: yhat minus y.\"\"\"\n",
+ "    return yhat - y  # subtract the target y from the prediction yhat"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#TEST YOUR IMPLEMENTATION\n",
+ "test_error_function(error)"
+ ],
+ "metadata": {
+ "id": "D7yM32C_Xz-g",
+ "outputId": "bf6a85c8-c851-4730-849e-19607fe108ff",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ }
+ },
+ "execution_count": 6,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "\u001b[92mTest passed!\u001b[0m\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "def error_square(error):\n",
+ "    \"\"\"Return the square of a single error value.\"\"\"\n",
+ "    return error ** 2  # square the error calculated above"
+ ],
+ "metadata": {
+ "id": "Nnoygph2er5y"
+ },
+ "execution_count": 7,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#TEST YOUR IMPLEMENTATION\n",
+ "test_error_square_function(error_square)"
+ ],
+ "metadata": {
+ "id": "MHGCiEXJX_AR",
+ "outputId": "32226b06-966d-4b90-97ed-853e254d1d52",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ }
+ },
+ "execution_count": 8,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "\u001b[92mTest passed!\u001b[0m\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "def total_squared_error(error, num):\n",
+ "    \"\"\"Add `error` to a running total `num` times and return that total.\"\"\"\n",
+ "    running_total = 0\n",
+ "    for _ in range(num):\n",
+ "        # accumulate the same error value once per iteration\n",
+ "        running_total = running_total + error\n",
+ "    return running_total"
+ ],
+ "metadata": {
+ "id": "wgwOEQK9fMqA"
+ },
+ "execution_count": 9,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#TEST YOUR IMPLEMENTATION\n",
+ "test_total_squared_error_function(total_squared_error)"
+ ],
+ "metadata": {
+ "id": "ilzjasT4YJAh",
+ "outputId": "a12ecff0-686c-4e1b-a0a2-0a8f959c64b9",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ }
+ },
+ "execution_count": 10,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "\u001b[92mTest passed!\u001b[0m\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "$\\text{Mean Squared Error}=\\frac{1}{2m}\\sum\\limits_{i = 0}^{m-1}(\\hat{y}^{(i)}-y^{(i)})^2$"
+ ],
+ "metadata": {
+ "id": "oS7bM8mBjJ-u"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "def mse(total_squared_error, num):\n",
+ "    \"\"\"Return total_squared_error / (2 * num), the halved mean squared error.\n",
+ "\n",
+ "    total_squared_error: sum of the squared errors over all examples.\n",
+ "    num: number of examples (m in the formula above).\n",
+ "    With plain Python numbers, num == 0 raises ZeroDivisionError.\n",
+ "    \"\"\"\n",
+ "    denominator = 2 * num  # the formula divides by 2*m, not by m/2\n",
+ "    return total_squared_error / denominator  # return the result, not num"
+ ],
+ "metadata": {
+ "id": "Q6sFN7i2piR8"
+ },
+ "execution_count": 11,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#TEST YOUR IMPLEMENTATION\n",
+ "test_mse_function(mse)"
+ ],
+ "metadata": {
+ "id": "fZ816KtRYsoj",
+ "outputId": "c9c92b3f-6e66-49be-e771-5c51698e8a6f",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ }
+ },
+ "execution_count": 12,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "\u001b[92mTest passed!\u001b[0m\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "**Finding the predicted value**"
+ ],
+ "metadata": {
+ "id": "3Uqj2E0BlU_U"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "def predicted_value(w, x, b):\n",
+ "    \"\"\"Return the linear prediction w * x + b.\"\"\"\n",
+ "    weighted_input = w * x  # scale the input by the weight\n",
+ "    return weighted_input + b  # then shift by the bias"
+ ],
+ "metadata": {
+ "id": "oiwBU-fSjp5C"
+ },
+ "execution_count": 13,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#TEST YOUR IMPLEMENTATION\n",
+ "test_predicted_value(predicted_value)"
+ ],
+ "metadata": {
+ "id": "MXs2kadnreTF",
+ "outputId": "2782293e-7746-4b85-c5db-eddeaa5191b3",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ }
+ },
+ "execution_count": 14,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "\u001b[92mTest passed!\u001b[0m\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## Cost Function\n",
+ "The equation for cost with one variable is:\n",
+ "$$J(w,b) = \\frac{1}{2m} \\sum\\limits_{i = 0}^{m-1} (\\hat{y}^{(i)} - y^{(i)})^2$$"
+ ],
+ "metadata": {
+ "id": "qW9qAdiUwOrk"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "def compute_cost(x, y, w, b):\n",
+ "    \"\"\"Return the cost J(w, b) = 1/(2m) * sum((yhat_i - y_i) ** 2).\n",
+ "\n",
+ "    x, y: indexable inputs and targets; x must expose .shape[0]\n",
+ "          (e.g. a NumPy array), which is used as the example count m.\n",
+ "    w, b: weight and bias of the model yhat = w * x + b.\n",
+ "    \"\"\"\n",
+ "    # number of training examples\n",
+ "    m = x.shape[0]\n",
+ "    total_squared_error = 0\n",
+ "    for i in range(m):\n",
+ "        yhat = w * x[i] + b\n",
+ "        error = yhat - y[i]  # subtract y[i] from yhat\n",
+ "        squared_error = error ** 2  # square the error\n",
+ "        # Accumulate the SQUARED error; summing signed errors lets them cancel out.\n",
+ "        total_squared_error = total_squared_error + squared_error\n",
+ "    denominator = m * 2  # multiply m by 2\n",
+ "    total_cost = total_squared_error / denominator\n",
+ "    return total_cost"
+ ],
+ "metadata": {
+ "id": "RCCqRXf-wNoI"
+ },
+ "execution_count": 21,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "# Gradient Descent\n",
+ "## 1. Compute Gradient\n",
+ "The gradient is defined as:\n",
+ "$$\n",
+ "\\begin{align}\n",
+ "\\frac{\\partial J(w,b)}{\\partial w} &= \\frac{1}{m} \\sum\\limits_{i = 0}^{m-1} (\\hat{y}^{(i)} - y^{(i)})x^{(i)} \\\\\n",
+ " \\frac{\\partial J(w,b)}{\\partial b} &= \\frac{1}{m} \\sum\\limits_{i = 0}^{m-1} (\\hat{y}^{(i)} - y^{(i)}) \\\\\n",
+ "\\end{align}\n",
+ "$$"
+ ],
+ "metadata": {
+ "id": "OGswOCVFzR7W"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "def compute_gradient(x, y, w, b):\n",
+ "    \"\"\"Return (dj_dw, dj_db), the cost gradients averaged over m = x.shape[0] examples.\"\"\"\n",
+ "    m = x.shape[0]\n",
+ "    grad_w_sum = 0\n",
+ "    grad_b_sum = 0\n",
+ "\n",
+ "    for i in range(m):\n",
+ "        residual = (w * x[i] + b) - y[i]  # prediction minus target\n",
+ "        grad_b_sum += residual\n",
+ "        grad_w_sum += residual * x[i]\n",
+ "\n",
+ "    # average the accumulated per-example gradients\n",
+ "    return grad_w_sum / m, grad_b_sum / m"
+ ],
+ "metadata": {
+ "id": "VMBk0PnA0wK3"
+ },
+ "execution_count": 19,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "test_compute_gradient(compute_gradient)"
+ ],
+ "metadata": {
+ "id": "3U-qZNSFE1QK",
+ "outputId": "62178254-d6c9-4eec-8d22-f6a8ea0fcb63",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ }
+ },
+ "execution_count": 20,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "\u001b[92mTest passed!\u001b[0m\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## 2. Update the parameters num_iterations times\n",
+ "$$\\begin{align*} \\text{repeat}&\\text{ until convergence:} \\; \\lbrace \\newline\n",
+ "\\; w &= w - \\alpha \\frac{\\partial J(w,b)}{\\partial w} \\; \\newline\n",
+ " b &= b - \\alpha \\frac{\\partial J(w,b)}{\\partial b} \\newline \\rbrace\n",
+ "\\end{align*}$$\n",
+ "where the parameters $w$ and $b$ are updated simultaneously."
+ ],
+ "metadata": {
+ "id": "AhWGXBq82p6P"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "def gradient_descent(x, y, learning_rate, num_iterations):\n",
+ "    \"\"\"Fit w and b by gradient descent, starting from w = b = 0.\n",
+ "\n",
+ "    Performs exactly num_iterations updates, prints the cost after each\n",
+ "    update, and returns the final (w, b).\n",
+ "    \"\"\"\n",
+ "    # Initialize weights and bias\n",
+ "    w = 0\n",
+ "    b = 0\n",
+ "    for iteration in range(1, num_iterations + 1):\n",
+ "        # Both gradients come from the current (w, b) before either changes,\n",
+ "        # so the two parameters are updated simultaneously.\n",
+ "        dj_dw, dj_db = compute_gradient(x, y, w, b)\n",
+ "\n",
+ "        # Update weights and bias\n",
+ "        w = w - learning_rate * dj_dw\n",
+ "        b = b - learning_rate * dj_db\n",
+ "        # The cost is computed only for monitoring; it does not affect the updates\n",
+ "        cost = compute_cost(x, y, w, b)\n",
+ "        print(f'Iteration {iteration}/{num_iterations}, Cost: {cost:.6f}')\n",
+ "    return w, b"
+ ],
+ "metadata": {
+ "id": "OlNwBvu52cUv"
+ },
+ "execution_count": 22,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "test_gradient_descent(gradient_descent, compute_cost, compute_gradient)"
+ ],
+ "metadata": {
+ "id": "dpmvXbs4lmHO",
+ "outputId": "b4f4f34b-3661-4365-db7a-ed1b331eac25",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ }
+ },
+ "execution_count": 23,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Iteration 1/10, Cost: 1.782000\n",
+ "Iteration 2/10, Cost: 1.589760\n",
+ "Iteration 3/10, Cost: 1.420232\n",
+ "Iteration 4/10, Cost: 1.270730\n",
+ "Iteration 5/10, Cost: 1.138884\n",
+ "Iteration 6/10, Cost: 1.022606\n",
+ "Iteration 7/10, Cost: 0.920053\n",
+ "Iteration 8/10, Cost: 0.829601\n",
+ "Iteration 9/10, Cost: 0.749820\n",
+ "Iteration 10/10, Cost: 0.679447\n",
+ "Final parameters: w = 0.7955, b = 0.2545\n",
+ "Final cost: 0.679447\n",
+ "\u001b[92mTest passed!\u001b[0m\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ ""
+ ],
+ "metadata": {
+ "id": "NknHm9DGGonf"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "# **Project:** Melanoma Tumor Size Prediction"
+ ],
+ "metadata": {
+ "id": "_Vb4kNxkG_Ml"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "from sklearn.linear_model import LinearRegression\n",
+ "from sklearn.metrics import mean_squared_error, mean_absolute_error\n",
+ "from sklearn.model_selection import train_test_split\n",
+ "import seaborn as sns\n",
+ "import matplotlib.pyplot as plt"
+ ],
+ "metadata": {
+ "id": "ibTpczTtGokE"
+ },
+ "execution_count": 30,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#Read the dataset\n",
+ "data = pd.read_csv('/content/Machine-Learning-Simplified/Day-1/melanoma_dataset.csv')"
+ ],
+ "metadata": {
+ "id": "eqAMMJ9lGohp"
+ },
+ "execution_count": 31,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#Display the dataset\n",
+ "sns.scatterplot(x='mass_npea', y='tumor_size', data=data)"
+ ],
+ "metadata": {
+ "id": "Q14KncK6GofO",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 466
+ },
+ "outputId": "e9d46832-c09c-4d53-a936-92287d869bcf"
+ },
+ "execution_count": 32,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "| \n", + " | mass_npea | \n", + "tumor_size | \n", + "
|---|---|---|
| count | \n", + "1000.000000 | \n", + "1000.000000 | \n", + "
| mean | \n", + "22.709158 | \n", + "9.805131 | \n", + "
| std | \n", + "11.682122 | \n", + "5.842747 | \n", + "
| min | \n", + "1.575483 | \n", + "0.092640 | \n", + "
| 25% | \n", + "12.290811 | \n", + "4.719465 | \n", + "
| 50% | \n", + "22.968280 | \n", + "9.936148 | \n", + "
| 75% | \n", + "32.664439 | \n", + "14.886392 | \n", + "
| max | \n", + "44.255681 | \n", + "19.994353 | \n", + "
LinearRegression()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
LinearRegression()
| \n", + " | mass_npea | \n", + "size_npear | \n", + "malign_ratio | \n", + "damage_size | \n", + "exposed_area | \n", + "std_dev_malign | \n", + "err_malign | \n", + "malign_penalty | \n", + "damage_ratio | \n", + "tumor_size | \n", + "
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", + "6930.90 | \n", + "2919.02 | \n", + "0.42116 | \n", + "51.8298 | \n", + "9.888294e+05 | \n", + "109.4870 | \n", + "2758.76 | \n", + "72 | \n", + "39.3620 | \n", + "14.103 | \n", + "
| 1 | \n", + "15635.70 | \n", + "4879.36 | \n", + "0.31206 | \n", + "223.5500 | \n", + "2.058426e+06 | \n", + "248.8810 | \n", + "5952.53 | \n", + "240 | \n", + "22.0253 | \n", + "2.648 | \n", + "
| 2 | \n", + "10376.20 | \n", + "2613.88 | \n", + "0.25191 | \n", + "127.3370 | \n", + "1.434676e+06 | \n", + "160.0930 | \n", + "4635.26 | \n", + "73 | \n", + "29.9963 | \n", + "1.688 | \n", + "
| 3 | \n", + "13093.80 | \n", + "4510.06 | \n", + "0.34444 | \n", + "155.4400 | \n", + "1.812195e+06 | \n", + "173.0150 | \n", + "5273.87 | \n", + "32 | \n", + "28.1354 | \n", + "3.796 | \n", + "
| 4 | \n", + "7545.21 | \n", + "2882.36 | \n", + "0.38201 | \n", + "85.1237 | \n", + "1.043918e+06 | \n", + "124.4140 | \n", + "3263.35 | \n", + "57 | \n", + "35.0200 | \n", + "18.023 | \n", + "
| ... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
| 9141 | \n", + "7250.25 | \n", + "3120.63 | \n", + "0.43041 | \n", + "82.0410 | \n", + "9.794768e+05 | \n", + "118.7710 | \n", + "3370.24 | \n", + "53 | \n", + "37.0260 | \n", + "13.127 | \n", + "
| 9142 | \n", + "10145.00 | \n", + "3544.90 | \n", + "0.34942 | \n", + "90.1403 | \n", + "1.374393e+06 | \n", + "154.0270 | \n", + "5025.50 | \n", + "30 | \n", + "31.0565 | \n", + "17.091 | \n", + "
| 9143 | \n", + "8086.10 | \n", + "1621.65 | \n", + "0.20054 | \n", + "78.5118 | \n", + "1.134257e+06 | \n", + "104.2840 | \n", + "3804.98 | \n", + "13 | \n", + "34.2739 | \n", + "1.971 | \n", + "
| 9144 | \n", + "14418.90 | \n", + "6373.71 | \n", + "0.44203 | \n", + "84.0665 | \n", + "1.955398e+06 | \n", + "246.4450 | \n", + "19881.74 | \n", + "39 | \n", + "34.5885 | \n", + "17.749 | \n", + "
| 9145 | \n", + "6852.61 | \n", + "1584.64 | \n", + "0.23124 | \n", + "51.3211 | \n", + "9.559976e+05 | \n", + "80.6543 | \n", + "3073.51 | \n", + "28 | \n", + "37.8939 | \n", + "14.103 | \n", + "
9146 rows × 10 columns
\n", + "RandomForestRegressor()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
RandomForestRegressor()