| \n", + " | longitude,\"latitude\",\"housing_median_age\",\"total_ | \n", + "ooms\",\"total_bed | \n", + "ooms\",\"population\",\"households\",\"median_income\",\"median_house_value\" | \n", + "
|---|---|---|---|
| 0 | \n", + "-122.050000,37.370000,27.000000,3885.000000,66... | \n", + "NaN | \n", + "NaN | \n", + "
| 1 | \n", + "-118.300000,34.260000,43.000000,1510.000000,31... | \n", + "NaN | \n", + "NaN | \n", + "
| 2 | \n", + "-117.810000,33.780000,27.000000,3589.000000,50... | \n", + "NaN | \n", + "NaN | \n", + "
| 3 | \n", + "-118.360000,33.820000,28.000000,67.000000,15.0... | \n", + "NaN | \n", + "NaN | \n", + "
| 4 | \n", + "-119.670000,36.330000,19.000000,1241.000000,24... | \n", + "NaN | \n", + "NaN | \n", + "
| 5 | \n", + "-119.560000,36.510000,37.000000,1018.000000,21... | \n", + "NaN | \n", + "NaN | \n", + "
| 6 | \n", + "-121.430000,38.630000,43.000000,1009.000000,22... | \n", + "NaN | \n", + "NaN | \n", + "
| 7 | \n", + "-120.650000,35.480000,19.000000,2310.000000,47... | \n", + "NaN | \n", + "NaN | \n", + "
| 8 | \n", + "-122.840000,38.400000,15.000000,3080.000000,61... | \n", + "NaN | \n", + "NaN | \n", + "
| 9 | \n", + "-118.020000,34.080000,31.000000,2402.000000,63... | \n", + "NaN | \n", + "NaN | \n", + "
| \n", + " | Detail1 | \n", + "ooms\",\"total_bed | \n", + "ooms\",\"population\",\"households\",\"median_income\",\"median_house_value\" | \n", + "
|---|---|---|---|
| 0 | \n", + "-122.050000,37.370000,27.000000,3885.000000,66... | \n", + "NaN | \n", + "NaN | \n", + "
| 1 | \n", + "-118.300000,34.260000,43.000000,1510.000000,31... | \n", + "NaN | \n", + "NaN | \n", + "
| 2 | \n", + "-117.810000,33.780000,27.000000,3589.000000,50... | \n", + "NaN | \n", + "NaN | \n", + "
| \n", + " | w | \n", + "x | \n", + "y | \n", + "z | \n", + "
|---|---|---|---|---|
| a | \n", + "0 | \n", + "1 | \n", + "2 | \n", + "3 | \n", + "
| b | \n", + "4 | \n", + "5 | \n", + "6 | \n", + "7 | \n", + "
| c | \n", + "8 | \n", + "9 | \n", + "10 | \n", + "11 | \n", + "
| d | \n", + "12 | \n", + "13 | \n", + "14 | \n", + "15 | \n", + "
| \n", + " | w | \n", + "x | \n", + "
|---|---|---|
| a | \n", + "0 | \n", + "1 | \n", + "
| b | \n", + "4 | \n", + "5 | \n", + "
| \n", + " | w | \n", + "y | \n", + "
|---|---|---|
| a | \n", + "0 | \n", + "2 | \n", + "
| c | \n", + "8 | \n", + "10 | \n", + "
None instances present in the code."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "6DNQ2HHJHjYZ"
+ },
+ "outputs": [],
+ "source": [
+ "!cp \"/content/Machine-Learning-Simplified/Day-1/LRTestCases.py\" \"/content/\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "dG5rIH8KHUOT"
+ },
+ "outputs": [],
+ "source": [
+ "from LRTestCases import *"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "1tOdDoSwZeIE"
+ },
+ "outputs": [],
+ "source": [
+ "def error(yhat, y):\n",
+ "    \"\"\"Return the signed prediction error, yhat - y.\"\"\"\n",
+ "    return yhat - y  # prediction minus target"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "D7yM32C_Xz-g",
+ "outputId": "619b2be0-4236-40cc-a2fa-fb05008b54dc"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[92mTest passed!\u001b[0m\n"
+ ]
+ }
+ ],
+ "source": [
+ "#TEST YOUR IMPLEMENTATION\n",
+ "test_error_function(error)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "Nnoygph2er5y"
+ },
+ "outputs": [],
+ "source": [
+ "def error_square(error):\n",
+ "    \"\"\"Return the square of an error value.\n",
+ "\n",
+ "    error: the signed error to square (a number or anything supporting ** 2).\n",
+ "    \"\"\"\n",
+ "    # Square the parameter itself; it is the only error value in scope here.\n",
+ "    square = error ** 2  # Square the error calculated above\n",
+ "    return square"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "MHGCiEXJX_AR",
+ "outputId": "dedd1d03-d4c8-46a1-e145-cbf40b0d2d6b"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[92mTest passed!\u001b[0m\n"
+ ]
+ }
+ ],
+ "source": [
+ "#TEST YOUR IMPLEMENTATION\n",
+ "# NOTE(review): this call re-tests error(), not error_square() from the preceding cell -- confirm which LRTestCases helper was intended.\n",
+ "test_error_function(error)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "wgwOEQK9fMqA"
+ },
+ "outputs": [],
+ "source": [
+ "def total_squared_error(error, num):\n",
+ "    \"\"\"Add the same error value to a running sum num times and return the sum.\"\"\"\n",
+ "    accumulated = 0\n",
+ "    for _ in range(num):\n",
+ "        accumulated = error + accumulated  # one contribution per example\n",
+ "    return accumulated"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "ilzjasT4YJAh",
+ "outputId": "874c4c1b-32b5-4e6e-f258-92eb1163d2b4"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[92mTest passed!\u001b[0m\n"
+ ]
+ }
+ ],
+ "source": [
+ "#TEST YOUR IMPLEMENTATION\n",
+ "test_total_squared_error_function(total_squared_error)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "oS7bM8mBjJ-u"
+ },
+ "source": [
+ "$\\text{Mean Squared Error}=\\frac{1}{2m}\\sum\\limits_{i = 0}^{m-1}(ŷ^{(i)}-y^{(i)})^2$"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "Q6sFN7i2piR8"
+ },
+ "outputs": [],
+ "source": [
+ "def mse(total_squared_error, num):\n",
+ "    \"\"\"Return the halved mean squared error: total_squared_error / (2 * num).\n",
+ "\n",
+ "    total_squared_error: sum of the squared errors over all examples.\n",
+ "    num: number of examples; must be non-zero because it is used as a divisor.\n",
+ "    \"\"\"\n",
+ "    denominator = num * 2  # Multiply num by 2\n",
+ "    # Return the computed quotient, not the example count.\n",
+ "    return total_squared_error / denominator"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "fZ816KtRYsoj",
+ "outputId": "93bc6f50-36e4-44ce-d8b1-be1dd8bf0e4a"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[92mTest passed!\u001b[0m\n"
+ ]
+ }
+ ],
+ "source": [
+ "#TEST YOUR IMPLEMENTATION\n",
+ "test_mse_function(mse)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "3Uqj2E0BlU_U"
+ },
+ "source": [
+ "**Finding the predicted value**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "oiwBU-fSjp5C"
+ },
+ "outputs": [],
+ "source": [
+ "def predicted_value(w, x, b):\n",
+ "    \"\"\"Evaluate the linear model w * x + b for the input x.\"\"\"\n",
+ "    return w * x + b  # weight times input, plus bias"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "MXs2kadnreTF",
+ "outputId": "3857049c-da81-4ba8-9d3b-18c3edcd5441"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[92mTest passed!\u001b[0m\n"
+ ]
+ }
+ ],
+ "source": [
+ "#TEST YOUR IMPLEMENTATION\n",
+ "test_predicted_value(predicted_value)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "qW9qAdiUwOrk"
+ },
+ "source": [
+ "## Cost Function\n",
+ "The equation for cost with one variable is:\n",
+ "$$J(w,b) = \\frac{1}{2m} \\sum\\limits_{i = 0}^{m-1} (ŷ - y^{(i)})^2$$"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "RCCqRXf-wNoI"
+ },
+ "outputs": [],
+ "source": [
+ "def compute_cost(x, y, w, b):\n",
+ "    \"\"\"Return the cost J(w, b) = sum((w * x[i] + b - y[i]) ** 2) / (2 * m).\n",
+ "\n",
+ "    x, y: indexable inputs and targets; x must expose .shape[0] as the example count m.\n",
+ "    w, b: weight and bias of the linear model.\n",
+ "    \"\"\"\n",
+ "    # number of training examples\n",
+ "    m = x.shape[0]\n",
+ "    total_squared_error = 0\n",
+ "    for i in range(m):\n",
+ "        yhat = w * x[i] + b\n",
+ "        error = yhat - y[i]  # Subtract y[i] from yhat\n",
+ "        squared_error = error ** 2  # Square the error\n",
+ "        # Accumulate the squared error: summing the raw error would let positive and\n",
+ "        # negative residuals cancel and could make the cost negative.\n",
+ "        total_squared_error = squared_error + total_squared_error\n",
+ "    denominator = m * 2  # Multiply m by 2\n",
+ "    total_cost = total_squared_error / denominator  # Divide the total by the denominator\n",
+ "    return total_cost"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "OGswOCVFzR7W"
+ },
+ "source": [
+ "# Gradient Descent\n",
+ "## 1. Compute Gradient\n",
+ "The gradient is defined as:\n",
+ "$$\n",
+ "\\begin{align}\n",
+ "\\frac{\\partial J(w,b)}{\\partial w} &= \\frac{1}{m} \\sum\\limits_{i = 0}^{m-1} (ŷ - y^{(i)})x^{(i)} \\\\\n",
+ " \\frac{\\partial J(w,b)}{\\partial b} &= \\frac{1}{m} \\sum\\limits_{i = 0}^{m-1} (ŷ - y^{(i)}) \\\\\n",
+ "\\end{align}\n",
+ "$$"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "VMBk0PnA0wK3"
+ },
+ "outputs": [],
+ "source": [
+ "def compute_gradient(x, y, w, b):\n",
+ "    \"\"\"Return (dj_dw, dj_db): the summed per-example gradient terms divided by m.\"\"\"\n",
+ "    m = x.shape[0]  # number of training examples\n",
+ "    grad_w_sum = 0\n",
+ "    grad_b_sum = 0\n",
+ "\n",
+ "    for idx in range(m):\n",
+ "        prediction = w * x[idx] + b\n",
+ "        residual = prediction - y[idx]  # prediction minus target\n",
+ "        grad_b_sum += residual\n",
+ "        grad_w_sum += residual * x[idx]\n",
+ "\n",
+ "    # Average the accumulated sums over the m examples\n",
+ "    return grad_w_sum / m, grad_b_sum / m"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "3U-qZNSFE1QK",
+ "outputId": "deb5e7b2-ee3d-4249-9f2e-3ee3470d8a4d"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[92mTest passed!\u001b[0m\n"
+ ]
+ }
+ ],
+ "source": [
+ "test_compute_gradient(compute_gradient)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "AhWGXBq82p6P"
+ },
+ "source": [
+ "## 2. Update the parameters num_iterations times\n",
+ "$$\\begin{align*} \\text{repeat}&\\text{ until convergence:} \\; \\lbrace \\newline\n",
+ "\\; w &= w - \\alpha \\frac{\\partial J(w,b)}{\\partial w} \\; \\newline\n",
+ " b &= b - \\alpha \\frac{\\partial J(w,b)}{\\partial b} \\newline \\rbrace\n",
+ "\\end{align*}$$\n",
+ "where, parameters $w$, $b$ are updated simultaneously."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "OlNwBvu52cUv"
+ },
+ "outputs": [],
+ "source": [
+ "def gradient_descent(x, y, learning_rate, num_iterations):\n",
+ "    \"\"\"Fit w and b by repeated gradient steps, printing the cost after every update.\n",
+ "\n",
+ "    x, y: training inputs and targets, passed through to compute_gradient and compute_cost.\n",
+ "    learning_rate: step size multiplied into each gradient.\n",
+ "    num_iterations: number of parameter updates to perform.\n",
+ "    Returns the final (w, b); both start at 0.\n",
+ "    \"\"\"\n",
+ "    # Initialize weights and bias\n",
+ "    w = 0\n",
+ "    b = 0\n",
+ "    for iteration in range(num_iterations):\n",
+ "        # Compute both gradients from the current (w, b) before changing either,\n",
+ "        # so the two parameters are updated simultaneously.\n",
+ "        dj_dw, dj_db = compute_gradient(x, y, w, b)\n",
+ "\n",
+ "        # Update weights and bias\n",
+ "        w = w - learning_rate * dj_dw\n",
+ "        b = b - learning_rate * dj_db\n",
+ "        # Compute the cost for monitoring\n",
+ "        cost = compute_cost(x, y, w, b)\n",
+ "        print(f'Iteration {iteration+1}/{num_iterations}, Cost: {cost:.6f}')\n",
+ "    return w, b"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "dpmvXbs4lmHO",
+ "outputId": "59c057d6-d2fa-4301-da22-928ac1a0ec8e"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Iteration 1/10, Cost: -1.782000\n",
+ "Iteration 2/10, Cost: -1.589760\n",
+ "Iteration 3/10, Cost: -1.420232\n",
+ "Iteration 4/10, Cost: -1.270730\n",
+ "Iteration 5/10, Cost: -1.138884\n",
+ "Iteration 6/10, Cost: -1.022606\n",
+ "Iteration 7/10, Cost: -0.920053\n",
+ "Iteration 8/10, Cost: -0.829601\n",
+ "Iteration 9/10, Cost: -0.749820\n",
+ "Iteration 10/10, Cost: -0.679447\n",
+ "Final parameters: w = 0.7955, b = 0.2545\n",
+ "Final cost: -0.679447\n",
+ "\u001b[92mTest passed!\u001b[0m\n"
+ ]
+ }
+ ],
+ "source": [
+ "test_gradient_descent(gradient_descent, compute_cost, compute_gradient)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "NknHm9DGGonf"
+ },
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "_Vb4kNxkG_Ml"
+ },
+ "source": [
+ "# **Project:** Melanoma Tumor Size Prediction"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "ibTpczTtGokE"
+ },
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "from sklearn.linear_model import LinearRegression\n",
+ "from sklearn.metrics import mean_squared_error, mean_absolute_error\n",
+ "from sklearn.model_selection import train_test_split\n",
+ "import seaborn as sns\n",
+ "import matplotlib.pyplot as plt"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "eqAMMJ9lGohp"
+ },
+ "outputs": [],
+ "source": [
+ "#Read the dataset\n",
+ "data = pd.read_csv('/content/Machine-Learning-Simplified/Day-1/melanoma_dataset.csv')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 423
+ },
+ "id": "Q14KncK6GofO",
+ "outputId": "b3fda9fb-80c5-478b-d8df-e4eb26f048e2"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " | \n", + " | mass_npea | \n", + "tumor_size | \n", + "
|---|---|---|
| 0 | \n", + "18.159306 | \n", + "7.490802 | \n", + "
| 1 | \n", + "39.693228 | \n", + "19.014286 | \n", + "
| 2 | \n", + "32.659956 | \n", + "14.639879 | \n", + "
| 3 | \n", + "27.556925 | \n", + "11.973170 | \n", + "
| 4 | \n", + "9.800536 | \n", + "3.120373 | \n", + "
| ... | \n", + "... | \n", + "... | \n", + "
| 995 | \n", + "5.343260 | \n", + "1.831641 | \n", + "
| 996 | \n", + "39.080774 | \n", + "18.346272 | \n", + "
| 997 | \n", + "8.435708 | \n", + "2.736373 | \n", + "
| 998 | \n", + "40.580192 | \n", + "19.004747 | \n", + "
| 999 | \n", + "20.147810 | \n", + "8.920115 | \n", + "
1000 rows × 2 columns
\n", + "| \n", + " | mass_npea | \n", + "tumor_size | \n", + "
|---|---|---|
| count | \n", + "1000.000000 | \n", + "1000.000000 | \n", + "
| mean | \n", + "22.709158 | \n", + "9.805131 | \n", + "
| std | \n", + "11.682122 | \n", + "5.842747 | \n", + "
| min | \n", + "1.575483 | \n", + "0.092640 | \n", + "
| 25% | \n", + "12.290811 | \n", + "4.719465 | \n", + "
| 50% | \n", + "22.968280 | \n", + "9.936148 | \n", + "
| 75% | \n", + "32.664439 | \n", + "14.886392 | \n", + "
| max | \n", + "44.255681 | \n", + "19.994353 | \n", + "
LinearRegression()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
LinearRegression()