IEEE-NITK · Kushcode01 · May 16, 2020
diff --git a/Kushagra/.ipynb_checkpoints/Policy evalutaion-checkpoint.ipynb b/Kushagra/.ipynb_checkpoints/Policy evalutaion-checkpoint.ipynb
@@ -0,0 +1,144 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import cv2\n",
+    "import numpy as np\n",
+    "import copy"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "pi = (np.ones((4,4,4))*0.25)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def evaluation():\n",
+    "    \n",
+    "    directions= ['UP','DOWN','LEFT','RIGHT']\n",
+    "    dir_values=[-1,1,-1,1]\n",
+    "\n",
+    "    dir_l=list(zip(directions,dir_values))\n",
+    "    \n",
+    "    grid = np.zeros((6,6)) # creating a grid of 4 by 4 with one extra cell in the periphery  \n",
+    "    #grid[1:5,1:5] = np.zeros((4,4))  # random intitialisation of the state values\n",
+    "    #grid[1,1]=grid[4,4]=0  # assigning the terminal state value as zero\n",
+    "    v=0   # comparision matrix, used to find convergence of solution\n",
+    "    r=-1      # reward for each action\n",
+    "    gamma=0.99999# discount reward factor\n",
+    "    theta= 0.000001  # convergence error \n",
+    "    grid_old=np.zeros((6,6))\n",
+    "    count=0\n",
+    "    while True:\n",
+    "        delta =0\n",
+    "        for i in range(0,6):\n",
+    "            if i==1:\n",
+    "                    grid[i-1,:] = grid[i,:]\n",
+    "\n",
+    "            if i==4:\n",
+    "\n",
+    "                    grid[i+1,:] = grid[i,:]# updating the periphery values of the 6 by 6 grid\n",
+    "        for j in range(0,6):\n",
+    "            if j==1:\n",
+    "                    grid[:,j-1]=grid[:,j]\n",
+    "\n",
+    "            if j==4:\n",
+    "                    grid[:,j+1]=grid[:,j]\n",
+    "\n",
+    "\n",
+    "\n",
+    "        grid_old=grid\n",
+    "\n",
+    "        for k in range (1,5):                   # running loop within the actual grid\n",
+    "            for l in range (1,5):\n",
+    "                if (k==1 and l==1) or (k==4 and l==4):\n",
+    "                    continue\n",
+    "                \n",
+    "                v=grid[k,l]\n",
+    "                \n",
+    "                grid[k,l]= pi[k-1,l-1,0]*(r+gamma*(grid_old[k-1,l]))\n",
+    "                \n",
+    "                for m in range(1,2):\n",
+    "                    \n",
+    "                    grid[k,l]+= (pi[k-1,l-1,m])*(r+gamma*(grid_old[k+dir_l[m][1],l]))\n",
+    "                    \n",
+    "                for m in range(2,4):\n",
+    "                    grid[k,l]+= (pi[k-1,l-1,m])*(r+gamma*(grid_old[k,l+dir_l[m][1]]))\n",
+    "                #updating the State values for all states except terminal \n",
+    "\n",
+    "                delta = max(delta ,abs(v-grid[k,l]))\n",
+    "\n",
+    "\n",
+    "        if (delta<theta): #checking for convergenge of solution\n",
+    "            #final comparison matrix\n",
+    "            break\n",
+    "\n",
+    "    print(grid[1:5,1:5]) # final state value matrix for all states \n",
+    "    \n",
+    "    return "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[[  0.         -13.9975738  -19.99641111 -21.99599023]\n",
+      " [-13.9975738  -17.99683243 -19.9964318  -19.99641191]\n",
+      " [-19.99641111 -19.9964318  -17.99683311 -13.99757487]\n",
+      " [-21.99599023 -19.99641191 -13.99757487   0.        ]]\n"
+     ]
+    }
+   ],
+   "source": [
+    "evaluation()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}