diff --git a/Kushagra/.ipynb_checkpoints/Policy evalutaion-checkpoint.ipynb b/Kushagra/.ipynb_checkpoints/Policy evalutaion-checkpoint.ipynb new file mode 100644 index 0000000..9a686bd --- /dev/null +++ b/Kushagra/.ipynb_checkpoints/Policy evalutaion-checkpoint.ipynb @@ -0,0 +1,144 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "import cv2\n", + "import numpy as np\n", + "import copy" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "pi = (np.ones((4,4,4))*0.25)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "def evaluation():\n", + " \n", + " directions= ['UP','DOWN','LEFT','RIGHT']\n", + " dir_values=[-1,1,-1,1]\n", + "\n", + " dir_l=list(zip(directions,dir_values))\n", + " \n", + " grid = np.zeros((6,6)) # creating a grid of 4 by 4 with one extra cell in the periphery \n", + " #grid[1:5,1:5] = np.zeros((4,4)) # random intitialisation of the state values\n", + " #grid[1,1]=grid[4,4]=0 # assigning the terminal state value as zero\n", + " v=0 # comparision matrix, used to find convergence of solution\n", + " r=-1 # reward for each action\n", + " gamma=0.99999# discount reward factor\n", + " theta= 0.000001 # convergence error \n", + " grid_old=np.zeros((6,6))\n", + " count=0\n", + " while True:\n", + " delta =0\n", + " for i in range(0,6):\n", + " if i==1:\n", + " grid[i-1,:] = grid[i,:]\n", + "\n", + " if i==4:\n", + "\n", + " grid[i+1,:] = grid[i,:]# updating the periphery values of the 6 by 6 grid\n", + " for j in range(0,6):\n", + " if j==1:\n", + " grid[:,j-1]=grid[:,j]\n", + "\n", + " if j==4:\n", + " grid[:,j+1]=grid[:,j]\n", + "\n", + "\n", + "\n", + " grid_old=grid\n", + "\n", + " for k in range (1,5): # running loop within the actual grid\n", + " for l in range (1,5):\n", + " if (k==1 and l==1) or (k==4 and l==4):\n", + " continue\n", + " \n", + " v=grid[k,l]\n", + " \n", + " grid[k,l]= pi[k-1,l-1,0]*(r+gamma*(grid_old[k-1,l]))\n", + " \n", + " for m in range(1,2):\n", + " \n", + " grid[k,l]+= (pi[k-1,l-1,m])*(r+gamma*(grid_old[k+dir_l[m][1],l]))\n", + " \n", + " for m in range(2,4):\n", + " grid[k,l]+= (pi[k-1,l-1,m])*(r+gamma*(grid_old[k,l+dir_l[m][1]]))\n", + " #updating the State values for all states except terminal \n", + "\n", + " delta = max(delta ,abs(v-grid[k,l]))\n", + "\n", + "\n", + " if (delta\n", + "\n", + " # TypeError: object of type 'zip' has no len()\n", + "\n", + " # TypeError: 'zip' object is not subscriptable\n", + "\n", + "list_c = list(zipped) #Output: [(1, 4), (2, 5), (3, 6)]\n", + "\n", + "print(list_c[1][0]) # Output []... Output is empty list becuase by the above statement zip got exhausted." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.2" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/Kushagra/Policy evalutaion.ipynb b/Kushagra/Policy evalutaion.ipynb new file mode 100644 index 0000000..5984007 --- /dev/null +++ b/Kushagra/Policy evalutaion.ipynb @@ -0,0 +1,144 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import cv2\n", + "import numpy as np\n", + "import copy" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "pi = (np.ones((4,4,4))*0.25)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "def evaluation():\n", + " \n", + " directions= ['UP','DOWN','LEFT','RIGHT']\n", + " dir_values=[-1,1,-1,1]\n", + "\n", + " dir_l=list(zip(directions,dir_values))\n", + " \n", + " grid = np.zeros((6,6)) # creating a grid of 4 by 4 with one extra cell in the periphery \n", + " #grid[1:5,1:5] = np.zeros((4,4)) # random intitialisation of the state values\n", + " #grid[1,1]=grid[4,4]=0 # assigning the terminal state value as zero\n", + " v=0 # comparision matrix, used to find convergence of solution\n", + " r=-1 # reward for each action\n", + " gamma=0.999# discount reward factor\n", + " theta= 0.000001 # convergence error \n", + " grid_old=np.zeros((6,6))\n", + " count=0\n", + " while True:\n", + " delta =0\n", + " for i in range(0,6):\n", + " if i==1:\n", + " grid[i-1,:] = grid[i,:]\n", + "\n", + " if i==4:\n", + "\n", + " grid[i+1,:] = grid[i,:]# updating the periphery values of the 6 by 6 grid\n", + " for j in range(0,6):\n", + " if j==1:\n", + " grid[:,j-1]=grid[:,j]\n", + "\n", + " if j==4:\n", + " grid[:,j+1]=grid[:,j]\n", + "\n", + "\n", + "\n", + " grid_old=grid\n", + "\n", + " for k in range (1,5): # running loop within the actual grid\n", + " for l in range (1,5):\n", + " if (k==1 and l==1) or (k==4 and l==4):\n", + " continue\n", + " \n", + " v=grid[k,l]\n", + " \n", + " grid[k,l]= pi[k-1,l-1,0]*(r+gamma*(grid_old[k-1,l]))\n", + " \n", + " for m in range(1,2):\n", + " \n", + " grid[k,l]+= (pi[k-1,l-1,m])*(r+gamma*(grid_old[k+dir_l[m][1],l]))\n", + " \n", + " for m in range(2,4):\n", + " grid[k,l]+= (pi[k-1,l-1,m])*(r+gamma*(grid_old[k,l+dir_l[m][1]]))\n", + " #updating the State values for all states except terminal \n", + "\n", + " delta = max(delta ,abs(v-grid[k,l]))\n", + "\n", + "\n", + " if (delta\n", + "\n", + " # TypeError: object of type 'zip' has no len()\n", + "\n", + " # TypeError: 'zip' object is not subscriptable\n", + "\n", + "list_c = list(zipped) #Output: [(1, 4), (2, 5), (3, 6)]\n", + "\n", + "print(list_c[1][0]) # Output []... Output is empty list becuase by the above statement zip got exhausted." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.2" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}