Skip to content
This repository was archived by the owner on Mar 14, 2023. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
144 changes: 144 additions & 0 deletions Kushagra/.ipynb_checkpoints/Policy evalutaion-checkpoint.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"import cv2\n",
"import numpy as np\n",
"import copy"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"\n",
"pi = (np.ones((4,4,4))*0.25)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"def evaluation():\n",
" \n",
" directions= ['UP','DOWN','LEFT','RIGHT']\n",
" dir_values=[-1,1,-1,1]\n",
"\n",
" dir_l=list(zip(directions,dir_values))\n",
" \n",
" grid = np.zeros((6,6)) # creating a grid of 4 by 4 with one extra cell in the periphery \n",
" #grid[1:5,1:5] = np.zeros((4,4)) # random intitialisation of the state values\n",
" #grid[1,1]=grid[4,4]=0 # assigning the terminal state value as zero\n",
" v=0 # comparision matrix, used to find convergence of solution\n",
" r=-1 # reward for each action\n",
" gamma=0.99999# discount reward factor\n",
" theta= 0.000001 # convergence error \n",
" grid_old=np.zeros((6,6))\n",
" count=0\n",
" while True:\n",
" delta =0\n",
" for i in range(0,6):\n",
" if i==1:\n",
" grid[i-1,:] = grid[i,:]\n",
"\n",
" if i==4:\n",
"\n",
" grid[i+1,:] = grid[i,:]# updating the periphery values of the 6 by 6 grid\n",
" for j in range(0,6):\n",
" if j==1:\n",
" grid[:,j-1]=grid[:,j]\n",
"\n",
" if j==4:\n",
" grid[:,j+1]=grid[:,j]\n",
"\n",
"\n",
"\n",
" grid_old=grid\n",
"\n",
" for k in range (1,5): # running loop within the actual grid\n",
" for l in range (1,5):\n",
" if (k==1 and l==1) or (k==4 and l==4):\n",
" continue\n",
" \n",
" v=grid[k,l]\n",
" \n",
" grid[k,l]= pi[k-1,l-1,0]*(r+gamma*(grid_old[k-1,l]))\n",
" \n",
" for m in range(1,2):\n",
" \n",
" grid[k,l]+= (pi[k-1,l-1,m])*(r+gamma*(grid_old[k+dir_l[m][1],l]))\n",
" \n",
" for m in range(2,4):\n",
" grid[k,l]+= (pi[k-1,l-1,m])*(r+gamma*(grid_old[k,l+dir_l[m][1]]))\n",
" #updating the State values for all states except terminal \n",
"\n",
" delta = max(delta ,abs(v-grid[k,l]))\n",
"\n",
"\n",
" if (delta<theta): #checking for convergenge of solution\n",
" #final comparison matrix\n",
" break\n",
"\n",
" print(grid[1:5,1:5]) # final state value matrix for all states \n",
" \n",
" return "
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[ 0. -13.9975738 -19.99641111 -21.99599023]\n",
" [-13.9975738 -17.99683243 -19.9964318 -19.99641191]\n",
" [-19.99641111 -19.9964318 -17.99683311 -13.99757487]\n",
" [-21.99599023 -19.99641191 -13.99757487 0. ]]\n"
]
}
],
"source": [
"evaluation()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.2"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Loading