*** Episode 1 ***
Step 1 (s = 1, r = 0) -> Up
Step 2 (s = 4, r = -0.04) -> Up
Step 3 (s = 4, r = -0.04) -> Right
Step 4 (s = 7, r = -0.04) -> Right
Step 5 (s = 10, r = -0.04) -> Right
Step 6 (s = 10, r = -0.04) -> Left
Step 7 (s = 11, r = -1) -> None
Episode reward: -1.2
Q:
Up Right Down Left None
1 -0.04 0.00 0 0 NA
2 0.00 0.00 0 0 NA
3 0.00 0.00 0 0 NA
4 -0.04 -0.04 0 0 NA
5 NA NA NA NA 0
6 0.00 0.00 0 0 NA
7 0.00 -0.04 0 0 NA
8 0.00 0.00 0 0 NA
9 0.00 0.00 0 0 NA
10 0.00 -0.04 0 -1 NA
11 NA NA NA NA 0
12 NA NA NA NA 0
U:
col
row 1 2 3 4
3 0 0 0 0
2 0 0 0 0
1 0 0 0 0
*** Episode 2 ***
Step 1 (s = 1, r = 0) -> Right
Step 2 (s = 4, r = -0.04) -> Right
Step 3 (s = 7, r = -0.04) -> Down
Step 4 (s = 7, r = -0.04) -> Up
Step 5 (s = 8, r = -0.04) -> Right
Step 6 (s = 11, r = -1) -> None
Episode reward: -1.2
Q:
Up Right Down Left None
1 -0.04 -0.04 0.00 0 NA
2 0.00 0.00 0.00 0 NA
3 0.00 0.00 0.00 0 NA
4 -0.04 -0.04 0.00 0 NA
5 NA NA NA NA 0
6 0.00 0.00 0.00 0 NA
7 -0.04 -0.04 -0.04 0 NA
8 0.00 -1.00 0.00 0 NA
9 0.00 0.00 0.00 0 NA
10 0.00 -0.04 0.00 -1 NA
11 NA NA NA NA 0
12 NA NA NA NA 0
U:
col
row 1 2 3 4
3 0 0 0 0
2 0 0 0 0
1 0 0 0 0
*** Episode 3 ***
Step 1 (s = 1, r = 0) -> Right
Step 2 (s = 1, r = -0.04) -> Down
Step 3 (s = 1, r = -0.04) -> Down
Step 4 (s = 1, r = -0.04) -> Left
Step 5 (s = 1, r = -0.04) -> Left
Step 6 (s = 1, r = -0.04) -> Up
Step 7 (s = 2, r = -0.04) -> Up
Step 8 (s = 2, r = -0.04) -> Right
Step 9 (s = 2, r = -0.04) -> Down
Step 10 (s = 1, r = -0.04) -> Up
Step 11 (s = 2, r = -0.04) -> Left
Step 12 (s = 2, r = -0.04) -> Left
Step 13 (s = 3, r = -0.04) -> Right
Step 14 (s = 6, r = -0.04) -> Up
Step 15 (s = 9, r = -0.04) -> Down
Step 16 (s = 8, r = -0.04) -> Up
Step 17 (s = 11, r = -1) -> None
Episode reward: -1.6
Q:
Up Right Down Left None
1 -0.04 -0.04 -0.04 -0.076 NA
2 -0.04 -0.04 -0.08 -0.040 NA
3 0.00 -0.04 0.00 0.000 NA
4 -0.04 -0.04 0.00 0.000 NA
5 NA NA NA NA 0
6 -0.04 0.00 0.00 0.000 NA
7 -0.04 -0.04 -0.04 0.000 NA
8 -1.00 -1.00 0.00 0.000 NA
9 0.00 0.00 -0.04 0.000 NA
10 0.00 -0.04 0.00 -1.000 NA
11 NA NA NA NA 0
12 NA NA NA NA 0
U:
col
row 1 2 3 4
3 0.00 0 0 0
2 -0.04 0 0 0
1 -0.04 0 0 0
*** Episode 4 ***
Step 1 (s = 1, r = 0) -> Down
Step 2 (s = 1, r = -0.04) -> Right
Step 3 (s = 4, r = -0.04) -> Down
Step 4 (s = 4, r = -0.04) -> Left
Step 5 (s = 1, r = -0.04) -> Right
Step 6 (s = 4, r = -0.04) -> Down
Step 7 (s = 1, r = -0.04) -> Up
Step 8 (s = 2, r = -0.04) -> Left
Step 9 (s = 3, r = -0.04) -> Left
Step 10 (s = 2, r = -0.04) -> Right
Step 11 (s = 2, r = -0.04) -> Up
Step 12 (s = 3, r = -0.04) -> Up
Step 13 (s = 3, r = -0.04) -> Down
Step 14 (s = 2, r = -0.04) -> Left
Step 15 (s = 2, r = -0.04) -> Left
Step 16 (s = 2, r = -0.04) -> Up
Step 17 (s = 3, r = -0.04) -> Right
Step 18 (s = 2, r = -0.04) -> Up
Step 19 (s = 3, r = -0.04) -> Up
Step 20 (s = 3, r = -0.04) -> Left
Step 21 (s = 3, r = -0.04) -> Down
Step 22 (s = 6, r = -0.04) -> Left
Step 23 (s = 6, r = -0.04) -> Down
Step 24 (s = 3, r = -0.04) -> Down
Step 25 (s = 2, r = -0.04) -> Down
Step 26 (s = 2, r = -0.04) -> Right
Step 27 (s = 2, r = -0.04) -> Left
Step 28 (s = 2, r = -0.04) -> Left
Step 29 (s = 2, r = -0.04) -> Up
Step 30 (s = 3, r = -0.04) -> Up
Step 31 (s = 6, r = -0.04) -> Right
Step 32 (s = 9, r = -0.04) -> Up
Step 33 (s = 6, r = -0.04) -> Up
Step 34 (s = 6, r = -0.04) -> Up
Step 35 (s = 3, r = -0.04) -> Up
Step 36 (s = 3, r = -0.04) -> Up
Step 37 (s = 3, r = -0.04) -> Up
Step 38 (s = 3, r = -0.04) -> Down
Step 39 (s = 6, r = -0.04) -> Left
Step 40 (s = 3, r = -0.04) -> Down
Step 41 (s = 6, r = -0.04) -> Right
Step 42 (s = 9, r = -0.04) -> Right
Step 43 (s = 12, r = 1) -> None
Episode reward: -0.64
Q:
Up Right Down Left None
1 -0.071 -0.071 -0.073 -0.076 NA
2 -0.106 -0.110 -0.113 -0.113 NA
3 -0.131 -0.107 -0.082 -0.113 NA
4 -0.040 -0.040 -0.076 -0.080 NA
5 NA NA NA NA 0
6 -0.084 -0.040 -0.084 -0.118 NA
7 -0.040 -0.040 -0.040 0.000 NA
8 -1.000 -1.000 0.000 0.000 NA
9 -0.080 1.000 -0.040 0.000 NA
10 0.000 -0.040 0.000 -1.000 NA
11 NA NA NA NA 0
12 NA NA NA NA 0
U:
col
row 1 2 3 4
3 -0.082 -0.04 1 0
2 -0.106 0.00 0 0
1 -0.071 -0.04 0 0
*** Episode 5 ***
Step 1 (s = 1, r = 0) -> Down
Step 2 (s = 1, r = -0.04) -> Right
Step 3 (s = 4, r = -0.04) -> Right
Step 4 (s = 7, r = -0.04) -> Left
Step 5 (s = 4, r = -0.04) -> Right
Step 6 (s = 7, r = -0.04) -> Down
Step 7 (s = 7, r = -0.04) -> Up
Step 8 (s = 8, r = -0.04) -> Left
Step 9 (s = 8, r = -0.04) -> Left
Step 10 (s = 9, r = -0.04) -> Right
Step 11 (s = 12, r = 1) -> None
Episode reward: 0.64
Q:
Up Right Down Left None
1 -0.071 -0.077 -0.102 -0.076 NA
2 -0.106 -0.110 -0.113 -0.113 NA
3 -0.131 -0.107 -0.082 -0.113 NA
4 -0.040 -0.071 -0.076 -0.080 NA
5 NA NA NA NA 0
6 -0.084 -0.040 -0.084 -0.118 NA
7 -0.040 -0.040 -0.076 -0.080 NA
8 -1.000 -1.000 0.000 0.869 NA
9 -0.080 1.000 -0.040 0.000 NA
10 0.000 -0.040 0.000 -1.000 NA
11 NA NA NA NA 0
12 NA NA NA NA 0
U:
col
row 1 2 3 4
3 -0.082 -0.04 1.00 0
2 -0.106 0.00 0.87 0
1 -0.071 -0.04 -0.04 0
*** Episode 6 ***
Step 1 (s = 1, r = 0) -> Up
Step 2 (s = 2, r = -0.04) -> Right
Step 3 (s = 2, r = -0.04) -> Up
Step 4 (s = 3, r = -0.04) -> Down
Step 5 (s = 2, r = -0.04) -> Right
Step 6 (s = 2, r = -0.04) -> Left
Step 7 (s = 2, r = -0.04) -> Left
Step 8 (s = 2, r = -0.04) -> Down
Step 9 (s = 2, r = -0.04) -> Down
Step 10 (s = 1, r = -0.04) -> Left
Step 11 (s = 1, r = -0.04) -> Left
Step 12 (s = 2, r = -0.04) -> Up
Step 13 (s = 3, r = -0.04) -> Down
Step 14 (s = 6, r = -0.04) -> Right
Step 15 (s = 6, r = -0.04) -> Right
Step 16 (s = 9, r = -0.04) -> Right
Step 17 (s = 12, r = 1) -> None
Episode reward: 0.4
Q:
Up Right Down Left None
1 -0.124 -0.077 -0.102 -0.15 NA
2 -0.135 -0.149 -0.123 -0.15 NA
3 -0.131 -0.107 -0.098 -0.11 NA
4 -0.040 -0.071 -0.076 -0.08 NA
5 NA NA NA NA 0
6 -0.084 0.722 -0.084 -0.12 NA
7 -0.040 -0.040 -0.076 -0.08 NA
8 -1.000 -1.000 0.000 0.87 NA
9 -0.080 1.000 -0.040 0.00 NA
10 0.000 -0.040 0.000 -1.00 NA
11 NA NA NA NA 0
12 NA NA NA NA 0
U:
col
row 1 2 3 4
3 -0.098 0.72 1.00 0
2 -0.123 0.00 0.87 0
1 -0.077 -0.04 -0.04 0
*** Episode 7 ***
Step 1 (s = 1, r = 0) -> Right
Step 2 (s = 4, r = -0.04) -> Up
Step 3 (s = 4, r = -0.04) -> Up
Step 4 (s = 1, r = -0.04) -> Right
Step 5 (s = 4, r = -0.04) -> Right
Step 6 (s = 7, r = -0.04) -> Right
Step 7 (s = 10, r = -0.04) -> Down
Step 8 (s = 10, r = -0.04) -> Down
Step 9 (s = 10, r = -0.04) -> Up
Step 10 (s = 11, r = -1) -> None
Episode reward: -1.3
Q:
Up Right Down Left None
1 -0.124 -0.099 -0.102 -0.15 NA
2 -0.135 -0.149 -0.123 -0.15 NA
3 -0.131 -0.107 -0.098 -0.11 NA
4 -0.112 -0.077 -0.076 -0.08 NA
5 NA NA NA NA 0
6 -0.084 0.722 -0.084 -0.12 NA
7 -0.040 -0.040 -0.076 -0.08 NA
8 -1.000 -1.000 0.000 0.87 NA
9 -0.080 1.000 -0.040 0.00 NA
10 -1.000 -0.040 -0.040 -1.00 NA
11 NA NA NA NA 0
12 NA NA NA NA 0
U:
col
row 1 2 3 4
3 -0.098 0.722 1.00 0.00
2 -0.123 0.000 0.87 0.00
1 -0.099 -0.076 -0.04 -0.04
*** Episode 8 ***
Step 1 (s = 1, r = 0) -> Right
Step 2 (s = 4, r = -0.04) -> Down
Step 3 (s = 4, r = -0.04) -> Down
Step 4 (s = 4, r = -0.04) -> Left
Step 5 (s = 1, r = -0.04) -> Down
Step 6 (s = 4, r = -0.04) -> Right
Step 7 (s = 7, r = -0.04) -> Up
Step 8 (s = 8, r = -0.04) -> Left
Step 9 (s = 8, r = -0.04) -> Left
Step 10 (s = 8, r = -0.04) -> Left
Step 11 (s = 8, r = -0.04) -> Left
Step 12 (s = 8, r = -0.04) -> Left
Step 13 (s = 8, r = -0.04) -> Up
Step 14 (s = 9, r = -0.04) -> Right
Step 15 (s = 12, r = 1) -> None
Episode reward: 0.48
Q:
Up Right Down Left None
1 -0.124 -0.109 -0.113 -0.15 NA
2 -0.135 -0.149 -0.123 -0.15 NA
3 -0.131 -0.107 -0.098 -0.11 NA
4 -0.112 -0.079 -0.116 -0.14 NA
5 NA NA NA NA 0
6 -0.084 0.722 -0.084 -0.12 NA
7 0.684 -0.040 -0.076 -0.08 NA
8 0.782 -1.000 0.000 0.72 NA
9 -0.080 1.000 -0.040 0.00 NA
10 -1.000 -0.040 -0.040 -1.00 NA
11 NA NA NA NA 0
12 NA NA NA NA 0
U:
col
row 1 2 3 4
3 -0.098 0.722 1.00 0.00
2 -0.123 0.000 0.78 0.00
1 -0.109 -0.079 0.68 -0.04
*** Episode 9 ***
Step 1 (s = 1, r = 0) -> Up
Step 2 (s = 2, r = -0.04) -> Down
Step 3 (s = 1, r = -0.04) -> Right
Step 4 (s = 4, r = -0.04) -> Left
Step 5 (s = 1, r = -0.04) -> Down
Step 6 (s = 1, r = -0.04) -> Down
Step 7 (s = 4, r = -0.04) -> Right
Step 8 (s = 7, r = -0.04) -> Up
Step 9 (s = 8, r = -0.04) -> Up
Step 10 (s = 9, r = -0.04) -> Right
Step 11 (s = 9, r = -0.04) -> Right
Step 12 (s = 8, r = -0.04) -> Up
Step 13 (s = 9, r = -0.04) -> Right
Step 14 (s = 12, r = 1) -> None
Episode reward: 0.52
Q:
Up Right Down Left None
1 -0.150 -0.11 -0.127 -0.15 NA
2 -0.135 -0.15 -0.142 -0.15 NA
3 -0.131 -0.11 -0.098 -0.11 NA
4 -0.112 0.37 -0.116 -0.15 NA
5 NA NA NA NA 0
6 -0.084 0.72 -0.084 -0.12 NA
7 0.729 -0.04 -0.076 -0.08 NA
8 0.890 -1.00 0.000 0.72 NA
9 -0.080 0.97 -0.040 0.00 NA
10 -1.000 -0.04 -0.040 -1.00 NA
11 NA NA NA NA 0
12 NA NA NA NA 0
U:
col
row 1 2 3 4
3 -0.098 0.72 0.97 0.00
2 -0.135 0.00 0.89 0.00
1 -0.115 0.37 0.73 -0.04
*** Episode 10 ***
Step 1 (s = 1, r = 0) -> Right
Step 2 (s = 4, r = -0.04) -> Right
Step 3 (s = 4, r = -0.04) -> Right
Step 4 (s = 7, r = -0.04) -> Up
Step 5 (s = 8, r = -0.04) -> Up
Step 6 (s = 9, r = -0.04) -> Right
Step 7 (s = 9, r = -0.04) -> Up
Step 8 (s = 9, r = -0.04) -> Right
Step 9 (s = 12, r = 1) -> None
Episode reward: 0.72
Q:
Up Right Down Left None
1 -0.150 0.12 -0.127 -0.15 NA
2 -0.135 -0.15 -0.142 -0.15 NA
3 -0.131 -0.11 -0.098 -0.11 NA
4 -0.112 0.54 -0.116 -0.15 NA
5 NA NA NA NA 0
6 -0.084 0.72 -0.084 -0.12 NA
7 0.815 -0.04 -0.076 -0.08 NA
8 0.918 -1.00 0.000 0.72 NA
9 0.816 0.98 -0.040 0.00 NA
10 -1.000 -0.04 -0.040 -1.00 NA
11 NA NA NA NA 0
12 NA NA NA NA 0
U:
col
row 1 2 3 4
3 -0.098 0.72 0.98 0.00
2 -0.135 0.00 0.92 0.00
1 0.121 0.54 0.81 -0.04