mdp
const double p = 0.1;
const double e;
formula a = x * e;
const int M = 5;
const int N = 6;
const double eps = 0.0001;
formula goal = x = 1 & y = N;
formula crashed = (x = 2 & y = 3) | (x = 3 & y = 3) | (x = 3 & y = 4);
module env
x : [1..M] init 1;
y : [1..N] init 1;
[up] !crashed & y < N -> [max(eps,1-3*(p+a)), min(1-3*(p-a),1)] : (y'=min(y+1,N))
+ [max(eps,p-a), min(p+a,1)] : (y'=min(y+1,N)) & (x'=max(x-1,1))
+ [max(eps,p-a), min(p+a,1)] : (y'=min(y+1,N)) & (x'=min(x+1,M))
+ [max(eps,p-a), min(p+a,1)] : true;
[right] !crashed & x < M -> [max(eps,1-3*(p+a)), min(1-3*(p-a),1)] : (x'=min(x+1,M))
+ [max(eps,p-a), min(p+a,1)] : (x'=min(x+1,M)) & (y'=min(y+1,N))
+ [max(eps,p-a), min(p+a,1)] : (x'=max(x-1,1))
+ [max(eps,p-a), min(p+a,1)] : true;
[left] !crashed & x > 1 -> [max(eps,1-3*(p+a)), min(1-3*(p-a),1)] : (x'=max(x-1,1))
+ [max(eps,p-a), min(p+a,1)] : (x'=max(x-1,1)) & (y'=min(y+1,N))
+ [max(eps,p-a), min(p+a,1)] : (x'=min(x+1,M))
+ [max(eps,p-a), min(p+a,1)] : true;
endmodule
module policy
[up] x < 3 -> true;
[left] x >= 3 -> true;
[right] false -> true;
endmodule
label "crashed" = crashed;
label "goal" = goal;