// Model type: Markov decision process (nondeterministic + probabilistic choice).
mdp
// Upper bound on the round counters r1 and r2 (both are declared over [0..MAX]).
const int MAX=4;
// A process whose round equals max(r1,r2) is a "leader"; a process whose round is strictly
// smaller satisfies its conjunct regardless of its preference.
// leaders_agreeV holds when every process at the maximum round has preference V.
formula leaders_agree1 = (p1=1 | r1<max(r1,r2)) & (p2=1 | r2<max(r1,r2));
formula leaders_agree2 = (p1=2 | r1<max(r1,r2)) & (p2=2 | r2<max(r1,r2));
// decideV holds when the leaders agree on V and, in addition, every process that is fewer
// than two rounds behind the maximum round also has preference V.
formula decide1 = leaders_agree1 & (p1=1 | r1<max(r1,r2)-1) & (p2=1 | r2<max(r1,r2)-1);
formula decide2 = leaders_agree2 & (p1=2 | r1<max(r1,r2)-1) & (p2=2 | r2<max(r1,r2)-1);
// Process 1 of the two-process protocol. process2 is obtained from this module by
// renaming, so every identifier used here is also the template for its counterpart.
module process1

    // Control state:
    //   0  pick an initial preference (round 0) or start the check of a later round
    //   1  evaluate the decide / leaders_agree formulas
    //   2  leaders disagree: about to start the shared coin of the current round
    //   3  waiting for the outcome of the shared coin
    //   4  decided
    //   5  reached round MAX without being able to decide
    s1 : [0..5];
    // Current round.
    r1 : [0..MAX];
    // Current preference (1 or 2); 0 is used while no preference is held.
    p1 : [0..2];

    // Round 0: nondeterministically choose preference 1 or 2 and enter round 1.
    [] s1=0 & r1=0 -> (r1'=1) & (p1'=1);
    [] s1=0 & r1=0 -> (r1'=1) & (p1'=2);

    // Any later round: move on to the decision step.
    [] s1=0 & r1>=1 & r1<=MAX -> (s1'=1);

    // Decide on a value as soon as the matching decide formula holds.
    [] s1=1 & decide1 -> (p1'=1) & (s1'=4);
    [] s1=1 & decide2 -> (p1'=2) & (s1'=4);

    // Leaders agree but no decision is possible yet: adopt their value and advance a round.
    [] s1=1 & r1<MAX & !decide1 & leaders_agree1 -> (p1'=1) & (r1'=r1+1) & (s1'=0);
    [] s1=1 & r1<MAX & !decide2 & leaders_agree2 -> (p1'=2) & (r1'=r1+1) & (s1'=0);

    // Leaders disagree: give up the preference and head for the shared coin.
    [] s1=1 & r1<MAX & !leaders_agree1 & !leaders_agree2 -> (p1'=0) & (s1'=2);

    // Last round reached and no decision possible.
    [] s1=1 & r1=MAX & !decide1 & !decide2 -> (s1'=5);

    // Start the shared coin belonging to the current round.
    [coin1_s1_start] s1=2 & r1=1 -> (s1'=3);
    [coin2_s1_start] s1=2 & r1=2 -> (s1'=3);
    [coin3_s1_start] s1=2 & r1=3 -> (s1'=3);

    // Receive the coin outcome: it becomes the new preference and the round advances.
    [coin1_s1_p1] s1=3 & r1=1 -> (p1'=1) & (r1'=r1+1) & (s1'=0);
    [coin1_s1_p2] s1=3 & r1=1 -> (p1'=2) & (r1'=r1+1) & (s1'=0);
    [coin2_s1_p1] s1=3 & r1=2 -> (p1'=1) & (r1'=r1+1) & (s1'=0);
    [coin2_s1_p2] s1=3 & r1=2 -> (p1'=2) & (r1'=r1+1) & (s1'=0);
    // Round 3 has a single "end" action: the round advances and p1 is left unchanged.
    [coin3_s1_end] s1=3 & r1=3 -> (r1'=r1+1) & (s1'=0);

    // Terminal states (decided or out of rounds) loop on the shared "done" action.
    [done] s1=4 | s1=5 -> true;

endmodule
// Process 2: a copy of process1 in which the two processes' roles are exchanged.
// Its own state becomes s2/r2/p2, references to the other process's r2/p2 become r1/p1,
// and every coin action is mapped to its "_s2_" counterpart. The "done" action is not
// renamed, so both processes synchronise on it.
module process2 = process1[
    s1=s2,
    r1=r2, r2=r1,
    p1=p2, p2=p1,
    coin1_s1_start=coin1_s2_start,
    coin1_s1_p1=coin1_s2_p1,
    coin1_s1_p2=coin1_s2_p2,
    coin2_s1_start=coin2_s2_start,
    coin2_s1_p1=coin2_s2_p1,
    coin2_s1_p2=coin2_s2_p2,
    coin3_s1_start=coin3_s2_start,
    coin3_s1_end=coin3_s2_end
]
endmodule
// Parameters of the shared-coin counters.
// N: presumably the number of processes (two process modules are defined) - confirm.
const int N = 2;
// K: left undefined here, so a value must be supplied when the model is built.
const int K;
// Counters start in the middle of their range.
const int counter_init = (K+1)*N;
// Largest counter value, i.e. 2*(K+1)*N.
const int range = 2*counter_init;
// A coin reports outcome 1 once its counter is <= left ...
const int left = N;
// ... and outcome 2 once its counter is >= right, i.e. 2*(K+1)*N - N.
const int right = range - N;

// One shared counter per coin round: counter1 for the round-1 coin modules,
// counter2 for the round-2 coin modules.
global counter1 : [0..range] init counter_init;
global counter2 : [0..range] init counter_init;
// Shared-coin participant for process 1 in round 1. It is also the template from which
// the other three coin modules are produced by renaming.
module r1_coin1

    // Becomes true once process1 has entered this coin through coin1_s1_start.
    r1_start1 : bool;
    // Program counter: 0 flip, 1 write the flip to the counter, 2 check the counter,
    // 3 outcome has been handed back.
    r1_pc1 : [0..3];
    // Result of the most recent local flip; at pc 3 it records the reported outcome.
    r1_coin1 : [0..1];

    // Activation, synchronised with the process entering the coin.
    [coin1_s1_start] r1_start1=false -> (r1_start1'=true);

    // Fair local coin flip.
    [] r1_start1 & r1_pc1=0 -> 0.5 : (r1_pc1'=1) & (r1_coin1'=0)
                             + 0.5 : (r1_pc1'=1) & (r1_coin1'=1);

    // Write the flip to the shared counter: 0 decrements, 1 increments.
    // The local coin is reset to 0 in both cases.
    [] r1_start1 & r1_pc1=1 & r1_coin1=0 & counter1>0     -> (r1_coin1'=0) & (r1_pc1'=2) & (counter1'=counter1-1);
    [] r1_start1 & r1_pc1=1 & r1_coin1=1 & counter1<range -> (r1_coin1'=0) & (r1_pc1'=2) & (counter1'=counter1+1);

    // Check the counter: at or below "left" report outcome 1, at or above "right" report
    // outcome 2 (both synchronised with the process), otherwise flip again.
    [coin1_s1_p1] r1_start1 & r1_pc1=2 & counter1<=left  -> (r1_coin1'=0) & (r1_pc1'=3);
    [coin1_s1_p2] r1_start1 & r1_pc1=2 & counter1>=right -> (r1_coin1'=1) & (r1_pc1'=3);
    [] r1_start1 & r1_pc1=2 & counter1>left & counter1<right -> (r1_pc1'=0);

endmodule
// Round-1 coin participant for process 2: same shared counter (counter1) as r1_coin1,
// own local variables, and the "_s2_" variants of the round-1 coin actions.
module r1_coin2 = r1_coin1[
    r1_start1=r1_start2,
    r1_pc1=r1_pc2,
    r1_coin1=r1_coin2,
    counter1=counter1,
    coin1_s1_start=coin1_s2_start,
    coin1_s1_p1=coin1_s2_p1,
    coin1_s1_p2=coin1_s2_p2
]
endmodule
// Round-2 coin participant for process 1: operates on counter2 and synchronises on the
// coin2_s1_* actions.
module r2_coin1 = r1_coin1[
    r1_start1=r2_start1,
    r1_pc1=r2_pc1,
    r1_coin1=r2_coin1,
    counter1=counter2,
    coin1_s1_start=coin2_s1_start,
    coin1_s1_p1=coin2_s1_p1,
    coin1_s1_p2=coin2_s1_p2
]
endmodule
// Round-2 coin participant for process 2: operates on counter2 and synchronises on the
// coin2_s2_* actions.
module r2_coin2 = r1_coin1[
    r1_start1=r2_start2,
    r1_pc1=r2_pc2,
    r1_coin1=r2_coin2,
    counter1=counter2,
    coin1_s1_start=coin2_s2_start,
    coin1_s1_p1=coin2_s2_p1,
    coin1_s1_p2=coin2_s2_p2
]
endmodule
// Both processes are in control state 4 (decided) and hold the same preference.
label "agree1" = (s1=4 & p1=1) & (s2=4 & p2=1);
label "agree2" = (s1=4 & p1=2) & (s2=4 & p2=2);
// Reward structure "steps": a reward of 1 for every unlabelled transition and for every
// coin-related synchronised transition. The "done" action is not listed, so it earns
// no reward.
rewards "steps"
    // unlabelled (internal) transitions
    [] true : 1;

    // entering a shared coin
    [coin1_s1_start] true : 1;
    [coin1_s2_start] true : 1;
    [coin2_s1_start] true : 1;
    [coin2_s2_start] true : 1;
    [coin3_s1_start] true : 1;
    [coin3_s2_start] true : 1;

    // receiving the outcome of the round-1 coin
    [coin1_s1_p1] true : 1;
    [coin1_s1_p2] true : 1;
    [coin1_s2_p1] true : 1;
    [coin1_s2_p2] true : 1;

    // receiving the outcome of the round-2 coin
    [coin2_s1_p1] true : 1;
    [coin2_s1_p2] true : 1;
    [coin2_s2_p1] true : 1;
    [coin2_s2_p2] true : 1;

    // leaving the round-3 coin
    [coin3_s1_end] true : 1;
    [coin3_s2_end] true : 1;
endrewards