smg

// scheduler
player p1
  scheduler, [p1_add], [p2_add], [p1_mult], [p2_mult], [time0]
endplayer

// environment
player p2
  P1, P2, [p1_done], [p2_done], [p1_fail], [p2_fail], [time1]
endplayer

label "tasks_complete" = (task6=3);
label "invariants" = ((turn=0&(p1>1|p2>1))=>x1<=0)&(p1=1=>x1<=2)&(p1=2=>x1<=3)&(p1=3=>x1<=0)&(p1=4=>x1<=0)&(p2=1=>x2<=5)&(p2=2=>x2<=7)&(p2=3=>x2<=0)&(p2=4=>x2<=0);

const int k1;
const int k2;
const double p;

module scheduler

	turn : [0..1];
	task1 : [0..3];
	task2 : [0..3];
	task3 : [0..3];
	task4 : [0..3];
	task5 : [0..3];
	task6 : [0..3];

	[p1_add] turn=0&task1=0 -> (task1'=1);
	[p2_add] turn=0&task1=0 -> (task1'=2);
	[p1_mult] turn=0&task2=0 -> (task2'=1);
	[p2_mult] turn=0&task2=0 -> (task2'=2);
	[p1_mult] turn=0&task3=0&task1=3 -> (task3'=1);
	[p2_mult] turn=0&task3=0&task1=3 -> (task3'=2);
	[p1_add] turn=0&task4=0&task1=3&task2=3 -> (task4'=1);
	[p2_add] turn=0&task4=0&task1=3&task2=3 -> (task4'=2);
	[p1_mult] turn=0&task5=0&task3=3 -> (task5'=1);
	[p2_mult] turn=0&task5=0&task3=3 -> (task5'=2);
	[p1_add] turn=0&task6=0&task4=3&task5=3 -> (task6'=1);
	[p2_add] turn=0&task6=0&task4=3&task5=3 -> (task6'=2);
	[p1_done] task1=1 -> (task1'=3) & (turn'=0);
	[p1_done] task2=1 -> (task2'=3) & (turn'=0);
	[p1_done] task3=1 -> (task3'=3) & (turn'=0);
	[p1_done] task4=1 -> (task4'=3) & (turn'=0);
	[p1_done] task5=1 -> (task5'=3) & (turn'=0);
	[p1_done] task6=1 -> (task6'=3) & (turn'=0);
	[p2_done] task1=2 -> (task1'=3) & (turn'=0);
	[p2_done] task2=2 -> (task2'=3) & (turn'=0);
	[p2_done] task3=2 -> (task3'=3) & (turn'=0);
	[p2_done] task4=2 -> (task4'=3) & (turn'=0);
	[p2_done] task5=2 -> (task5'=3) & (turn'=0);
	[p2_done] task6=2 -> (task6'=3) & (turn'=0);
	[p1_fail] task1=1 -> (task1'=0) & (turn'=0);
	[p1_fail] task2=1 -> (task2'=0) & (turn'=0);
	[p1_fail] task3=1 -> (task3'=0) & (turn'=0);
	[p1_fail] task4=1 -> (task4'=0) & (turn'=0);
	[p1_fail] task5=1 -> (task5'=0) & (turn'=0);
	[p1_fail] task6=1 -> (task6'=0) & (turn'=0);
	[p2_fail] task1=2 -> (task1'=0) & (turn'=0);
	[p2_fail] task2=2 -> (task2'=0) & (turn'=0);
	[p2_fail] task3=2 -> (task3'=0) & (turn'=0);
	[p2_fail] task4=2 -> (task4'=0) & (turn'=0);
	[p2_fail] task5=2 -> (task5'=0) & (turn'=0);
	[p2_fail] task6=2 -> (task6'=0) & (turn'=0);
	
	// pass over control to the environment
	[] turn=0 -> (turn'=1);
	
	// scheduler can only let time pass if nothing scheduled
	// otherwise must schedule something or give control over to the environment
	[time0] turn=0 & p1=0 & p2=0 -> 1.0 : true;
	[time1] turn=1 -> 1.0 : true;

endmodule

module P1

	p1 : [0..3];
	fail1 : [0..k1+1];
	x1 : [0..4];

	[p1_add] p1=0 -> (p1'=1) & (x1'=0);
	[p1_done] turn=1&(p1=1)&(x1<=2) -> (p1'=0) & (x1'=0);
	[] turn=1&(p1=1)&(x1<=2)&(fail1<k1) -> p : (fail1'=fail1+1) & (p1'=3) & (x1'=0) + 1-p : (fail1'=fail1+1) & (p1'=1);
	[p1_mult] p1=0 -> (p1'=2) & (x1'=0);
	[p1_done] turn=1&(p1=2)&(x1<=3) -> (p1'=0) & (x1'=0);
	[] turn=1&(p1=2)&(x1<=3)&(fail1<k1) -> p : (fail1'=fail1+1) & (p1'=3) & (x1'=0) + 1-p : (fail1'=fail1+1) & (p1'=2);
	[p1_fail] turn=1&p1=3 -> (p1'=0);
	[time1] (p1=1=>x1+1<=2)&(p1=2=>x1+1<=3)&(p1=3=>x1+1<=0) -> 1.0 : (x1'=min(x1+1, 4));

endmodule

module P2

	p2 : [0..3];
	fail2 : [0..k2+1];
	x2 : [0..8];

	[p2_add] p2=0 -> (p2'=1) & (x2'=0);
	[p2_done] turn=1&(p2=1)&(x2<=5) -> (p2'=0) & (x2'=0);
	[] turn=1&(p2=1)&(x2<=5)&(fail2<k2) -> p : (fail2'=fail2+1) & (p2'=3) & (x2'=0) + 1-p : (fail2'=fail2+1) & (p2'=1);
	[p2_mult] p2=0 -> (p2'=2) & (x2'=0);
	[p2_done] turn=1&(p2=2)&(x2<=7) -> (p2'=0) & (x2'=0);
	[] turn=1&(p2=2)&(x2<=7)&(fail2<k2) -> p : (fail2'=fail2+1) & (p2'=3) & (x2'=0) + 1-p : (fail2'=fail2+1) & (p2'=2);
	[p2_fail] turn=1&p2=3 -> (p2'=0) & (x2'=0);
	[time1] (p2=1=>x2+1<=5)&(p2=2=>x2+1<=7)&(p2=3=>x2+1<=0) -> 1.0 : (x2'=min(x2+1, 8));

endmodule

rewards "time" 

	[time0] true : 1;
	[time1] true : 1;

endrewards

rewards "energy" 

	[time0] p1=0 : 10;
	[time0] p1>0 : 90;
	[time0] p2=0 : 20;
	[time0] p2>0 : 30;
	[time1] p1=0 : 10;
	[time1] p1>0 : 90;
	[time1] p2=0 : 20;
	[time1] p2>0 : 30;

endrewards