/**
 * Draft for simulation lab 7.
 *
 * <p>Simulates an agent performing a biased random walk over a line of
 * states. At every step the agent tries to move one state to the right with
 * probability {@code P_RIGHT}, otherwise one state to the left; an attempt to
 * move past either end leaves the agent where it is. The leftmost state
 * carries a reward of -1, the rightmost a reward of +1, and all other states
 * a reward of 0.
 *
 * <p>The program prints the fraction of time spent in each state and the
 * accumulated value {@code V}.
 */
public class Simulation {
	/** Number of environment states, indexed {@code 0 .. NUM_STATES - 1}. */
	private static final int NUM_STATES = 6;

	/** Number of moves performed after the initial state has been chosen. */
	private static final int NUM_STEPS = 10000;

	/** Probability of attempting a move to the right on each step. */
	private static final double P_RIGHT = .8;

	/** Future discounting factor applied to every collected reward. */
	private static final double GAMMA = .5;

	/**
	 * Runs the simulation once and prints the results to standard output:
	 * one {@code P(s<i>) = <frequency>} line per state followed by a
	 * {@code V = <value>} line.
	 *
	 * @param args command-line arguments (ignored)
	 */
	public static void main(String[] args) {
		// visits[i] counts how many times the agent was observed in state i.
		int[] visits = new int[NUM_STATES];

		// reward[i] is the reward collected whenever the agent is in state i.
		int[] reward = new int[NUM_STATES];
		reward[0] = -1;
		reward[NUM_STATES - 1] = 1;

		double v = 0;

		// Choose the first state uniformly at random. Math.random() is
		// strictly less than 1.0, so the truncated product is always a valid
		// index and no fudge factor is needed.
		int state = (int) (Math.random() * NUM_STATES);

		// The initial state counts as an observation too, which is why there
		// are NUM_STEPS + 1 observations in total.
		for (int time = 0; time <= NUM_STEPS; time++) {
			if (time > 0) {
				state = nextState(state, NUM_STATES, Math.random() < P_RIGHT);
			}
			visits[state]++;
			// NOTE(review): GAMMA scales every reward by the same constant; a
			// discounted return would weight step t by GAMMA^t instead.
			// Behavior is kept as-is here -- confirm which one is intended.
			v += GAMMA * reward[state];
		}

		for (int i = 0; i < NUM_STATES; i++) {
			System.out.println("P(s" + i + ") = " + (double) visits[i] / (NUM_STEPS + 1));
		}
		System.out.println("V = " + v);
	}

	/**
	 * Returns the state reached from {@code state} after attempting one move,
	 * clamped so the agent never leaves the range {@code 0 .. numStates - 1}.
	 *
	 * @param state     current state index
	 * @param numStates total number of states
	 * @param goRight   {@code true} to attempt a move right, {@code false} for left
	 * @return the new state index
	 */
	private static int nextState(int state, int numStates, boolean goRight) {
		if (goRight) {
			return Math.min(state + 1, numStates - 1);
		}
		return Math.max(state - 1, 0);
	}
}