package Catalano.MachineLearning.Exploration;

import java.util.Random;

/* loaded from: input_file:Catalano/MachineLearning/Exploration/Sarsa.class */
/**
 * Tabular SARSA learner.
 *
 * <p>Keeps a {@code states x actions} table of action-value estimates and
 * updates it from observed transitions. Action selection is delegated to the
 * configured {@link IExplorationPolicy}.
 *
 * <p>This class performs no synchronization of its own.
 */
public class Sarsa {
    /** Number of rows in the Q-table. */
    private final int states;
    /** Number of columns in the Q-table. */
    private final int actions;
    /** Action-value estimates, indexed as {@code qvalues[state][action]}. */
    private final double[][] qvalues;
    /** Policy consulted by {@link #GetAction(int)}. */
    private IExplorationPolicy explorationPolicy;
    /** Weight applied to the next state's estimate in the update target. */
    private double discountFactor = 0.95d;
    /** Step size used when blending the old estimate with the new target. */
    private double learningRate = 0.25d;

    /**
     * Creates a learner with a {@code states x actions} Q-table.
     *
     * @param states            number of states (table rows)
     * @param actions           number of actions (table columns)
     * @param explorationPolicy policy used by {@link #GetAction(int)}
     * @param randomize         when {@code true}, every entry is initialised to
     *                          {@code Random.nextDouble() / 10}; otherwise the
     *                          table is left at zero
     */
    public Sarsa(int states, int actions, IExplorationPolicy explorationPolicy, boolean randomize) {
        this.states = states;
        this.actions = actions;
        this.explorationPolicy = explorationPolicy;
        // Allocate the full 2-D table in one go; Java zero-fills it.
        this.qvalues = new double[states][actions];
        if (randomize) {
            Random random = new Random();
            for (double[] row : this.qvalues) {
                for (int a = 0; a < row.length; a++) {
                    row[a] = random.nextDouble() / 10.0d;
                }
            }
        }
    }

    /**
     * @return the number of states given at construction
     */
    public int getStates() {
        return this.states;
    }

    /**
     * @return the number of actions given at construction
     */
    public int getActions() {
        return this.actions;
    }

    /**
     * @return the policy currently used to choose actions
     */
    public IExplorationPolicy getExplorationPolicy() {
        return this.explorationPolicy;
    }

    /**
     * Replaces the policy used to choose actions.
     *
     * @param explorationPolicy the new policy
     */
    public void setExplorationPolicy(IExplorationPolicy explorationPolicy) {
        this.explorationPolicy = explorationPolicy;
    }

    /**
     * @return the current learning rate
     */
    public double getLearningRate() {
        return this.learningRate;
    }

    /**
     * Sets the learning rate, clamped to the range {@code [0, 1]}.
     *
     * @param learningRate requested learning rate
     */
    public void setLearningRate(double learningRate) {
        this.learningRate = Math.max(0.0d, Math.min(1.0d, learningRate));
    }

    /**
     * @return the current discount factor
     */
    public double getDiscountFactor() {
        return this.discountFactor;
    }

    /**
     * Sets the discount factor, clamped to the range {@code [0, 1]}.
     *
     * @param discountFactor requested discount factor
     */
    public void setDiscountFactor(double discountFactor) {
        this.discountFactor = Math.max(0.0d, Math.min(1.0d, discountFactor));
    }

    /**
     * Asks the exploration policy to pick an action for a state.
     *
     * @param state index of the current state
     * @return whatever the policy's {@code ChooseAction} returns for this
     *         state's row of estimates
     */
    public int GetAction(int state) {
        return this.explorationPolicy.ChooseAction(this.qvalues[state]);
    }

    /**
     * Updates the estimate for {@code (previousState, previousAction)} after a
     * non-terminal transition:
     * {@code Q(s,a) = (1 - lr) * Q(s,a) + lr * (reward + discount * Q(s',a'))}.
     *
     * @param previousState  state the action was taken in
     * @param previousAction action that was taken
     * @param reward         reward received for the transition
     * @param nextState      state reached
     * @param nextAction     action selected in {@code nextState}
     */
    public void UpdateState(int previousState, int previousAction, double reward,
                            int nextState, int nextAction) {
        // Read the next estimate BEFORE touching the table: when the
        // transition is a self-loop with the same action, both indices refer
        // to the same cell and the target must use its pre-update value.
        double nextEstimate = this.qvalues[nextState][nextAction];
        double[] estimates = this.qvalues[previousState];
        double decayed = estimates[previousAction] * (1.0d - this.learningRate);
        estimates[previousAction] =
                decayed + (this.learningRate * (reward + (this.discountFactor * nextEstimate)));
    }

    /**
     * Updates the estimate for {@code (previousState, previousAction)} when no
     * next state/action is supplied:
     * {@code Q(s,a) = (1 - lr) * Q(s,a) + lr * reward}.
     *
     * @param previousState  state the action was taken in
     * @param previousAction action that was taken
     * @param reward         reward received
     */
    public void UpdateState(int previousState, int previousAction, double reward) {
        double[] estimates = this.qvalues[previousState];
        double decayed = estimates[previousAction] * (1.0d - this.learningRate);
        estimates[previousAction] = decayed + (this.learningRate * reward);
    }
}
