/*
 * Decompiled with CFR 0.152.
 */
package com.datumbox.framework.core.machinelearning.clustering;

import com.datumbox.framework.common.Configuration;
import com.datumbox.framework.common.concurrency.ForkJoinStream;
import com.datumbox.framework.common.concurrency.StreamMethods;
import com.datumbox.framework.common.dataobjects.AssociativeArray;
import com.datumbox.framework.common.dataobjects.TypeInference;
import com.datumbox.framework.common.storage.interfaces.BigMap;
import com.datumbox.framework.common.storage.interfaces.StorageEngine;
import com.datumbox.framework.core.common.dataobjects.Dataframe;
import com.datumbox.framework.core.common.dataobjects.Record;
import com.datumbox.framework.core.common.utilities.MapMethods;
import com.datumbox.framework.core.common.utilities.PHPMethods;
import com.datumbox.framework.core.machinelearning.common.abstracts.AbstractTrainer;
import com.datumbox.framework.core.machinelearning.common.abstracts.modelers.AbstractClusterer;
import com.datumbox.framework.core.machinelearning.common.interfaces.PredictParallelizable;
import com.datumbox.framework.core.machinelearning.common.interfaces.TrainParallelizable;
import com.datumbox.framework.core.mathematics.distances.Distance;
import com.datumbox.framework.core.statistics.descriptivestatistics.Descriptives;
import com.datumbox.framework.core.statistics.sampling.SimpleRandomSampling;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

/**
 * K-means clusterer: seeds the clusters with one of the supported
 * {@link TrainingParameters.Initialization} strategies and then repeatedly
 * assigns every record to the closest centroid and recomputes the centroids
 * until no centroid changes or the iteration limit is reached.
 */
public class Kmeans
extends AbstractClusterer<Cluster, ModelParameters, TrainingParameters>
implements PredictParallelizable,
TrainParallelizable {
    /** Value reported by {@link #isParallelized()}; passed to StreamMethods.stream when building the streams used during training. */
    private boolean parallelized = true;
    /** Executor used for the per-feature and per-record stream operations; built from the concurrency configuration of the knowledge base. */
    protected final ForkJoinStream streamExecutor = new ForkJoinStream(this.knowledgeBase.getConfiguration().getConcurrencyConfiguration());

    /**
     * Creates a clusterer from explicit training parameters; all the work is
     * delegated to the superclass constructor.
     *
     * @param trainingParameters the training parameters to use
     * @param configuration the framework configuration
     */
    protected Kmeans(TrainingParameters trainingParameters, Configuration configuration) {
        super(trainingParameters, configuration);
    }

    /**
     * Creates a clusterer from a storage name; all the work is delegated to the
     * superclass constructor (presumably it loads a previously stored model -
     * confirm against AbstractClusterer).
     *
     * @param storageName the name passed to the superclass
     * @param configuration the framework configuration
     */
    protected Kmeans(String storageName, Configuration configuration) {
        super(storageName, configuration);
    }

    /**
     * Tells whether the stream operations of this clusterer are requested to run in parallel.
     *
     * @return the current value of the parallelized flag
     */
    @Override
    public boolean isParallelized() {
        return parallelized;
    }

    /**
     * Sets the flag returned by {@link #isParallelized()}.
     *
     * @param parallelized the new value of the flag
     */
    @Override
    public void setParallelized(boolean parallelized) {
        this.parallelized = parallelized;
    }

    /**
     * Predicts the cluster of every record of the dataset by delegating to
     * {@code _predictDatasetParallel} with the storage engine and the
     * concurrency configuration of the knowledge base.
     *
     * @param newData the records to assign to clusters
     */
    @Override
    protected void _predict(Dataframe newData) {
        StorageEngine storageEngine = knowledgeBase.getStorageEngine();
        _predictDatasetParallel(newData, storageEngine, knowledgeBase.getConfiguration().getConcurrencyConfiguration());
    }

    /**
     * Predicts the cluster of a single record.
     *
     * <p>The distance between the record and the centroid of every stored
     * cluster is computed, the distances are passed through
     * {@code Descriptives.normalize} and the cluster with the smallest value is
     * selected.</p>
     *
     * @param r the record to classify
     * @return a prediction holding the selected cluster id and the normalized distances per cluster id
     */
    @Override
    public PredictParallelizable.Prediction _predictRecord(Record r) {
        ModelParameters modelParameters = (ModelParameters)this.knowledgeBase.getModelParameters();
        // Typed map instead of a raw one so that the entries can be iterated without unchecked conversions.
        Map<Integer, Cluster> clusterMap = modelParameters.getClusterMap();
        AssociativeArray clusterDistances = new AssociativeArray();
        for (Map.Entry<Integer, Cluster> e : clusterMap.entrySet()) {
            Integer clusterId = e.getKey();
            Cluster c = e.getValue();
            double distance = this.calculateDistance(r, c.getCentroid());
            clusterDistances.put(clusterId, distance);
        }
        Descriptives.normalize(clusterDistances);
        return new PredictParallelizable.Prediction(this.getSelectedClusterFromDistances(clusterDistances), clusterDistances);
    }

    /**
     * Trains the model: records every non-null response value as a gold
     * standard class, estimates the feature weights, seeds the clusters, runs
     * the iterative cluster estimation and finally calls {@code clearClusters()}.
     *
     * @param trainingData the dataset used for training
     */
    @Override
    protected void _fit(Dataframe trainingData) {
        Set<Object> observedClasses = ((ModelParameters)knowledgeBase.getModelParameters()).getGoldStandardClasses();
        for (Record record : trainingData) {
            Object label = record.getY();
            if (label != null) {
                observedClasses.add(label);
            }
        }

        calculateFeatureWeights(trainingData);
        initializeClusters(trainingData);
        calculateClusters(trainingData);
        clearClusters();
    }

    /**
     * Estimates the weight of every feature and stores it in the feature
     * weights map of the model parameters.
     *
     * <p>When the training parameters are not weighted, numerical features get
     * weight 1 and every other feature gets the categorical gamma multiplier.
     * Otherwise one pass over the data accumulates, per feature, the number of
     * non-zero occurrences (non-numerical features) or the sum and the sum of
     * squares of the values (numerical features). The raw weight is
     * {@code 1 - p^2} for non-numerical features, where {@code p} is the
     * fraction of records with a non-zero value, and twice the variance for
     * numerical ones; a positive raw weight is inverted and the weight of
     * non-numerical features is additionally multiplied by the gamma
     * multiplier.</p>
     *
     * @param trainingData the dataset used for training
     */
    private void calculateFeatureWeights(Dataframe trainingData) {
        ModelParameters modelParameters = (ModelParameters)this.knowledgeBase.getModelParameters();
        TrainingParameters trainingParameters = (TrainingParameters)this.knowledgeBase.getTrainingParameters();
        Map<Object, TypeInference.DataType> columnTypes = trainingData.getXDataTypes();
        Map<Object, Double> featureWeights = modelParameters.getFeatureWeights();
        double gammaWeight = trainingParameters.getCategoricalGamaMultiplier();

        if (!trainingParameters.isWeighted()) {
            this.streamExecutor.forEach(StreamMethods.stream(columnTypes.entrySet().stream(), this.isParallelized()), e -> {
                double weight = e.getValue() != TypeInference.DataType.NUMERICAL ? gammaWeight : 1.0;
                featureWeights.put(e.getKey(), weight);
            });
            return;
        }

        int n = trainingData.size();
        StorageEngine storageEngine = this.knowledgeBase.getStorageEngine();
        Map<Object, Double> categoricalFrequencies = storageEngine.getBigMap("tmp_categoricalFrequencies", Object.class, Double.class, StorageEngine.MapType.HASHMAP, StorageEngine.StorageHint.IN_MEMORY, true, true);
        Map<Object, Double> varianceSumX = storageEngine.getBigMap("tmp_varianceSumX", Object.class, Double.class, StorageEngine.MapType.HASHMAP, StorageEngine.StorageHint.IN_MEMORY, true, true);
        Map<Object, Double> varianceSumXsquare = storageEngine.getBigMap("tmp_varianceSumXsquare", Object.class, Double.class, StorageEngine.MapType.HASHMAP, StorageEngine.StorageHint.IN_MEMORY, true, true);
        try {
            // First pass: accumulate the per-feature statistics. Null and zero values are skipped.
            for (Record r : trainingData) {
                for (Map.Entry<Object, Object> entry : r.getX().entrySet()) {
                    Double value = TypeInference.toDouble(entry.getValue());
                    if (value == null || value == 0.0) {
                        continue;
                    }
                    Object feature = entry.getKey();
                    if (columnTypes.get(feature) != TypeInference.DataType.NUMERICAL) {
                        categoricalFrequencies.put(feature, categoricalFrequencies.getOrDefault(feature, 0.0) + 1.0);
                    } else {
                        varianceSumX.put(feature, varianceSumX.getOrDefault(feature, 0.0) + value);
                        varianceSumXsquare.put(feature, varianceSumXsquare.getOrDefault(feature, 0.0) + value * value);
                    }
                }
            }

            // Second pass: turn the statistics into weights.
            this.streamExecutor.forEach(StreamMethods.stream(columnTypes.entrySet().stream(), this.isParallelized()), e -> {
                Object feature = e.getKey();
                boolean numerical = e.getValue() == TypeInference.DataType.NUMERICAL;
                double weight;
                // A feature that never had a non-zero value has no entry in the
                // temporary maps; treat its statistics as 0 instead of unboxing null.
                if (!numerical) {
                    double percentage = categoricalFrequencies.getOrDefault(feature, 0.0) / (double)n;
                    weight = 1.0 - percentage * percentage;
                } else {
                    double mean = varianceSumX.getOrDefault(feature, 0.0) / (double)n;
                    weight = 2.0 * (varianceSumXsquare.getOrDefault(feature, 0.0) / (double)n - mean * mean);
                }
                if (weight > 0.0) {
                    weight = 1.0 / weight;
                }
                if (!numerical) {
                    weight *= gammaWeight;
                }
                featureWeights.put(feature, weight);
            });
        } finally {
            // Drop every temporary map with its own handle, even when training fails.
            storageEngine.dropBigMap("tmp_categoricalFrequencies", categoricalFrequencies);
            storageEngine.dropBigMap("tmp_varianceSumX", varianceSumX);
            storageEngine.dropBigMap("tmp_varianceSumXsquare", varianceSumXsquare);
        }
    }

    /**
     * Computes the weighted distance between two records using the distance
     * method of the training parameters and the feature weights of the model.
     *
     * @param r1 the first record
     * @param r2 the second record
     * @return the weighted euclidean or manhattan distance of the two records
     * @throws IllegalArgumentException if the configured distance method is neither of the two supported ones
     */
    private double calculateDistance(Record r1, Record r2) {
        Map<Object, Double> weights = ((ModelParameters)knowledgeBase.getModelParameters()).getFeatureWeights();
        TrainingParameters.Distance method = ((TrainingParameters)knowledgeBase.getTrainingParameters()).getDistanceMethod();

        if (method == TrainingParameters.Distance.EUCLIDIAN) {
            return Distance.euclideanWeighted(r1.getX(), r2.getX(), weights);
        }
        if (method == TrainingParameters.Distance.MANHATTAN) {
            return Distance.manhattanWeighted(r1.getX(), r2.getX(), weights);
        }
        throw new IllegalArgumentException("Unsupported Distance method.");
    }

    /**
     * Picks the cluster with the smallest distance.
     *
     * @param clusterDistances the distances keyed by cluster id
     * @return the key of the entry returned by {@code MapMethods.selectMinKeyValue}
     */
    private Object getSelectedClusterFromDistances(AssociativeArray clusterDistances) {
        return MapMethods.selectMinKeyValue(clusterDistances).getKey();
    }

    /*
     * WARNING - void declaration
     */
    private void initializeClusters(Dataframe trainingData) {
        block11: {
            void var9_22;
            Map clusterMap;
            TrainingParameters.Initialization initializationMethod;
            int k;
            block14: {
                void var9_20;
                TrainingParameters trainingParameters;
                block13: {
                    Cluster c;
                    Integer clusterId;
                    block12: {
                        ModelParameters modelParameters = (ModelParameters)this.knowledgeBase.getModelParameters();
                        trainingParameters = (TrainingParameters)this.knowledgeBase.getTrainingParameters();
                        k = trainingParameters.getK();
                        initializationMethod = trainingParameters.getInitializationMethod();
                        clusterMap = modelParameters.getClusterMap();
                        if (initializationMethod != TrainingParameters.Initialization.SET_FIRST_K && initializationMethod != TrainingParameters.Initialization.FORGY) break block12;
                        int i = 0;
                        for (Record record : trainingData.values()) {
                            if (i < k) {
                                Integer clusterId2 = i;
                                Cluster c2 = new Cluster((int)clusterId2);
                                c2.add(record);
                                c2.updateClusterParameters();
                                clusterMap.put(clusterId2, c2);
                                ++i;
                                continue;
                            }
                            break block11;
                        }
                        break block11;
                    }
                    if (initializationMethod != TrainingParameters.Initialization.RANDOM_PARTITION) break block13;
                    int i = 0;
                    for (Record record : trainingData.values()) {
                        clusterId = i % k;
                        c = (Cluster)clusterMap.get(clusterId);
                        if (c == null) {
                            c = new Cluster((int)clusterId);
                        }
                        c.add(record);
                        clusterMap.put(clusterId, c);
                        ++i;
                    }
                    for (Map.Entry entry : clusterMap.entrySet()) {
                        clusterId = (Integer)entry.getKey();
                        c = (Cluster)entry.getValue();
                        c.updateClusterParameters();
                        clusterMap.put(clusterId, c);
                    }
                    break block11;
                }
                if (initializationMethod != TrainingParameters.Initialization.FURTHEST_FIRST && initializationMethod != TrainingParameters.Initialization.SUBSET_FURTHEST_FIRST) break block14;
                int sampleSize = trainingData.size();
                if (initializationMethod == TrainingParameters.Initialization.SUBSET_FURTHEST_FIRST) {
                    sampleSize = (int)Math.max(Math.ceil(trainingParameters.getSubsetFurthestFirstcValue() * (double)k * PHPMethods.log(k, 2.0)), (double)k);
                }
                HashSet<Integer> alreadyAddedPoints = new HashSet<Integer>();
                boolean bl = false;
                while (var9_20 < k) {
                    Integer selectedRecordId = null;
                    double maxMinDistance = 0.0;
                    int samplePointCounter = 0;
                    for (Map.Entry<Integer, Record> e3 : trainingData.entries()) {
                        Integer rId = e3.getKey();
                        Record r = e3.getValue();
                        if (samplePointCounter > sampleSize) break;
                        if (alreadyAddedPoints.contains(rId)) continue;
                        double minClusterDistance = Double.MAX_VALUE;
                        for (Cluster c : clusterMap.values()) {
                            double distance = this.calculateDistance(r, c.getCentroid());
                            if (!(distance < minClusterDistance)) continue;
                            minClusterDistance = distance;
                        }
                        if (minClusterDistance > maxMinDistance) {
                            maxMinDistance = minClusterDistance;
                            selectedRecordId = rId;
                        }
                        ++samplePointCounter;
                    }
                    alreadyAddedPoints.add(selectedRecordId);
                    Integer clusterId = clusterMap.size();
                    Cluster c = new Cluster((int)clusterId);
                    c.add(trainingData.get(selectedRecordId));
                    c.updateClusterParameters();
                    clusterMap.put(clusterId, c);
                    ++var9_20;
                }
                break block11;
            }
            if (initializationMethod != TrainingParameters.Initialization.PLUS_PLUS) break block11;
            StorageEngine storageEngine = this.knowledgeBase.getStorageEngine();
            HashSet<Integer> alreadyAddedPoints = new HashSet<Integer>();
            boolean bl = false;
            while (var9_22 < k) {
                Map<Object, Object> tmp_minClusterDistance = storageEngine.getBigMap("tmp_minClusterDistance", Object.class, Double.class, StorageEngine.MapType.HASHMAP, StorageEngine.StorageHint.IN_MEMORY, true, true);
                AssociativeArray minClusterDistanceArray = new AssociativeArray(tmp_minClusterDistance);
                this.streamExecutor.forEach(StreamMethods.stream(trainingData.entries(), this.isParallelized()), e -> {
                    Integer rId = (Integer)e.getKey();
                    Record r = (Record)e.getValue();
                    if (!alreadyAddedPoints.contains(rId)) {
                        double minClusterDistance = 1.0;
                        if (clusterMap.size() > 0) {
                            minClusterDistance = Double.MAX_VALUE;
                            for (Cluster c : clusterMap.values()) {
                                double distance = this.calculateDistance(r, c.getCentroid());
                                if (!(distance < minClusterDistance)) continue;
                                minClusterDistance = distance;
                            }
                        }
                        minClusterDistanceArray.put(rId, minClusterDistance);
                    }
                });
                Descriptives.normalize(minClusterDistanceArray);
                Integer selectedRecordId = (Integer)SimpleRandomSampling.weightedSampling(minClusterDistanceArray, 1, true).iterator().next();
                storageEngine.dropBigMap("tmp_minClusterDistance", tmp_minClusterDistance);
                alreadyAddedPoints.add(selectedRecordId);
                Integer clusterId = clusterMap.size();
                Cluster c = new Cluster((int)clusterId);
                c.add(trainingData.get(selectedRecordId));
                c.updateClusterParameters();
                clusterMap.put(clusterId, c);
                ++var9_22;
            }
        }
    }

    /**
     * Runs the iterative part of the algorithm.
     *
     * <p>Every iteration resets the clusters, assigns each record to the
     * cluster with the closest centroid and recomputes the centroids. The loop
     * stops when no centroid changed or after the maximum number of
     * iterations. The total iterations of the model are set to the maximum
     * before the loop and overwritten with the zero-based index of the
     * iteration in which no centroid changed.</p>
     *
     * @param trainingData the dataset used for training
     */
    private void calculateClusters(Dataframe trainingData) {
        ModelParameters modelParameters = (ModelParameters)this.knowledgeBase.getModelParameters();
        TrainingParameters trainingParameters = (TrainingParameters)this.knowledgeBase.getTrainingParameters();
        Map<Integer, Cluster> clusterMap = modelParameters.getClusterMap();
        int maxIterations = trainingParameters.getMaxIterations();
        modelParameters.setTotalIterations(maxIterations);

        for (int iteration = 0; iteration < maxIterations; ++iteration) {
            this.logger.debug("Iteration {}", iteration);

            // Empty the clusters but keep their centroids for the assignment step.
            for (Map.Entry<Integer, Cluster> entry : clusterMap.entrySet()) {
                Cluster cluster = entry.getValue();
                cluster.reset();
                clusterMap.put(entry.getKey(), cluster);
            }

            StorageEngine storageEngine = this.knowledgeBase.getStorageEngine();
            Map<Integer, Integer> tmp_clusterAssignments = storageEngine.getBigMap("tmp_clusterAssignments", Integer.class, Integer.class, StorageEngine.MapType.HASHMAP, StorageEngine.StorageHint.IN_MEMORY, true, true);
            try {
                // The closest cluster of every record is found through the stream executor...
                this.streamExecutor.forEach(StreamMethods.stream(trainingData.entries(), this.isParallelized()), e -> {
                    Record r = e.getValue();
                    AssociativeArray clusterDistances = new AssociativeArray();
                    for (Map.Entry<Integer, Cluster> entry : clusterMap.entrySet()) {
                        clusterDistances.put(entry.getKey(), this.calculateDistance(r, entry.getValue().getCentroid()));
                    }
                    Integer selectedClusterId = (Integer)this.getSelectedClusterFromDistances(clusterDistances);
                    tmp_clusterAssignments.put(e.getKey(), selectedClusterId);
                });

                // ...while the clusters themselves are updated in a plain sequential loop.
                for (Map.Entry<Integer, Record> e : trainingData.entries()) {
                    Integer selectedClusterId = tmp_clusterAssignments.get(e.getKey());
                    Cluster selectedCluster = clusterMap.get(selectedClusterId);
                    selectedCluster.add(e.getValue());
                    clusterMap.put(selectedClusterId, selectedCluster);
                }
            } finally {
                storageEngine.dropBigMap("tmp_clusterAssignments", tmp_clusterAssignments);
            }

            // Every centroid must be recomputed, hence the non-short-circuit |=.
            boolean changed = false;
            for (Map.Entry<Integer, Cluster> entry : clusterMap.entrySet()) {
                Cluster c = entry.getValue();
                changed |= c.updateClusterParameters();
                clusterMap.put(entry.getKey(), c);
            }
            if (!changed) {
                modelParameters.setTotalIterations(iteration);
                break;
            }
        }
    }

    /**
     * Training parameters of the K-means clusterer.
     */
    public static class TrainingParameters
    extends AbstractTrainer.AbstractTrainingParameters {
        private static final long serialVersionUID = 1L;

        /** Number of clusters; defaults to 2. */
        private int k = 2;
        /** Strategy used to create the initial clusters; defaults to PLUS_PLUS. */
        private Initialization initializationMethod = Initialization.PLUS_PLUS;
        /** Distance used to compare records with centroids; defaults to EUCLIDIAN. */
        private Distance distanceMethod = Distance.EUCLIDIAN;
        /** Upper bound on the number of training iterations; defaults to 200. */
        private int maxIterations = 200;
        /** Multiplier used to size the subset examined by SUBSET_FURTHEST_FIRST; defaults to 2. */
        private double subsetFurthestFirstcValue = 2.0;
        /** Multiplier applied to the weight of non-numerical features; defaults to 1. */
        private double categoricalGamaMultiplier = 1.0;
        /** Whether the feature weights are estimated from the data; defaults to false. */
        private boolean weighted = false;

        /** @return the number of clusters */
        public int getK() {
            return k;
        }

        /** @param k the number of clusters */
        public void setK(int k) {
            this.k = k;
        }

        /** @return the cluster initialization strategy */
        public Initialization getInitializationMethod() {
            return initializationMethod;
        }

        /** @param initializationMethod the cluster initialization strategy */
        public void setInitializationMethod(Initialization initializationMethod) {
            this.initializationMethod = initializationMethod;
        }

        /** @return the distance method */
        public Distance getDistanceMethod() {
            return distanceMethod;
        }

        /** @param distanceMethod the distance method */
        public void setDistanceMethod(Distance distanceMethod) {
            this.distanceMethod = distanceMethod;
        }

        /** @return the maximum number of iterations */
        public int getMaxIterations() {
            return maxIterations;
        }

        /** @param maxIterations the maximum number of iterations */
        public void setMaxIterations(int maxIterations) {
            this.maxIterations = maxIterations;
        }

        /** @return the multiplier of the SUBSET_FURTHEST_FIRST sample size */
        public double getSubsetFurthestFirstcValue() {
            return subsetFurthestFirstcValue;
        }

        /** @param subsetFurthestFirstcValue the multiplier of the SUBSET_FURTHEST_FIRST sample size */
        public void setSubsetFurthestFirstcValue(double subsetFurthestFirstcValue) {
            this.subsetFurthestFirstcValue = subsetFurthestFirstcValue;
        }

        /** @return the multiplier of the weight of non-numerical features */
        public double getCategoricalGamaMultiplier() {
            return categoricalGamaMultiplier;
        }

        /** @param categoricalGamaMultiplier the multiplier of the weight of non-numerical features */
        public void setCategoricalGamaMultiplier(double categoricalGamaMultiplier) {
            this.categoricalGamaMultiplier = categoricalGamaMultiplier;
        }

        /** @return whether the feature weights are estimated from the data */
        public boolean isWeighted() {
            return weighted;
        }

        /** @param weighted whether the feature weights are estimated from the data */
        public void setWeighted(boolean weighted) {
            this.weighted = weighted;
        }

        /** Supported distance methods. */
        public static enum Distance {
            EUCLIDIAN,
            MANHATTAN;

        }

        /** Supported cluster initialization strategies. */
        public static enum Initialization {
            FORGY,
            RANDOM_PARTITION,
            SET_FIRST_K,
            FURTHEST_FIRST,
            SUBSET_FURTHEST_FIRST,
            PLUS_PLUS;

        }
    }

    /**
     * Model parameters learned by the K-means clusterer: the number of
     * iterations that were performed and the weight of every feature, on top of
     * what the abstract cluster model parameters already hold.
     */
    public static class ModelParameters
    extends AbstractClusterer.AbstractModelParameters<Cluster> {
        private static final long serialVersionUID = 1L;

        /** Iteration count recorded by the training loop. */
        private int totalIterations;

        /** Weight of every feature, used by the weighted distance calculations. */
        @BigMap(keyClass=Object.class, valueClass=Double.class, mapType=StorageEngine.MapType.HASHMAP, storageHint=StorageEngine.StorageHint.IN_MEMORY, concurrent=true)
        private Map<Object, Double> featureWeights;

        /**
         * @param storageEngine the storage engine handed to the superclass
         */
        protected ModelParameters(StorageEngine storageEngine) {
            super(storageEngine);
        }

        /** @return the recorded iteration count */
        public int getTotalIterations() {
            return totalIterations;
        }

        /** @param totalIterations the iteration count to record */
        protected void setTotalIterations(int totalIterations) {
            this.totalIterations = totalIterations;
        }

        /** @return the map of feature weights */
        public Map<Object, Double> getFeatureWeights() {
            return featureWeights;
        }

        /** @param featureWeights the map of feature weights */
        protected void setFeatureWeights(Map<Object, Double> featureWeights) {
            this.featureWeights = featureWeights;
        }
    }

    /**
     * A K-means cluster: keeps the running sum of the feature values of its
     * members and a centroid that is recomputed from that sum on demand.
     */
    public static class Cluster
    extends AbstractClusterer.AbstractCluster {
        private static final long serialVersionUID = 1L;

        /** Current centroid; starts as a record with no feature values and a null response. */
        private Record centroid = new Record(new AssociativeArray(), null);

        /** Per-feature sum of the values of the records added since the last reset. */
        private final AssociativeArray xi_sum = new AssociativeArray();

        /**
         * @param clusterId the id handed to the superclass
         */
        protected Cluster(int clusterId) {
            super(clusterId);
        }

        /** @return the current centroid */
        public Record getCentroid() {
            return centroid;
        }

        /**
         * Recomputes the centroid as the accumulated sums divided by the cluster
         * size (the raw sums are used when the size is not positive) and
         * replaces the stored centroid when its values differ.
         *
         * @return true if the centroid was replaced, false if it was already up to date
         */
        protected boolean updateClusterParameters() {
            AssociativeArray meanValues = xi_sum.copy();
            if (size > 0) {
                meanValues.multiplyValues(1.0 / (double)size);
            }
            if (centroid.getX().equals(meanValues)) {
                return false;
            }
            centroid = new Record(meanValues, centroid.getY());
            return true;
        }

        /** Counts the record as a member and adds its feature values to the running sums. */
        @Override
        protected void add(Record r) {
            size++;
            xi_sum.addValues(r.getX());
        }

        /** Not supported by this cluster type. */
        @Override
        protected void remove(Record r) {
            throw new UnsupportedOperationException("Remove operation is not supported.");
        }

        /** Discards the running sums; the size and the centroid are left untouched. */
        @Override
        protected void clear() {
            xi_sum.clear();
        }

        /** Discards the running sums and sets the size to zero; the centroid is left untouched. */
        protected void reset() {
            xi_sum.clear();
            size = 0;
        }
    }
}

