/*
 * Decompiled with CFR 0.152.
 */
package com.datumbox.framework.core.machinelearning.featureselection;

import com.datumbox.framework.common.Configuration;
import com.datumbox.framework.common.concurrency.StreamMethods;
import com.datumbox.framework.common.dataobjects.TypeInference;
import com.datumbox.framework.common.storage.interfaces.StorageEngine;
import com.datumbox.framework.core.common.dataobjects.Dataframe;
import com.datumbox.framework.core.common.dataobjects.Record;
import com.datumbox.framework.core.machinelearning.common.abstracts.featureselectors.AbstractScoreBasedFeatureSelector;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.function.BiFunction;

/**
 * Score-based feature selector that ranks features by TF-IDF.
 *
 * <p>For every feature the inverse document frequency is computed as
 * {@code log10(n / df)}, where {@code n} is the number of records in the
 * training data and {@code df} is the number of records in which the feature
 * has a value greater than zero. The score stored for a feature is the largest
 * {@code tf * idf} observed for it in any single record.</p>
 */
public class TFIDF
extends AbstractScoreBasedFeatureSelector<TFIDF.ModelParameters, TFIDF.TrainingParameters> {
    /**
     * Creates a selector from training parameters; delegates to the superclass.
     *
     * @param trainingParameters the training parameters of the selector
     * @param configuration      the framework configuration
     */
    protected TFIDF(TrainingParameters trainingParameters, Configuration configuration) {
        super(trainingParameters, configuration);
    }

    /**
     * Creates a selector from a storage name; delegates to the superclass.
     *
     * @param storageName   the name passed on to the superclass constructor
     * @param configuration the framework configuration
     */
    protected TFIDF(String storageName, Configuration configuration) {
        super(storageName, configuration);
    }

    /**
     * Validates that every X data type of the training data is supported and
     * then delegates to the superclass implementation.
     *
     * @param trainingData the data to fit on
     * @throws IllegalArgumentException if the Dataframe contains an X data type
     *                                  that is not in {@link #getSupportedXDataTypes()}
     */
    @Override
    public void fit(Dataframe trainingData) {
        Set<TypeInference.DataType> supportedXDataTypes = this.getSupportedXDataTypes();
        for (TypeInference.DataType d : trainingData.getXDataTypes().values()) {
            if (!supportedXDataTypes.contains(d)) {
                throw new IllegalArgumentException("A DataType that is not supported by this method was detected in the Dataframe.");
            }
        }
        super.fit(trainingData);
    }

    /**
     * Computes the TF-IDF score of every feature and stores the maximum score
     * per feature in the model parameters.
     *
     * <p>Steps: (1) count in how many records each feature is positive,
     * (2) optionally hand the counts to {@code removeRareFeatures},
     * (3) turn the counts into IDF values, (4) scan all records and keep the
     * largest positive {@code tf * idf} per feature, (5) optionally hand the
     * scores to {@code keepTopFeatures} when more than {@code maxFeatures}
     * features were scored.</p>
     *
     * @param trainingData the data to fit on
     */
    @Override
    protected void _fit(Dataframe trainingData) {
        // Casts kept: the generic type of knowledgeBase is declared outside this file.
        ModelParameters modelParameters = (ModelParameters)this.knowledgeBase.getModelParameters();
        TrainingParameters trainingParameters = (TrainingParameters)this.knowledgeBase.getTrainingParameters();
        boolean binarized = trainingParameters.isBinarized();
        int n = trainingData.size();
        Map<Object, Double> featureScores = modelParameters.getFeatureScores();

        StorageEngine storageEngine = this.knowledgeBase.getStorageEngine();
        // Holds document frequencies first and is then overwritten in place with IDF values.
        Map<Object, Double> idfMap = storageEngine.getBigMap("tmp_idf", Object.class, Double.class, StorageEngine.MapType.HASHMAP, StorageEngine.StorageHint.IN_MEMORY, true, true);
        try {
            // (1) Document frequency: number of records in which the feature is positive.
            for (Record record : trainingData) {
                for (Map.Entry<Object, Object> entry : record.getX().entrySet()) {
                    Double counts = TypeInference.toDouble(entry.getValue());
                    // Written as !(x > 0) so that NaN is skipped as well.
                    if (!(counts > 0.0)) {
                        continue;
                    }
                    Object keyword = entry.getKey();
                    idfMap.put(keyword, idfMap.getOrDefault(keyword, 0.0) + 1.0);
                }
            }

            // (2) Optional filtering of rare features.
            Integer rareFeatureThreshold = trainingParameters.getRareFeatureThreshold();
            if (rareFeatureThreshold != null && rareFeatureThreshold > 0) {
                this.removeRareFeatures(idfMap, rareFeatureThreshold);
            }

            // (3) Replace each document frequency with log10(n / df). Only values of
            // existing keys are overwritten, so no keys are added or removed here.
            this.streamExecutor.forEach(StreamMethods.stream(idfMap.entrySet().stream(), this.isParallelized()), entry -> {
                Object keyword = entry.getKey();
                Double countsInDocument = (Double)entry.getValue();
                idfMap.put(keyword, Math.log10((double)n / countsInDocument));
            });

            // (4) Keep the maximum TF-IDF per feature.
            BiFunction<Object, Double, Boolean> isGreaterThanMax = (feature, newScore) -> {
                Double maxScore = featureScores.get(feature);
                return maxScore == null || maxScore < newScore;
            };
            this.streamExecutor.forEach(StreamMethods.stream(trainingData.stream(), this.isParallelized()), r -> {
                for (Map.Entry<Object, Object> entry : r.getX().entrySet()) {
                    Double counts = TypeInference.toDouble(entry.getValue());
                    if (!(counts > 0.0)) {
                        continue;
                    }
                    Object keyword = entry.getKey();
                    double tf = binarized ? 1.0 : counts;
                    // Features missing from idfMap get idf 0 and are therefore never scored.
                    double idf = idfMap.getOrDefault(keyword, 0.0);
                    double tfidf = tf * idf;
                    // Cheap unsynchronized pre-check; the decision is repeated under the lock.
                    if (!(tfidf > 0.0) || !isGreaterThanMax.apply(keyword, tfidf)) {
                        continue;
                    }
                    synchronized (featureScores) {
                        if (isGreaterThanMax.apply(keyword, tfidf)) {
                            featureScores.put(keyword, tfidf);
                        }
                    }
                }
            });
        } finally {
            // Drop the temporary map even if counting or scoring failed.
            storageEngine.dropBigMap("tmp_idf", idfMap);
        }

        // (5) Optional cut-off to the best-scoring features.
        Integer maxFeatures = trainingParameters.getMaxFeatures();
        if (maxFeatures != null && maxFeatures < featureScores.size()) {
            this.keepTopFeatures(featureScores, maxFeatures);
        }
    }

    /**
     * Returns the X data types this selector accepts.
     *
     * @return a new set containing {@code BOOLEAN} and {@code NUMERICAL}
     */
    @Override
    protected Set<TypeInference.DataType> getSupportedXDataTypes() {
        return new HashSet<TypeInference.DataType>(Arrays.asList(TypeInference.DataType.BOOLEAN, TypeInference.DataType.NUMERICAL));
    }

    /**
     * Returns the Y data types this selector accepts.
     *
     * @return always {@code null}; NOTE(review): presumably the superclass reads
     *         this as "no restriction on Y" - confirm against the superclass
     */
    @Override
    protected Set<TypeInference.DataType> getSupportedYDataTypes() {
        return null;
    }

    /**
     * Training parameters of {@link TFIDF}.
     */
    public static class TrainingParameters
    extends AbstractScoreBasedFeatureSelector.AbstractTrainingParameters {
        private static final long serialVersionUID = 1L;
        // When true, the term frequency of any positive value is taken as 1.0.
        private boolean binarized = false;

        /**
         * Tells whether term frequencies are binarized.
         *
         * @return {@code true} if any positive value counts as a term frequency of 1.0,
         *         {@code false} if the raw value is used
         */
        public boolean isBinarized() {
            return this.binarized;
        }

        /**
         * Sets whether term frequencies are binarized.
         *
         * @param binarized {@code true} to use 1.0 as the term frequency of any positive value
         */
        public void setBinarized(boolean binarized) {
            this.binarized = binarized;
        }
    }

    /**
     * Model parameters of {@link TFIDF}; adds nothing to the abstract base class.
     */
    public static class ModelParameters
    extends AbstractScoreBasedFeatureSelector.AbstractModelParameters {
        private static final long serialVersionUID = 2L;

        /**
         * Creates the model parameters; delegates to the superclass.
         *
         * @param storageEngine the storage engine passed on to the superclass
         */
        protected ModelParameters(StorageEngine storageEngine) {
            super(storageEngine);
        }
    }
}

