/*
 * Decompiled with CFR 0.152.
 */
package com.datumbox.framework.core.common.text.extractors;

import com.datumbox.framework.core.common.text.extractors.AbstractTextExtractor;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;

/**
 * Extracts keyword combinations (n-grams) from a text together with a score for each one.
 *
 * <p>The text is tokenized with the tokenizer returned by {@code generateTokenizer()} (defined
 * outside this class). Every position whose word passes {@link #useThisWord} is used as the
 * start of a window; the start word is combined with later usable words inside that window and
 * each combination receives a score. Scores of combinations that render to the same string are
 * summed, and entries whose total is below {@link Parameters#getMinWordOccurrence()} are dropped.
 */
public class NgramsExtractor
extends AbstractTextExtractor<NgramsExtractor.Parameters, String, Double> {
    /**
     * Creates an extractor configured by the given parameters.
     *
     * @param parameters the extraction settings, handed to the superclass constructor
     */
    public NgramsExtractor(Parameters parameters) {
        super(parameters);
    }

    /**
     * Extracts the keyword combinations of a text and their aggregated scores.
     *
     * @param text the text to analyse
     * @return a mutable map from the space-joined keyword combination to its summed score;
     *         only entries whose score is at least the configured minimum word occurrence remain
     */
    @Override
    public Map<String, Double> extract(String text) {
        Map<Integer, String> idToWord = new HashMap<>();
        Map<Integer, Double> idToOccurrences = new HashMap<>();
        // Insertion-ordered so the window moves over the document from the first word to the last.
        Map<Integer, Integer> positionToId = new LinkedHashMap<>();
        int numberOfWordsInDoc = this.buildInternalArrays(text, idToWord, idToOccurrences, positionToId);

        Parameters params = (Parameters) this.parameters;
        int maxCombinations = params.getMaxCombinations();

        Map<String, Double> keywordsMap = new HashMap<>();
        for (Map.Entry<Integer, Integer> entry : positionToId.entrySet()) {
            Integer wordId = entry.getValue();
            if (!this.useThisWord(wordId, idToWord, idToOccurrences)) {
                continue;
            }
            Integer position = entry.getKey();
            Map<LinkedList<Integer>, Double> combinations = this.getPositionCombinationsWithinWindow(
                    position, maxCombinations, idToWord, idToOccurrences, positionToId, numberOfWordsInDoc);

            // Turn each combination of positions back into words and accumulate its score.
            for (Map.Entry<LinkedList<Integer>, Double> combinationEntry : combinations.entrySet()) {
                LinkedList<Integer> combination = combinationEntry.getKey();
                StringBuilder sb = new StringBuilder(combination.size() * 6);
                for (Integer pos : combination) {
                    sb.append(idToWord.get(positionToId.get(pos))).append(" ");
                }
                if (sb.length() <= 0) {
                    continue;
                }
                String key = sb.toString().trim();
                double score = combinationEntry.getValue();
                keywordsMap.merge(key, score, Double::sum);
            }
        }

        // The minimum word occurrence doubles as the minimum aggregated score of a combination.
        double minScore = params.getMinWordOccurrence();
        keywordsMap.values().removeIf(score -> score < minScore);
        return keywordsMap;
    }

    /**
     * Builds every combination of word positions that starts at {@code windowStart} and stays
     * inside the examination window, and scores each of them.
     *
     * <p>The window spans {@code maxCombinations} positions when the maximum distance between
     * keywords is 0; otherwise it spans the larger of the examination window length and
     * {@code maxCombinations}. It never extends past the end of the document. The combination
     * holding only {@code windowStart} scores 1.0; a longer combination ending at position
     * {@code i} scores {@code 1 / (1 + extraWords)}, where {@code extraWords} is the number of
     * positions strictly between {@code windowStart} and {@code i} that are not part of the
     * combination.
     *
     * @param windowStart        position of the first word of every combination
     * @param maxCombinations    maximum number of words in a combination
     * @param idToWord           word id to word
     * @param idToOccurrences    word id to number of occurrences
     * @param positionToId       word position to word id
     * @param numberOfWordsInDoc number of tokens in the document
     * @return the position combinations mapped to their scores
     */
    private Map<LinkedList<Integer>, Double> getPositionCombinationsWithinWindow(Integer windowStart, int maxCombinations, Map<Integer, String> idToWord, Map<Integer, Double> idToOccurrences, Map<Integer, Integer> positionToId, int numberOfWordsInDoc) {
        Parameters params = (Parameters) this.parameters;
        int maxDistanceBetweenKwds = params.getMaxDistanceBetweenKwds();
        int windowLength = maxDistanceBetweenKwds == 0
                ? maxCombinations
                : Math.max(params.getExaminationWindowLength(), maxCombinations);
        int windowEnd = Math.min(windowStart + windowLength, numberOfWordsInDoc);

        Map<LinkedList<Integer>, Double> positionCombinationsWithScores = new HashMap<>();
        LinkedList<Integer> seedCombination = new LinkedList<>();
        seedCombination.add(windowStart);
        positionCombinationsWithScores.put(seedCombination, 1.0);

        for (int i = windowStart + 1; i < windowEnd; ++i) {
            Integer id = positionToId.get(i);
            if (id == null || !this.useThisWord(id, idToWord, idToOccurrences)) {
                continue;
            }
            // Collected separately so the map is not modified while its key set is being iterated.
            Map<LinkedList<Integer>, Double> newPositionCombinations = new HashMap<>();
            int wordsBetweenStart = i - (windowStart + 1);
            for (LinkedList<Integer> previousCombination : positionCombinationsWithScores.keySet()) {
                int previousNumWords = previousCombination.size();
                if (previousNumWords >= maxCombinations) {
                    continue;
                }
                int wordsBetweenLastCombo = i - (previousCombination.getLast() + 1);
                if (wordsBetweenLastCombo > maxDistanceBetweenKwds) {
                    continue;
                }
                int currentNumWords = previousNumWords + 1;
                // Positions between the start and i that this combination skips over.
                int extraWords = wordsBetweenStart - (currentNumWords - 2);
                double score = 1.0 / (1.0 + (double) extraWords);
                LinkedList<Integer> currentCombination = new LinkedList<>(previousCombination);
                currentCombination.add(i);
                newPositionCombinations.put(currentCombination, score);
            }
            positionCombinationsWithScores.putAll(newPositionCombinations);
        }
        return positionCombinationsWithScores;
    }

    /**
     * Decides whether a word may take part in keyword combinations.
     *
     * @param wordID          id of the word to check
     * @param idToWord        word id to word
     * @param idToOccurrences word id to number of occurrences
     * @return {@code false} when the id is unknown, the word is shorter than the minimum word
     *         length, or it occurs fewer times than the minimum word occurrence; {@code true}
     *         otherwise
     */
    private boolean useThisWord(Integer wordID, Map<Integer, String> idToWord, Map<Integer, Double> idToOccurrences) {
        String word = idToWord.get(wordID);
        if (word == null) {
            return false;
        }
        Parameters params = (Parameters) this.parameters;
        if (word.length() < params.getMinWordLength()) {
            return false;
        }
        double occurrences = idToOccurrences.get(wordID);
        return occurrences >= params.getMinWordOccurrence();
    }

    /**
     * Tokenizes the text and fills the lookup structures used by the extraction.
     *
     * <p>Ids are assigned to distinct tokens in order of first appearance, starting at 0.
     *
     * @param text            the text to tokenize
     * @param idToWord        output: word id to word
     * @param idToOccurrences output: word id to number of occurrences in the text
     * @param positionToId    output: token position (0-based) to word id
     * @return the number of tokens in the text
     */
    private int buildInternalArrays(String text, Map<Integer, String> idToWord, Map<Integer, Double> idToOccurrences, Map<Integer, Integer> positionToId) {
        Map<String, Integer> wordToId = new HashMap<>();
        List<String> keywordList = this.generateTokenizer().tokenize(text);
        int lastId = -1;
        int position = 0;
        for (String word : keywordList) {
            Integer id = wordToId.get(word);
            if (id == null) {
                id = ++lastId;
                wordToId.put(word, id);
                idToWord.put(id, word);
            }
            idToOccurrences.merge(id, 1.0, Double::sum);
            positionToId.put(position, id);
            ++position;
        }
        return keywordList.size();
    }

    /**
     * Settings of the {@link NgramsExtractor}.
     */
    public static class Parameters
    extends AbstractTextExtractor.AbstractParameters {
        private static final long serialVersionUID = 1L;
        private int maxCombinations = 3;
        private int minWordLength = 1;
        private int minWordOccurrence = 1;
        private int examinationWindowLength = 3;
        private int maxDistanceBetweenKwds = 0;

        /**
         * @return the maximum number of words in a keyword combination (default 3)
         */
        public int getMaxCombinations() {
            return this.maxCombinations;
        }

        /**
         * @param maxCombinations the maximum number of words in a keyword combination
         */
        public void setMaxCombinations(int maxCombinations) {
            this.maxCombinations = maxCombinations;
        }

        /**
         * @return the minimum length a word must have to be used (default 1)
         */
        public int getMinWordLength() {
            return this.minWordLength;
        }

        /**
         * @param minWordLength the minimum length a word must have to be used
         */
        public void setMinWordLength(int minWordLength) {
            this.minWordLength = minWordLength;
        }

        /**
         * @return the minimum number of occurrences a word needs to be used, which is also the
         *         minimum aggregated score a keyword combination needs to be returned (default 1)
         */
        public int getMinWordOccurrence() {
            return this.minWordOccurrence;
        }

        /**
         * @param minWordOccurrence the minimum word occurrence / minimum combination score
         */
        public void setMinWordOccurrence(int minWordOccurrence) {
            this.minWordOccurrence = minWordOccurrence;
        }

        /**
         * @return the examination window length (default 3); the extractor only consults it when
         *         the maximum distance between keywords is not 0
         */
        public int getExaminationWindowLength() {
            return this.examinationWindowLength;
        }

        /**
         * @param examinationWindowLength the examination window length
         */
        public void setExaminationWindowLength(int examinationWindowLength) {
            this.examinationWindowLength = examinationWindowLength;
        }

        /**
         * @return the maximum number of positions allowed between two consecutive words of a
         *         combination (default 0, i.e. the words must be adjacent)
         */
        public int getMaxDistanceBetweenKwds() {
            return this.maxDistanceBetweenKwds;
        }

        /**
         * @param maxDistanceBetweenKwds the maximum number of positions allowed between two
         *                               consecutive words of a combination
         */
        public void setMaxDistanceBetweenKwds(int maxDistanceBetweenKwds) {
            this.maxDistanceBetweenKwds = maxDistanceBetweenKwds;
        }
    }
}

