/*
 * Decompiled with CFR 0.152.
 */
package smile.nlp.collocation;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import smile.nlp.NGram;
import smile.nlp.dictionary.EnglishPunctuations;
import smile.nlp.dictionary.EnglishStopWords;

/**
 * Extracts frequent phrases (n-grams) from tokenized sentences using a
 * level-wise, Apriori-style search: an n-gram with {@code n > 1} is only
 * counted when both its leading and its trailing (n-1)-gram were kept at the
 * previous level.
 */
public class AprioriPhraseExtractor {
    /**
     * Finds the n-grams of length 1 to {@code maxNGramSize} that occur at
     * least {@code minFrequency} times in {@code sentences}.
     *
     * <p>Single-word n-grams that are English punctuation are never kept, and
     * n-grams whose first or last word is an English stop word are left out of
     * the returned lists (they still take part in building longer candidates).
     *
     * @param sentences     the tokenized sentences, one {@code String[]} of words each
     * @param maxNGramSize  the largest n-gram length to look for
     * @param minFrequency  the minimum number of occurrences for an n-gram to be kept
     * @return a list indexed by n-gram length; element 0 is always an empty
     *         list, element {@code n} holds the kept n-grams of length
     *         {@code n} in descending natural order of {@link NGram}
     *         (presumably by frequency; confirm against {@code NGram.compareTo})
     */
    public ArrayList<ArrayList<NGram>> extract(Collection<String[]> sentences, int maxNGramSize, int minFrequency) {
        // frequentBySize.get(n) holds the kept n-grams of length n; slot 0 is an
        // empty placeholder so that list indices equal n-gram lengths.
        ArrayList<Set<NGram>> frequentBySize = new ArrayList<Set<NGram>>(maxNGramSize + 1);
        frequentBySize.add(new HashSet<NGram>());

        for (int n = 1; n <= maxNGramSize; ++n) {
            Set<NGram> previousLevel = frequentBySize.get(n - 1);
            Map<NGram, Integer> counts = countCandidates(sentences, n, previousLevel);
            frequentBySize.add(selectFrequent(counts, minFrequency));
        }

        ArrayList<ArrayList<NGram>> results = new ArrayList<ArrayList<NGram>>();
        for (Set<NGram> frequent : frequentBySize) {
            ArrayList<NGram> phrases = new ArrayList<NGram>();
            for (NGram ngram : frequent) {
                if (!startsOrEndsWithStopWord(ngram)) {
                    phrases.add(ngram);
                }
            }
            // Ascending sort followed by a reversal yields descending order.
            Collections.sort(phrases);
            Collections.reverse(phrases);
            results.add(phrases);
        }
        return results;
    }

    /**
     * Counts every n-gram of length {@code n} whose (n-1)-word prefix and
     * suffix are both contained in {@code previousLevel}; for {@code n == 1}
     * every word is counted.
     */
    private static Map<NGram, Integer> countCandidates(Collection<String[]> sentences, int n, Set<NGram> previousLevel) {
        Map<NGram, Integer> counts = new HashMap<NGram, Integer>();
        for (String[] sentence : sentences) {
            for (int i = 0; i <= sentence.length - n; ++i) {
                if (n > 1) {
                    // Apriori pruning: skip n-grams built on a sub-phrase that was not kept.
                    NGram prefix = new NGram(Arrays.copyOfRange(sentence, i, i + n - 1));
                    if (!previousLevel.contains(prefix)) {
                        continue;
                    }
                    NGram suffix = new NGram(Arrays.copyOfRange(sentence, i + 1, i + n));
                    if (!previousLevel.contains(suffix)) {
                        continue;
                    }
                }
                NGram ngram = new NGram(Arrays.copyOfRange(sentence, i, i + n));
                Integer seen = counts.get(ngram);
                counts.put(ngram, seen == null ? 1 : seen + 1);
            }
        }
        return counts;
    }

    /**
     * Returns the counted n-grams that reach {@code minFrequency}, dropping
     * single-word punctuation, and stores each count in the n-gram's
     * {@code freq} field.
     */
    private static Set<NGram> selectFrequent(Map<NGram, Integer> counts, int minFrequency) {
        Set<NGram> frequent = new HashSet<NGram>();
        for (Map.Entry<NGram, Integer> entry : counts.entrySet()) {
            int count = entry.getValue();
            if (count < minFrequency) {
                continue;
            }
            NGram ngram = entry.getKey();
            if (ngram.words.length == 1 && EnglishPunctuations.getInstance().contains(ngram.words[0])) {
                continue;
            }
            ngram.freq = count;
            frequent.add(ngram);
        }
        return frequent;
    }

    /** Tells whether the first or the last word of {@code ngram} is an English stop word. */
    private static boolean startsOrEndsWithStopWord(NGram ngram) {
        String first = ngram.words[0];
        String last = ngram.words[ngram.words.length - 1];
        return EnglishStopWords.DEFAULT.contains(first) || EnglishStopWords.DEFAULT.contains(last);
    }
}

