/*
 * Decompiled with CFR 0.152.
 */
package jsat.text.tokenizer;

import java.util.ArrayList;
import java.util.List;
import jsat.text.tokenizer.Tokenizer;

/**
 * A {@link Tokenizer} that combines the tokens produced by another tokenizer
 * into space-separated n-grams.
 * <p>
 * The base tokenizer is run first to obtain the individual tokens (the
 * "1-grams"). Consecutive tokens are then joined with a single space character
 * to form the larger grams. Depending on {@code allSubN}, either only the grams
 * of exactly {@code n} tokens are kept, or the 1-grams together with every gram
 * size from 2 up to {@code n}.
 */
public class NGramTokenizer implements Tokenizer {
    private static final long serialVersionUID = 7551087420391197139L;

    /** The maximum number of base tokens joined into one gram; always positive. */
    private final int n;
    /** The tokenizer that produces the individual tokens the grams are built from. */
    private final Tokenizer base;
    /** When {@code true}, the 1-grams and all gram sizes from 2 to {@code n} are emitted. */
    private final boolean allSubN;

    /**
     * Creates a new n-gram tokenizer.
     *
     * @param n the number of tokens per gram, must be positive
     * @param base the tokenizer used to produce the individual tokens, must not be {@code null}
     * @param allSubN {@code true} to emit the 1-grams and every gram size from 2 up to
     *        {@code n}; {@code false} to emit only the grams of exactly {@code n} tokens
     * @throws IllegalArgumentException if {@code n} is not positive
     * @throws NullPointerException if {@code base} is {@code null}
     */
    public NGramTokenizer(int n, Tokenizer base, boolean allSubN) {
        if (n <= 0) {
            throw new IllegalArgumentException("Number of n-grams must be positive, not " + n);
        }
        if (base == null) {
            // Fail fast: a null base could only ever fail later inside tokenize.
            throw new NullPointerException("base tokenizer must not be null");
        }
        this.n = n;
        this.base = base;
        this.allSubN = allSubN;
    }

    /**
     * Tokenizes the input into n-grams, collecting them in a newly created list.
     *
     * @param input the text to tokenize
     * @return a new list holding the resulting grams
     */
    @Override
    public List<String> tokenize(String input) {
        List<String> storageSpace = new ArrayList<String>();
        this.tokenize(input, new StringBuilder(), storageSpace);
        return storageSpace;
    }

    /**
     * Tokenizes the input into n-grams and appends them to {@code storageSpace}.
     * <p>
     * The base tokenizer first writes its tokens into {@code storageSpace}. If
     * {@code n} is 1 those tokens are the final result. Otherwise the grams are
     * appended after them, and when {@code allSubN} is {@code false} the leading
     * entries that served as 1-grams are removed again.
     * <p>
     * NOTE(review): every entry present in {@code storageSpace} after the base
     * tokenizer ran is treated as a base token, so the list is presumably expected
     * to be empty on entry - confirm against the {@code Tokenizer} contract.
     *
     * @param input the text to tokenize
     * @param workSpace scratch buffer used to assemble each gram; its contents are overwritten
     * @param storageSpace the list that receives the resulting grams
     */
    @Override
    public void tokenize(String input, StringBuilder workSpace, List<String> storageSpace) {
        // The base tokens double as the 1-grams.
        this.base.tokenize(input, workSpace, storageSpace);
        int origSize = storageSpace.size();
        if (this.n == 1) {
            return;
        }
        int smallestGram = this.allSubN ? 2 : this.n;
        // Slide over the 1-grams; each gram built in iteration i ends at token i.
        for (int i = 1; i < origSize; ++i) {
            for (int gramSize = smallestGram; gramSize <= this.n; ++gramSize) {
                workSpace.setLength(0);
                int start = i - (gramSize - 1);
                if (start < 0) {
                    // Not enough tokens to the left of i for this gram size; larger
                    // sizes would reach back even further, so stop for this i.
                    break;
                }
                for (int j = start; j < i; ++j) {
                    if (workSpace.length() > 0) {
                        workSpace.append(' ');
                    }
                    workSpace.append(storageSpace.get(j));
                }
                workSpace.append(' ').append(storageSpace.get(i));
                storageSpace.add(workSpace.toString());
            }
        }
        if (!this.allSubN) {
            // Only the grams of exactly n tokens are wanted: drop the leading 1-grams.
            storageSpace.subList(0, origSize).clear();
        }
    }
}

