package jsat.text.tokenizer;

import java.util.ArrayList;
import java.util.List;

/* loaded from: input_file:jsat/text/tokenizer/NGramTokenizer.class */
/**
 * Tokenizer that builds word n-grams on top of the tokens produced by another
 * {@link Tokenizer}.
 *
 * <p>The base tokenizer supplies the 1-grams. Each n-gram is formed by joining
 * consecutive base tokens with a single space character. Depending on how the
 * instance was configured, the output either contains only the n-grams of size
 * exactly {@code n}, or the 1-grams followed by every n-gram of size 2 up to
 * {@code n}.
 */
public class NGramTokenizer implements Tokenizer {
    private static final long serialVersionUID = 7551087420391197139L;

    /** Size of the largest n-gram to generate; validated to be positive. */
    private final int n;

    /** Tokenizer whose output is used as the 1-grams. */
    private final Tokenizer base;

    /**
     * If {@code true}, the 1-grams and all n-grams of size 2..n are emitted;
     * if {@code false}, only n-grams of size exactly {@code n} are emitted.
     */
    private final boolean allSubN;

    /**
     * Creates a new n-gram tokenizer.
     *
     * @param i the n-gram size {@code n}; must be positive
     * @param tokenizer the base tokenizer that produces the 1-grams; it is
     *        dereferenced on every tokenize call, so a {@code null} value
     *        results in a {@link NullPointerException} at that point
     * @param z {@code true} to also keep the 1-grams and generate every n-gram
     *        of size 2..n, {@code false} to keep only n-grams of size exactly n
     * @throws IllegalArgumentException if {@code i} is not positive
     */
    public NGramTokenizer(int i, Tokenizer tokenizer, boolean z) {
        if (i <= 0) {
            throw new IllegalArgumentException("Number of n-grams must be positive, not " + i);
        }
        this.n = i;
        this.base = tokenizer;
        this.allSubN = z;
    }

    /**
     * Tokenizes the input into n-grams using a freshly allocated work buffer
     * and result list.
     *
     * @param str the text to tokenize
     * @return a new list holding the generated tokens
     */
    @Override // jsat.text.tokenizer.Tokenizer
    public List<String> tokenize(String str) {
        List<String> tokens = new ArrayList<String>();
        tokenize(str, new StringBuilder(), tokens);
        return tokens;
    }

    /**
     * Tokenizes the input into n-grams, appending the result to {@code list}.
     *
     * <p>The base tokenizer is run first and everything that is in
     * {@code list} afterwards is treated as the sequence of 1-grams. The
     * n-grams are then appended behind them, ordered by the position of their
     * last token and, for the same position, by increasing size. When only
     * n-grams of size exactly {@code n} were requested, the leading 1-grams
     * are removed from {@code list} at the end.
     *
     * @param str the text to tokenize
     * @param sb work buffer handed to the base tokenizer and then reused to
     *        assemble each n-gram; its previous content is discarded
     * @param list the list that receives the tokens
     */
    @Override // jsat.text.tokenizer.Tokenizer
    public void tokenize(String str, StringBuilder sb, List<String> list) {
        this.base.tokenize(str, sb, list);
        final int unigramCount = list.size();
        if (this.n == 1) {
            // The base tokens already are the 1-grams; nothing to combine.
            return;
        }

        final int smallestGram = this.allSubN ? 2 : this.n;
        // 'end' is the index of the last 1-gram of the n-grams being built.
        for (int end = 1; end < unigramCount; end++) {
            for (int gramSize = smallestGram; gramSize <= this.n; gramSize++) {
                sb.setLength(0);
                final int start = end - (gramSize - 1);
                if (start < 0) {
                    // This size reaches before the first token, and so would
                    // every larger one. Stopping here (instead of skipping one
                    // size at a time) avoids useless iterations when n exceeds
                    // the token count and keeps gramSize from overflowing when
                    // n is Integer.MAX_VALUE.
                    break;
                }
                for (int k = start; k < end; k++) {
                    // Separator is only added once the buffer holds text.
                    if (sb.length() > 0) {
                        sb.append(' ');
                    }
                    sb.append(list.get(k));
                }
                sb.append(' ').append(list.get(end));
                list.add(sb.toString());
            }
        }

        if (!this.allSubN) {
            // Only the size-n grams were requested: drop the leading 1-grams.
            list.subList(0, unigramCount).clear();
        }
    }
}
