/*
 * Decompiled with CFR 0.152.
 */
package jsat.text.tokenizer;

import java.util.ArrayList;
import java.util.List;
import jsat.text.tokenizer.Tokenizer;

/**
 * A simple character-by-character tokenizer.
 * <p>
 * Letters (as reported by {@link Character#isLetter(char)}) are accumulated
 * into the current token, optionally converted to lower case. Digits are
 * accumulated as well unless {@link #setNoDigits(boolean) noDigits} is set.
 * Whitespace always ends the current token. Every other character either ends
 * the current token or is dropped without splitting it, depending on
 * {@link #setOtherToWhiteSpace(boolean) otherToWhiteSpace}.
 * <p>
 * A finished token is only emitted when its length, measured in {@code char}
 * units, lies within the inclusive range
 * [{@link #getMinTokenLength()}, {@link #getMaxTokenLength()}].
 */
public class NaiveTokenizer
implements Tokenizer {
    private static final long serialVersionUID = -2112091783442076933L;
    /** When {@code true}, letters are lower-cased before being added to a token. */
    private boolean useLowerCase;
    /**
     * When {@code true}, any character that is not accepted into a token ends
     * the current token. When {@code false}, only whitespace ends a token and
     * other unaccepted characters are skipped.
     */
    private boolean otherToWhiteSpace = true;
    /** When {@code true}, digits are not accepted into tokens. */
    private boolean noDigits = false;
    /** Smallest token length (inclusive) that will be emitted. */
    private int minTokenLength = 1;
    /** Largest token length (inclusive) that will be emitted. */
    private int maxTokenLength = Integer.MAX_VALUE;

    /**
     * Creates a tokenizer that converts all letters to lower case.
     */
    public NaiveTokenizer() {
        this(true);
    }

    /**
     * Creates a tokenizer.
     *
     * @param useLowerCase {@code true} to lower-case every letter placed into
     * a token, {@code false} to keep letters as they appear in the input
     */
    public NaiveTokenizer(boolean useLowerCase) {
        this.useLowerCase = useLowerCase;
    }

    /**
     * Controls whether letters are lower-cased when added to a token.
     *
     * @param useLowerCase {@code true} to lower-case letters
     */
    public void setUseLowerCase(boolean useLowerCase) {
        this.useLowerCase = useLowerCase;
    }

    /**
     * @return {@code true} if letters are lower-cased when added to a token
     */
    public boolean isUseLowerCase() {
        return this.useLowerCase;
    }

    /**
     * Controls how characters that are neither accepted into a token nor
     * whitespace are handled.
     *
     * @param otherToWhiteSpace {@code true} to let such characters end the
     * current token (as whitespace does), {@code false} to skip them so the
     * characters on either side are joined into one token
     */
    public void setOtherToWhiteSpace(boolean otherToWhiteSpace) {
        this.otherToWhiteSpace = otherToWhiteSpace;
    }

    /**
     * @return {@code true} if unaccepted non-whitespace characters end the
     * current token, {@code false} if they are skipped
     */
    public boolean isOtherToWhiteSpace() {
        return this.otherToWhiteSpace;
    }

    /**
     * Tokenizes the input into a newly allocated list.
     *
     * @param input the text to tokenize
     * @return a new list holding the accepted tokens in order of appearance
     */
    @Override
    public List<String> tokenize(String input) {
        List<String> toRet = new ArrayList<String>();
        StringBuilder sb = new StringBuilder(input.length() / 10);
        this.tokenize(input, sb, toRet);
        return toRet;
    }

    /**
     * Tokenizes the input, appending every accepted token to
     * {@code storageSpace}.
     * <p>
     * Any content already present in {@code workSpace} on entry becomes the
     * start of the first token. The work space is left empty on return, so the
     * same builder can be passed to consecutive calls without one input's
     * trailing token leaking into the next.
     *
     * @param input the text to tokenize
     * @param workSpace scratch buffer used to assemble the current token
     * @param storageSpace the list that accepted tokens are appended to
     */
    @Override
    public void tokenize(String input, StringBuilder workSpace, List<String> storageSpace) {
        for (int i = 0; i < input.length(); ++i) {
            char c = input.charAt(i);
            if (Character.isLetter(c)) {
                workSpace.append(this.useLowerCase ? Character.toLowerCase(c) : c);
            } else if (!this.noDigits && Character.isDigit(c)) {
                workSpace.append(c);
            } else if (this.otherToWhiteSpace || Character.isWhitespace(c)) {
                // Delimiter: whitespace always splits, other characters only
                // when otherToWhiteSpace is enabled.
                this.flushToken(workSpace, storageSpace);
            }
            // Otherwise the character is dropped without ending the token.
        }
        // Emit whatever is pending at the end of the input and reset the
        // buffer so a reused work space starts clean on the next call.
        this.flushToken(workSpace, storageSpace);
    }

    /**
     * Emits the pending token if its length is within the configured bounds,
     * then empties the work space.
     */
    private void flushToken(StringBuilder workSpace, List<String> storageSpace) {
        int length = workSpace.length();
        if (length >= this.minTokenLength && length <= this.maxTokenLength) {
            storageSpace.add(workSpace.toString());
        }
        workSpace.setLength(0);
    }

    /**
     * Sets the largest token length that will be emitted. Longer tokens are
     * discarded. The bound is inclusive and may equal the minimum length.
     *
     * @param maxTokenLength the maximum token length, at least 1 and not
     * smaller than the current minimum token length
     * @throws IllegalArgumentException if the value is less than 1 or less
     * than the current minimum token length
     */
    public void setMaxTokenLength(int maxTokenLength) {
        if (maxTokenLength < 1) {
            throw new IllegalArgumentException("Max token length must be positive, not " + maxTokenLength);
        }
        // Equality is allowed: the length check is inclusive on both ends and
        // setMinTokenLength already permits min == max.
        if (maxTokenLength < this.minTokenLength) {
            throw new IllegalArgumentException("Max token length must not be smaller than the min token length");
        }
        this.maxTokenLength = maxTokenLength;
    }

    /**
     * @return the largest token length (inclusive) that will be emitted
     */
    public int getMaxTokenLength() {
        return this.maxTokenLength;
    }

    /**
     * Sets the smallest token length that will be emitted. Shorter tokens are
     * discarded. The bound is inclusive.
     * <p>
     * Note that with a minimum of 0, an empty token is emitted whenever a
     * delimiter is reached while no characters are pending.
     *
     * @param minTokenLength the minimum token length, non-negative and not
     * larger than the current maximum token length
     * @throws IllegalArgumentException if the value is negative or exceeds the
     * current maximum token length
     */
    public void setMinTokenLength(int minTokenLength) {
        if (minTokenLength < 0) {
            throw new IllegalArgumentException("Minimum token length must be non negative, not " + minTokenLength);
        }
        if (minTokenLength > this.maxTokenLength) {
            throw new IllegalArgumentException("Minimum token length can not exced the maximum token length");
        }
        this.minTokenLength = minTokenLength;
    }

    /**
     * @return the smallest token length (inclusive) that will be emitted
     */
    public int getMinTokenLength() {
        return this.minTokenLength;
    }

    /**
     * Controls whether digits are excluded from tokens. When excluded, a digit
     * is handled like any other unaccepted character (see
     * {@link #setOtherToWhiteSpace(boolean)}).
     *
     * @param noDigits {@code true} to exclude digits from tokens
     */
    public void setNoDigits(boolean noDigits) {
        this.noDigits = noDigits;
    }

    /**
     * @return {@code true} if digits are excluded from tokens
     */
    public boolean isNoDigits() {
        return this.noDigits;
    }
}

