/*
 * Decompiled with CFR 0.152.
 */
package smile.nlp;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import smile.nlp.Bigram;
import smile.nlp.Corpus;
import smile.nlp.SimpleText;
import smile.nlp.Text;
import smile.nlp.TextTerms;
import smile.nlp.dictionary.EnglishPunctuations;
import smile.nlp.dictionary.EnglishStopWords;
import smile.nlp.dictionary.Punctuations;
import smile.nlp.dictionary.StopWords;
import smile.nlp.relevance.Relevance;
import smile.nlp.relevance.RelevanceRanker;
import smile.nlp.tokenizer.SentenceSplitter;
import smile.nlp.tokenizer.SimpleSentenceSplitter;
import smile.nlp.tokenizer.SimpleTokenizer;
import smile.nlp.tokenizer.Tokenizer;

public class SimpleCorpus
implements Corpus {
    private long size;
    private List<SimpleText> docs = new ArrayList<SimpleText>();
    private HashMap<String, Integer> freq = new HashMap();
    private HashMap<Bigram, Integer> freq2 = new HashMap();
    private HashMap<String, List<SimpleText>> invertedFile = new HashMap();
    private SentenceSplitter splitter;
    private Tokenizer tokenizer;
    private StopWords stopWords;
    private Punctuations punctuations;

    public SimpleCorpus() {
        this(SimpleSentenceSplitter.getInstance(), new SimpleTokenizer(), EnglishStopWords.DEFAULT, EnglishPunctuations.getInstance());
    }

    public SimpleCorpus(SentenceSplitter splitter, Tokenizer tokenizer, StopWords stopWords, Punctuations punctuations) {
        this.splitter = splitter;
        this.tokenizer = tokenizer;
        this.stopWords = stopWords;
        this.punctuations = punctuations;
    }

    public Text add(String id, String title, String body) {
        ArrayList<String> bag = new ArrayList<String>();
        for (String sentence : this.splitter.split(body)) {
            int i;
            String[] tokens = this.tokenizer.split(sentence);
            for (i = 0; i < tokens.length; ++i) {
                tokens[i] = tokens[i].toLowerCase();
            }
            for (String w : tokens) {
                boolean keep = true;
                if (this.punctuations != null && this.punctuations.contains(w)) {
                    keep = false;
                } else if (this.stopWords != null && this.stopWords.contains(w)) {
                    keep = false;
                }
                if (!keep) continue;
                ++this.size;
                bag.add(w);
                Integer f = this.freq.get(w);
                f = f == null ? Integer.valueOf(1) : Integer.valueOf(f + 1);
                this.freq.put(w, f);
            }
            for (i = 0; i < tokens.length - 1; ++i) {
                String w1 = tokens[i];
                String w2 = tokens[i + 1];
                if (!this.freq.containsKey(w1) || !this.freq.containsKey(w2)) continue;
                Bigram bigram = new Bigram(w1, w2);
                Integer f = this.freq2.get(bigram);
                f = f == null ? Integer.valueOf(1) : Integer.valueOf(f + 1);
                this.freq2.put(bigram, f);
            }
        }
        String[] words = new String[bag.size()];
        for (int i = 0; i < words.length; ++i) {
            words[i] = (String)bag.get(i);
        }
        SimpleText doc = new SimpleText(id, title, body, words);
        this.docs.add(doc);
        for (String term : doc.unique()) {
            List<SimpleText> hit = this.invertedFile.get(term);
            if (hit == null) {
                hit = new ArrayList<SimpleText>();
                this.invertedFile.put(term, hit);
            }
            hit.add(doc);
        }
        return doc;
    }

    @Override
    public long size() {
        return this.size;
    }

    @Override
    public int getNumDocuments() {
        return this.docs.size();
    }

    @Override
    public int getNumTerms() {
        return this.freq.size();
    }

    @Override
    public long getNumBigrams() {
        return this.freq2.size();
    }

    @Override
    public int getAverageDocumentSize() {
        return (int)(this.size / (long)this.docs.size());
    }

    @Override
    public int getTermFrequency(String term) {
        Integer f = this.freq.get(term);
        if (f == null) {
            return 0;
        }
        return f;
    }

    @Override
    public int getBigramFrequency(Bigram bigram) {
        Integer f = this.freq2.get(bigram);
        if (f == null) {
            return 0;
        }
        return f;
    }

    @Override
    public Iterator<String> getTerms() {
        return this.freq.keySet().iterator();
    }

    @Override
    public Iterator<Bigram> getBigrams() {
        return this.freq2.keySet().iterator();
    }

    @Override
    public Iterator<Text> search(String term) {
        if (this.invertedFile.containsKey(term)) {
            ArrayList hits = new ArrayList(this.invertedFile.get(term));
            return hits.iterator();
        }
        return Collections.emptyIterator();
    }

    @Override
    public Iterator<Relevance> search(RelevanceRanker ranker, String term) {
        if (this.invertedFile.containsKey(term)) {
            List<SimpleText> hits = this.invertedFile.get(term);
            int n = hits.size();
            ArrayList<Relevance> rank = new ArrayList<Relevance>(n);
            for (SimpleText doc : hits) {
                int tf = doc.tf(term);
                rank.add(new Relevance(doc, ranker.rank((Corpus)this, (TextTerms)doc, term, tf, n)));
            }
            Collections.sort(rank, Collections.reverseOrder());
            return rank.iterator();
        }
        return Collections.emptyIterator();
    }

    @Override
    public Iterator<Relevance> search(RelevanceRanker ranker, String[] terms) {
        HashSet hits = new HashSet();
        for (int i = 0; i < terms.length; ++i) {
            if (!this.invertedFile.containsKey(terms[i])) continue;
            hits.addAll(this.invertedFile.get(terms[i]));
        }
        int n = hits.size();
        if (n == 0) {
            return Collections.emptyIterator();
        }
        ArrayList<Relevance> rank = new ArrayList<Relevance>(n);
        for (SimpleText doc : hits) {
            double r = 0.0;
            for (int i = 0; i < terms.length; ++i) {
                int tf = doc.tf(terms[i]);
                r += ranker.rank((Corpus)this, (TextTerms)doc, terms[i], tf, n);
            }
            rank.add(new Relevance(doc, r));
        }
        Collections.sort(rank, Collections.reverseOrder());
        return rank.iterator();
    }
}

