package org.apache.lucene.wikipedia.analysis;

import java.io.StringReader;
import java.util.HashMap;
import junit.framework.TestCase;
import org.apache.lucene.analysis.Token;

/* loaded from: input_file:org/apache/lucene/wikipedia/analysis/WikipediaTokenizerTest.class */
/**
 * Tests for {@link WikipediaTokenizer}.
 *
 * <p>Covers three things: the token type reported for each term of a hand-written wiki sample,
 * the position increments reported for terms inside internal and external links, and the
 * term/type reported for external link URLs.
 */
public class WikipediaTokenizerTest extends TestCase {
    public WikipediaTokenizerTest(String str) {
        super(str);
    }

    /** No shared fixture is needed; every test builds its own tokenizer. */
    protected void setUp() {
    }

    /** Nothing to release; see {@link #setUp()}. */
    protected void tearDown() {
    }

    /**
     * Tokenizes a hand-written wiki sample and checks that every emitted term has the type
     * registered for it, then checks how many tokens of selected types were seen.
     */
    public void testHandwritten() throws Exception {
        // term text -> expected token type
        HashMap expectedTypes = new HashMap();
        expectType(expectedTypes, WikipediaTokenizer.INTERNAL_LINK, new String[] {
            "link", "display", "info"
        });
        expectType(expectedTypes, WikipediaTokenizer.EXTERNAL_LINK_URL, new String[] {
            "http://lucene.apache.org",
            "http://foo.boo.com/test/test/",
            "http://foo.boo.com/test/test/test.html",
            "http://foo.boo.com/test/test/test.html?g=b&c=d"
        });
        expectType(expectedTypes, WikipediaTokenizer.EXTERNAL_LINK, new String[] {
            "Test"
        });
        expectType(expectedTypes, "<ALPHANUM>", new String[] {
            "This", "is", "a", "Category", "linked", "parens", "external", "URL", "and",
            "period", "Here", "here", "Johnny", "followed", "by", "text", "that", "but",
            "never", "closed", "goes", "for", "this", "an", "some", "martian", "code"
        });
        expectType(expectedTypes, "<APOSTROPHE>", new String[] {
            "Here's"
        });
        expectType(expectedTypes, WikipediaTokenizer.CATEGORY, new String[] {
            "foo", "bar", "none", "withstanding", "blah", "ital", "cat"
        });
        expectType(expectedTypes, WikipediaTokenizer.ITALICS, new String[] {
            "italics", "more"
        });
        expectType(expectedTypes, WikipediaTokenizer.BOLD, new String[] {
            "bold", "same"
        });
        expectType(expectedTypes, WikipediaTokenizer.BOLD_ITALICS, new String[] {
            "five", "and2", "quotes"
        });
        expectType(expectedTypes, WikipediaTokenizer.HEADING, new String[] {
            "heading"
        });
        expectType(expectedTypes, WikipediaTokenizer.SUB_HEADING, new String[] {
            "sub", "head"
        });
        expectType(expectedTypes, WikipediaTokenizer.CITATION, new String[] {
            "Citation"
        });
        expectType(expectedTypes, "<NUM>", new String[] {
            "3.25", "3.50"
        });

        // One continuous input; split across lines only for readability. Whitespace at the
        // segment boundaries is significant and must be preserved exactly.
        String wikiText =
            "[[link]] This is a [[Category:foo]] Category  This is a linked [[:Category:bar none withstanding]] Category"
            + " This is (parens) This is a [[link]]  This is an external URL [http://lucene.apache.org]"
            + " Here is ''italics'' and ''more italics'', '''bold''' and '''''five quotes'''''"
            + "  This is a [[link|display info]]  This is a period.  Here is $3.25 and here is 3.50.  Here's Johnny."
            + "  ==heading== ===sub head=== followed by some text  [[Category:blah| ]] ''[[Category:ital_cat]]''"
            + "  here is some that is ''italics [[Category:foo]] but is never closed.'''same [[Category:foo]] goes for this '''''and2 [[Category:foo]]"
            + " and this [http://foo.boo.com/test/test/ Test Test] [http://foo.boo.com/test/test/test.html Test Test]"
            + " [http://foo.boo.com/test/test/test.html?g=b&c=d Test Test] <ref>Citation</ref> <sup>martian</sup> <span class=\"glue\">code</span>";
        WikipediaTokenizer tokenizer = new WikipediaTokenizer(new StringReader(wikiText));

        int numTokens = 0;
        int numItalics = 0;
        int numBoldItalics = 0;
        int numCategory = 0;
        int numCitation = 0;

        // The token returned by each call is handed back as the reusable token for the next call;
        // a null return ends the stream.
        Token token = new Token();
        while ((token = tokenizer.next(token)) != null) {
            String expectedType = (String) expectedTypes.get(termOf(token));
            assertTrue("expectedType is null and it shouldn't be for: " + token, expectedType != null);
            String actualType = token.type();
            assertTrue(actualType + " is not equal to " + expectedType + " for " + token, actualType.equals(expectedType));

            numTokens++;
            if (actualType.equals(WikipediaTokenizer.ITALICS)) {
                numItalics++;
            } else if (actualType.equals(WikipediaTokenizer.BOLD_ITALICS)) {
                numBoldItalics++;
            } else if (actualType.equals(WikipediaTokenizer.CATEGORY)) {
                numCategory++;
            } else if (actualType.equals(WikipediaTokenizer.CITATION)) {
                numCitation++;
            }
        }

        // Every registered term is expected at least once, so the token count cannot be smaller
        // than the number of registered terms.
        assertTrue("We have not seen enough tokens: " + numTokens + " is not >= " + expectedTypes.size(), numTokens >= expectedTypes.size());
        assertTrue(numItalics + " does not equal: 4 for numItalics", numItalics == 4);
        assertTrue(numBoldItalics + " does not equal: 3 for numBoldItalics", numBoldItalics == 3);
        assertTrue(numCategory + " does not equal: 10 for numCategory", numCategory == 10);
        assertTrue(numCitation + " does not equal: 1 for numCitation", numCitation == 1);
    }

    /**
     * Checks the terms and position increments produced for a multi-word internal link and a
     * multi-word external link.
     */
    public void testLinkPhrases() throws Exception {
        WikipediaTokenizer tokenizer = new WikipediaTokenizer(new StringReader("click [[link here again]] click [http://lucene.apache.org here again]"));
        Token token = new Token();
        token = assertNextTerm(tokenizer, token, "click", 1);
        token = assertNextTerm(tokenizer, token, "link", 1);
        token = assertNextTerm(tokenizer, token, "here", 1);
        token = assertNextTerm(tokenizer, token, "again", 1);
        token = assertNextTerm(tokenizer, token, "click", 1);
        token = assertNextTerm(tokenizer, token, "http://lucene.apache.org", 1);
        // The first word after the external link URL is expected with a position increment of 0.
        token = assertNextTerm(tokenizer, token, "here", 0);
        assertNextTerm(tokenizer, token, "again", 1);
    }

    /**
     * Checks that external link URLs with a fragment, with a query string, and with the https
     * scheme are each emitted as a single {@link WikipediaTokenizer#EXTERNAL_LINK_URL} token.
     */
    public void testLinks() throws Exception {
        WikipediaTokenizer tokenizer = new WikipediaTokenizer(new StringReader("[http://lucene.apache.org/java/docs/index.html#news here] [http://lucene.apache.org/java/docs/index.html?b=c here] [https://lucene.apache.org/java/docs/index.html?b=c here]"));

        Token token = assertNextUrl(tokenizer, new Token(), "http://lucene.apache.org/java/docs/index.html#news");
        // The token between two URLs is not inspected further (presumably the link text "here"),
        // but it must exist for the following URL check to be meaningful.
        assertTrue("token is null and it shouldn't be", tokenizer.next(token) != null);

        token = assertNextUrl(tokenizer, token, "http://lucene.apache.org/java/docs/index.html?b=c");
        assertTrue("token is null and it shouldn't be", tokenizer.next(token) != null);

        assertNextUrl(tokenizer, token, "https://lucene.apache.org/java/docs/index.html?b=c");
    }

    /** Registers {@code type} as the expected token type for each of {@code terms}. */
    private static void expectType(HashMap expectedTypes, String type, String[] terms) {
        for (int i = 0; i < terms.length; i++) {
            expectedTypes.put(terms[i], type);
        }
    }

    /** Returns the term text of {@code token}, built from its term buffer and term length. */
    private static String termOf(Token token) {
        return new String(token.termBuffer(), 0, token.termLength());
    }

    /**
     * Pulls the next token from {@code tokenizer}, passing {@code reusable} as the reusable token,
     * and asserts that it is non-null and that its term equals {@code expectedTerm}.
     *
     * @return the token returned by the tokenizer
     */
    private static Token assertNextTerm(WikipediaTokenizer tokenizer, Token reusable, String expectedTerm) throws Exception {
        Token token = tokenizer.next(reusable);
        assertTrue("token is null and it shouldn't be", token != null);
        String actualTerm = termOf(token);
        assertTrue(actualTerm + " is not equal to " + expectedTerm, actualTerm.equals(expectedTerm));
        return token;
    }

    /**
     * Same as {@link #assertNextTerm(WikipediaTokenizer, Token, String)}, and additionally asserts
     * that the token's position increment equals {@code expectedIncrement}.
     *
     * @return the token returned by the tokenizer
     */
    private static Token assertNextTerm(WikipediaTokenizer tokenizer, Token reusable, String expectedTerm, int expectedIncrement) throws Exception {
        Token token = assertNextTerm(tokenizer, reusable, expectedTerm);
        assertTrue(token.getPositionIncrement() + " does not equal: " + expectedIncrement, token.getPositionIncrement() == expectedIncrement);
        return token;
    }

    /**
     * Same as {@link #assertNextTerm(WikipediaTokenizer, Token, String)}, and additionally asserts
     * that the token's type is {@link WikipediaTokenizer#EXTERNAL_LINK_URL}.
     *
     * @return the token returned by the tokenizer
     */
    private static Token assertNextUrl(WikipediaTokenizer tokenizer, Token reusable, String expectedUrl) throws Exception {
        Token token = assertNextTerm(tokenizer, reusable, expectedUrl);
        assertTrue(token.type() + " is not equal to " + WikipediaTokenizer.EXTERNAL_LINK_URL, token.type().equals(WikipediaTokenizer.EXTERNAL_LINK_URL));
        return token;
    }
}
