/*
 * Decompiled with CFR 0.152.
 */
package recunn.datasets;

import java.io.File;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import recunn.autodiff.Graph;
import recunn.datastructs.DataSequence;
import recunn.datastructs.DataSet;
import recunn.datastructs.DataStep;
import recunn.loss.LossSoftmax;
import recunn.matrix.Matrix;
import recunn.model.LinearUnit;
import recunn.model.Model;
import recunn.model.Nonlinearity;
import recunn.util.Util;

public class TextGeneration
extends DataSet {
    public static int reportSequenceLength = 100;
    public static boolean singleWordAutocorrect = false;
    public static boolean reportPerplexity = true;
    private static Map<String, Integer> charToIndex = new HashMap<String, Integer>();
    private static Map<Integer, String> indexToChar = new HashMap<Integer, String>();
    private static int dimension;
    private static double[] vecStartEnd;
    private static final int START_END_TOKEN_INDEX = 0;
    private static Set<String> words;

    public static List<String> generateText(Model model, int steps, boolean argmax, double temperature, Random rng) throws Exception {
        ArrayList<String> lines = new ArrayList<String>();
        Matrix start = new Matrix(dimension);
        start.w[0] = 1.0;
        model.resetState();
        Graph g = new Graph(false);
        Matrix input = start.clone();
        String line = "";
        for (int s = 0; s < steps; ++s) {
            int i;
            Matrix logprobs = model.forward(input, g);
            Matrix probs = LossSoftmax.getSoftmaxProbs(logprobs, temperature);
            if (singleWordAutocorrect) {
                Matrix possible = Matrix.ones(dimension, 1);
                try {
                    possible = TextGeneration.singleWordAutocorrect(line);
                }
                catch (Exception exception) {
                    // empty catch block
                }
                double tot = 0.0;
                for (i = 0; i < probs.w.length; ++i) {
                    int n = i;
                    probs.w[n] = probs.w[n] * possible.w[i];
                    tot += probs.w[i];
                }
                i = 0;
                while (i < probs.w.length) {
                    int n = i++;
                    probs.w[n] = probs.w[n] / tot;
                }
                for (i = 0; i < probs.w.length; ++i) {
                    if (!(probs.w[i] > 0.0) || possible.w[i] != 0.0) continue;
                    throw new Exception("Illegal transition");
                }
            }
            int indxChosen = -1;
            if (argmax) {
                double high = Double.NEGATIVE_INFINITY;
                for (i = 0; i < probs.w.length; ++i) {
                    if (!(probs.w[i] > high)) continue;
                    high = probs.w[i];
                    indxChosen = i;
                }
            } else {
                indxChosen = Util.pickIndexFromRandomVector(probs, rng);
            }
            if (indxChosen == 0) {
                lines.add(line);
                line = "";
                input = start.clone();
                g = new Graph(false);
                model.resetState();
                input = start.clone();
                continue;
            }
            String ch = indexToChar.get(indxChosen);
            line = line + ch;
            for (int i2 = 0; i2 < input.w.length; ++i2) {
                input.w[i2] = 0.0;
            }
            input.w[indxChosen] = 1.0;
        }
        if (!line.equals("")) {
            lines.add(line);
        }
        return lines;
    }

    private static Matrix singleWordAutocorrect(String sequence) throws Exception {
        if ((sequence = sequence.replace("\"\n\"", " ")).equals("") || sequence.endsWith(" ")) {
            return Matrix.ones(dimension, 1);
        }
        String[] parts = sequence.split(" ");
        String lastPartialWord = parts[parts.length - 1].trim();
        if (lastPartialWord.equals(" ") || lastPartialWord.contains(" ")) {
            throw new Exception("unexpected");
        }
        ArrayList<String> matches = new ArrayList<String>();
        for (String word : words) {
            if (!word.startsWith(lastPartialWord)) continue;
            matches.add(word);
        }
        if (matches.size() == 0) {
            throw new Exception("unexpected, no matches for '" + lastPartialWord + "'");
        }
        Matrix result = new Matrix(dimension);
        boolean hit = false;
        for (String match : matches) {
            if (match.length() < lastPartialWord.length()) {
                throw new Exception("How is match shorter than partial word?");
            }
            if (lastPartialWord.equals(match)) {
                result.w[TextGeneration.charToIndex.get((Object)" ").intValue()] = 1.0;
                result.w[0] = 1.0;
                continue;
            }
            String nextChar = match.charAt(lastPartialWord.length()) + "";
            result.w[TextGeneration.charToIndex.get((Object)nextChar).intValue()] = 1.0;
            hit = true;
        }
        if (!hit) {
            result.w[TextGeneration.charToIndex.get((Object)" ").intValue()] = 1.0;
            result.w[0] = 1.0;
        }
        return result;
    }

    public static String sequenceToSentence(DataSequence sequence) {
        String result = "\"";
        for (int s = 0; s < sequence.steps.size() - 1; ++s) {
            DataStep step = sequence.steps.get(s);
            int index = -1;
            for (int i = 0; i < step.targetOutput.w.length; ++i) {
                if (step.targetOutput.w[i] != 1.0) continue;
                index = i;
                break;
            }
            String ch = indexToChar.get(index);
            result = result + ch;
        }
        result = result + "\"\n";
        return result;
    }

    public TextGeneration(String path) throws Exception {
        System.out.println("Text generation task");
        System.out.println("loading " + path + "...");
        File file = new File(path);
        List<String> lines = Files.readAllLines(file.toPath(), Charset.defaultCharset());
        HashSet<String> chars = new HashSet<String>();
        int id = 0;
        charToIndex.put("[START/END]", id);
        indexToChar.put(id, "[START/END]");
        ++id;
        System.out.println("Characters:");
        System.out.print("\t");
        for (String line : lines) {
            for (int i = 0; i < line.length(); ++i) {
                String[] parts;
                for (String part : parts = line.split(" ")) {
                    words.add(part.trim());
                }
                String ch = line.charAt(i) + "";
                if (chars.contains(ch)) continue;
                System.out.print(ch);
                chars.add(ch);
                charToIndex.put(ch, id);
                indexToChar.put(id, ch);
                ++id;
            }
        }
        dimension = chars.size() + 1;
        vecStartEnd = new double[dimension];
        TextGeneration.vecStartEnd[0] = 1.0;
        ArrayList<DataSequence> sequences = new ArrayList<DataSequence>();
        int size = 0;
        for (String line : lines) {
            ArrayList<double[]> vecs = new ArrayList<double[]>();
            vecs.add(vecStartEnd);
            for (int i = 0; i < line.length(); ++i) {
                String ch = line.charAt(i) + "";
                int index = charToIndex.get(ch);
                double[] vec = new double[dimension];
                vec[index] = 1.0;
                vecs.add(vec);
            }
            vecs.add(vecStartEnd);
            DataSequence sequence = new DataSequence();
            for (int i = 0; i < vecs.size() - 1; ++i) {
                sequence.steps.add(new DataStep((double[])vecs.get(i), (double[])vecs.get(i + 1)));
                ++size;
            }
            sequences.add(sequence);
        }
        System.out.println("Total unique chars = " + chars.size());
        System.out.println(size + " steps in training set.");
        this.training = sequences;
        this.lossTraining = new LossSoftmax();
        this.lossReporting = new LossSoftmax();
        this.inputDimension = ((DataSequence)sequences.get((int)0)).steps.get((int)0).input.w.length;
        int loc = 0;
        while (((DataSequence)sequences.get((int)0)).steps.get((int)loc).targetOutput == null) {
            ++loc;
        }
        this.outputDimension = ((DataSequence)sequences.get((int)0)).steps.get((int)loc).targetOutput.w.length;
    }

    @Override
    public void DisplayReport(Model model, Random rng) throws Exception {
        double[] temperatures;
        System.out.println("========================================");
        System.out.println("REPORT:");
        if (reportPerplexity) {
            System.out.println("\ncalculating perplexity over entire data set...");
            double perplexity = LossSoftmax.calculateMedianPerplexity(model, this.training);
            System.out.println("\nMedian Perplexity = " + String.format("%.4f", perplexity));
        }
        for (double temperature : temperatures = new double[]{1.0, 0.75, 0.5, 0.25, 0.1}) {
            if (singleWordAutocorrect) {
                System.out.println("\nTemperature " + temperature + " prediction (with single word autocorrect):");
            } else {
                System.out.println("\nTemperature " + temperature + " prediction:");
            }
            List<String> guess = TextGeneration.generateText(model, reportSequenceLength, false, temperature, rng);
            for (int i = 0; i < guess.size(); ++i) {
                if (i == guess.size() - 1) {
                    System.out.println("\t\"" + guess.get(i) + "...\"");
                    continue;
                }
                System.out.println("\t\"" + guess.get(i) + "\"");
            }
        }
        if (singleWordAutocorrect) {
            System.out.println("\nArgmax prediction (with single word autocorrect):");
        } else {
            System.out.println("\nArgmax prediction:");
        }
        List<String> guess = TextGeneration.generateText(model, reportSequenceLength, true, 1.0, rng);
        for (int i = 0; i < guess.size(); ++i) {
            if (i == guess.size() - 1) {
                System.out.println("\t\"" + guess.get(i) + "...\"");
                continue;
            }
            System.out.println("\t\"" + guess.get(i) + "\"");
        }
        System.out.println("========================================");
    }

    @Override
    public Nonlinearity getModelOutputUnitToUse() {
        return new LinearUnit();
    }

    static {
        words = new HashSet<String>();
    }
}

