/*
 * Decompiled with CFR 0.152.
 */
package edu.cmu.minorthird.classify;

import edu.cmu.minorthird.classify.BasicDataset;
import edu.cmu.minorthird.classify.BinaryClassifier;
import edu.cmu.minorthird.classify.ClassLabel;
import edu.cmu.minorthird.classify.Classifier;
import edu.cmu.minorthird.classify.ClassifierLearner;
import edu.cmu.minorthird.classify.Dataset;
import edu.cmu.minorthird.classify.DatasetClassifierTeacher;
import edu.cmu.minorthird.classify.Example;
import edu.cmu.minorthird.classify.Feature;
import edu.cmu.minorthird.classify.Instance;
import edu.cmu.minorthird.classify.MutableInstance;
import edu.cmu.minorthird.classify.semisupervised.SemiSupervisedDataset;
import edu.cmu.minorthird.classify.sequential.SequenceDataset;
import edu.cmu.minorthird.util.MathUtil;
import edu.cmu.minorthird.util.gui.ViewerFrame;
import edu.cmu.minorthird.util.gui.Visible;
import java.util.Iterator;
import java.util.Random;
import java.util.StringTokenizer;
import org.apache.log4j.Logger;

public class SampleDatasets {
    /** Logger used by {@code main} and {@code traceClassifier}. */
    private static Logger log = Logger.getLogger(SampleDatasets.class);

    /** Positive training phrases of the "toy" dataset. */
    public static final String[] posTrain = {
        "a pricy doll house",
        "a little red fire truck",
        "a red wagon",
        "a pricy red sports car",
        "punk queen barbie and ken",
        "a little red bike"
    };

    /** Negative training phrases of the "toy" dataset. */
    public static final String[] negTrain = {
        "a a a a big 7-seater minivan with an automatic transmission",
        "a big house in the suburbs with a crushing mortgage",
        "a job for life at IBM",
        "a huge pile of tax forms, due yesterday",
        "huge pile of junk mail, bills, and catalogs"
    };

    /** Positive test phrases of the "toy" dataset. */
    public static final String[] posTest = {
        "a pricy barbie doll",
        "a little yellow toy car",
        "a red 10 speed bike",
        "a red convertible porshe"
    };

    /** Negative test phrases of the "toy" dataset. */
    public static final String[] negTest = {
        "a big pile of paperwork",
        "a huge backlog of email",
        "a life of woe and trouble"
    };

    /** Positive training phrases of the "bayes" dataset; some words are repeated on purpose. */
    private static String[] posBayesTrain = {
        "a a pricy doll house",
        "a a little red red fire truck",
        "a red wagon",
        "a pricy red sports car",
        "punk queen barbie and and ken",
        "a little red bike"
    };

    /** Negative training phrases of the "bayes" dataset; some words are repeated on purpose. */
    private static String[] negBayesTrain = {
        "a big big 7-seater minivan with with an an automatic transmission",
        "a big house in the suburbs with a a crushing mortgage",
        "a job for for life at at IBM",
        "a huge pile of of tax forms, due yesterday",
        "huge pile of of junk mail, bills, and catalogs"
    };

    /** Positive test phrases of the "bayes" dataset. */
    private static String[] posBayesTest = {
        "a a pricy barbie doll",
        "a little yellow toy car",
        "a a red 10 speed bike",
        "a red convertible porshe"
    };

    /** Negative test phrases of the "bayes" dataset. */
    private static String[] negBayesTest = {
        "a big pile of of paperwork",
        "a huge backlog of email",
        "a life of woe and and trouble"
    };

    /** Positive training phrases of the "bayesExtreme" dataset: p-tokens repeated, n-tokens once each. */
    private static String[] posBayesExtremeTrain = {
        "p1 p1 p1 p2 p2 p3 p3 p4 p4 p5 p5 n1 n2 n3 n4 n5",
        "p1 p1 p2 p2 p2 p3 p3 p4 p4 p5 p5 n1 n2 n3 n4 n5",
        "p1 p1 p2 p2 p3 p3 p3 p4 p4 p5 p5 n1 n2 n3 n4 n5",
        "p1 p1 p2 p2 p3 p3 p4 p4 p4 p5 p5 n1 n2 n3 n4 n5",
        "p1 p1 p2 p2 p3 p3 p4 p4 p5 p5 p5 n1 n2 n3 n4 n5"
    };

    /** Negative training phrases of the "bayesExtreme" dataset: n-tokens repeated, p-tokens once each. */
    private static String[] negBayesExtremeTrain = {
        "p1 p2 p3 p4 p5 n1 n1 n1 n2 n2 n3 n3 n4 n4 n5 n5",
        "p1 p2 p3 p4 p5 n1 n1 n2 n2 n2 n3 n3 n4 n4 n5 n5",
        "p1 p2 p3 p4 p5 n1 n1 n2 n2 n3 n3 n3 n4 n4 n5 n5",
        "p1 p2 p3 p4 p5 n1 n1 n2 n2 n3 n3 n4 n4 n4 n5 n5",
        "p1 p2 p3 p4 p5 n1 n1 n2 n2 n3 n3 n4 n4 n5 n5 n5"
    };

    /** Positive test phrases of the "bayesExtreme" dataset. */
    private static String[] posBayesExtremeTest = {
        "p1 p1 n1",
        "p2 p2 n2",
        "p3 p3 n3",
        "p4 p4 n4",
        "p5 p5 n5"
    };

    /** Negative test phrases of the "bayesExtreme" dataset. */
    private static String[] negBayesExtremeTest = {
        "p1 n1 n1",
        "p2 n2 n2",
        "p3 n3 n3",
        "p4 n4 n4",
        "p5 n5 n5"
    };

    /** Unlabeled phrases added to the "bayesUnlabeled" training set. */
    private static String[] unlabeledBayesExtreme = {
        "p1 n1 n1",
        "p2 n2 n2",
        "p3 n3 n3",
        "p1 p1 n1",
        "p2 p2 n2",
        "p3 p3 n3"
    };

    /**
     * Builds a binary dataset from two groups of whitespace-separated phrases.
     * All positive phrases are added first, then all negative ones.
     *
     * @param pos phrases to add with label +1.0
     * @param neg phrases to add with label -1.0
     * @return a new {@link BasicDataset} with one example per phrase
     */
    private static Dataset makeData(String[] pos, String[] neg) {
        BasicDataset dataset = new BasicDataset();
        for (String phrase : pos) {
            dataset.add(makeExample(1.0, phrase));
        }
        for (String phrase : neg) {
            dataset.add(makeExample(-1.0, phrase));
        }
        return dataset;
    }

    /**
     * Turns a phrase into a binary-labeled example with one binary feature per
     * whitespace-separated token. The phrase itself is passed to the
     * {@link MutableInstance} constructor.
     *
     * @param label numeric label handed to {@code ClassLabel.binaryLabel}
     * @param text  phrase to tokenize
     * @return the labeled example
     */
    private static Example makeExample(double label, String text) {
        MutableInstance instance = new MutableInstance(text);
        for (StringTokenizer words = new StringTokenizer(text); words.hasMoreTokens(); ) {
            instance.addBinary(new Feature(words.nextToken()));
        }
        return new Example(instance, ClassLabel.binaryLabel(label));
    }

    /**
     * @return the "toy" training set built from {@link #posTrain} and {@link #negTrain}
     */
    public static Dataset toyTrain() {
        Dataset train = makeData(posTrain, negTrain);
        return train;
    }

    /**
     * @return the "toy" test set built from {@link #posTest} and {@link #negTest}
     */
    public static Dataset toyTest() {
        Dataset test = makeData(posTest, negTest);
        return test;
    }

    /**
     * @return the "bayesExtreme" training set, built with count-weighted features
     *         from the extreme positive and negative training phrases
     */
    public static Dataset toyBayesExtremeTrain() {
        Dataset train = makeBayesData(posBayesExtremeTrain, negBayesExtremeTrain);
        return train;
    }

    /**
     * @return the "bayesExtreme" test set, built with count-weighted features
     *         from the extreme positive and negative test phrases
     */
    public static Dataset toyBayesExtremeTest() {
        Dataset test = makeBayesData(posBayesExtremeTest, negBayesExtremeTest);
        return test;
    }

    /**
     * @return a semi-supervised training set: the labeled "bayesExtreme" training
     *         phrases plus the unlabeled extreme phrases
     */
    public static Dataset toyBayesExtremeUnlabeledTrain() {
        Dataset train =
                makeUnlabeledBayesData(posBayesExtremeTrain, negBayesExtremeTrain, unlabeledBayesExtreme);
        return train;
    }

    /**
     * Builds a {@link SemiSupervisedDataset} from labeled and unlabeled phrases.
     * Positive phrases get a fresh {@code "POS"} label, negative phrases a fresh
     * {@code "NEG"} label, and unlabeled phrases are added via {@code addUnlabeled}.
     *
     * @param pos       phrases labeled "POS"
     * @param neg       phrases labeled "NEG"
     * @param unlabeled phrases added without a label
     * @return the populated dataset
     */
    private static Dataset makeUnlabeledBayesData(String[] pos, String[] neg, String[] unlabeled) {
        SemiSupervisedDataset dataset = new SemiSupervisedDataset();
        for (String phrase : pos) {
            dataset.add(makeLabeledBayesExample(new ClassLabel("POS"), phrase));
        }
        for (String phrase : neg) {
            dataset.add(makeLabeledBayesExample(new ClassLabel("NEG"), phrase));
        }
        for (String phrase : unlabeled) {
            dataset.addUnlabeled(makeUnlabeledBayesExample(phrase));
        }
        return dataset;
    }

    /**
     * Converts a phrase into an example whose feature weights track how often
     * each token occurs. A token whose current weight is zero is added as a
     * binary feature; otherwise it is re-added as a numeric feature with the
     * current weight plus one.
     *
     * @param label class label to attach to the example
     * @param text  phrase to tokenize on whitespace
     * @return the labeled example
     */
    private static Example makeLabeledBayesExample(ClassLabel label, String text) {
        MutableInstance instance = new MutableInstance();
        for (StringTokenizer words = new StringTokenizer(text); words.hasMoreTokens(); ) {
            Feature feature = new Feature(words.nextToken());
            double current = instance.getWeight(feature);
            // Zero weight is treated as "not seen yet" (presumably what getWeight
            // reports for an absent feature -- confirm against MutableInstance).
            if (current != 0.0) {
                instance.addNumeric(feature, current + 1.0);
            } else {
                instance.addBinary(feature);
            }
        }
        return new Example(instance, label);
    }

    /**
     * Converts a phrase into an unlabeled instance whose feature weights track
     * how often each token occurs. A token whose current weight is zero is added
     * as a binary feature; otherwise it is re-added as a numeric feature with the
     * current weight plus one.
     *
     * @param text phrase to tokenize on whitespace
     * @return the populated instance
     */
    private static Instance makeUnlabeledBayesExample(String text) {
        MutableInstance instance = new MutableInstance();
        for (StringTokenizer words = new StringTokenizer(text); words.hasMoreTokens(); ) {
            Feature feature = new Feature(words.nextToken());
            double current = instance.getWeight(feature);
            // Zero weight is treated as "not seen yet" (presumably what getWeight
            // reports for an absent feature -- confirm against MutableInstance).
            if (current != 0.0) {
                instance.addNumeric(feature, current + 1.0);
            } else {
                instance.addBinary(feature);
            }
        }
        return instance;
    }

    /**
     * Builds a binary dataset with count-weighted features from two groups of
     * phrases. All positive phrases are added first, then all negative ones.
     *
     * @param pos phrases to add with label +1.0
     * @param neg phrases to add with label -1.0
     * @return a new {@link BasicDataset} with one example per phrase
     */
    private static Dataset makeBayesData(String[] pos, String[] neg) {
        BasicDataset dataset = new BasicDataset();
        for (String phrase : pos) {
            dataset.add(makeBayesExample(1.0, phrase));
        }
        for (String phrase : neg) {
            dataset.add(makeBayesExample(-1.0, phrase));
        }
        return dataset;
    }

    /**
     * Converts a phrase into a binary-labeled example whose feature weights track
     * how often each token occurs. A token whose current weight is zero is added
     * as a binary feature; otherwise it is re-added as a numeric feature with the
     * current weight plus one.
     *
     * @param label numeric label handed to {@code ClassLabel.binaryLabel}
     * @param text  phrase to tokenize on whitespace
     * @return the labeled example
     */
    private static Example makeBayesExample(double label, String text) {
        MutableInstance instance = new MutableInstance();
        for (StringTokenizer words = new StringTokenizer(text); words.hasMoreTokens(); ) {
            Feature feature = new Feature(words.nextToken());
            double current = instance.getWeight(feature);
            // Zero weight is treated as "not seen yet" (presumably what getWeight
            // reports for an absent feature -- confirm against MutableInstance).
            if (current != 0.0) {
                instance.addNumeric(feature, current + 1.0);
            } else {
                instance.addBinary(feature);
            }
        }
        return new Example(instance, ClassLabel.binaryLabel(label));
    }

    /**
     * @return the "bayes" training set, built with count-weighted features from
     *         the Bayes positive and negative training phrases
     */
    public static Dataset toyBayesTrain() {
        Dataset train = makeBayesData(posBayesTrain, negBayesTrain);
        return train;
    }

    /**
     * @return the "bayes" test set, built with count-weighted features from the
     *         Bayes positive and negative test phrases
     */
    public static Dataset toyBayesTest() {
        Dataset test = makeBayesData(posBayesTest, negBayesTest);
        return test;
    }

    /**
     * Generates {@code m} examples over a single optional numeric feature "x".
     * For each example one double is drawn from {@code r}; when it exceeds 0.7 the
     * example carries {@code x = 1.0} and label +1.0, otherwise it has no features
     * and label -1.0.
     *
     * @param r source of randomness (one {@code nextDouble()} per example)
     * @param m number of examples to generate
     * @return the generated dataset
     */
    public static Dataset makeSparseNumericData(Random r, int m) {
        BasicDataset dataset = new BasicDataset();
        Feature xFeature = new Feature("x");
        int made = 0;
        while (made < m) {
            MutableInstance instance = new MutableInstance();
            boolean positive = r.nextDouble() > 0.7;
            if (positive) {
                instance.addNumeric(xFeature, 1.0);
            }
            dataset.add(new Example(instance, ClassLabel.binaryLabel(positive ? 1.0 : -1.0)));
            ++made;
        }
        return dataset;
    }

    /**
     * Generates {@code m} examples with up to {@code dim} numeric features named
     * x, y, z, t, u, v, w, each drawn uniformly from [0, 10). An example is
     * labeled +1.0 when its x and y weights are both below 3 or both above 7,
     * and -1.0 otherwise.
     *
     * @param r   source of randomness
     * @param dim number of feature names to use; at most 7
     * @param m   number of examples to generate
     * @return the generated dataset
     * @throws IllegalArgumentException if {@code dim} exceeds the number of available feature names
     */
    public static Dataset makeNumericData(Random r, int dim, int m) {
        Feature fx = new Feature("x");
        Feature fy = new Feature("y");
        BasicDataset dataset = new BasicDataset();
        String[] names = {"x", "y", "z", "t", "u", "v", "w"};
        if (dim > names.length) {
            throw new IllegalArgumentException("dim to big!");
        }
        for (int row = 0; row < m; ++row) {
            MutableInstance instance = new MutableInstance();
            for (int col = 0; col < dim; ++col) {
                // The feature whose index equals the example index is left out, and no
                // random number is drawn for it. NOTE(review): intent not visible here
                // (possibly deliberate sparsity) -- confirm before changing.
                if (col != row) {
                    instance.addNumeric(new Feature(names[col]), r.nextDouble() * 10.0);
                }
            }
            double x = instance.getWeight(fx);
            double y = instance.getWeight(fy);
            boolean bothLow = x < 3.0 && y < 3.0;
            boolean bothHigh = x > 7.0 && y > 7.0;
            double label;
            if (bothLow || bothHigh) {
                label = 1.0;
            } else {
                label = -1.0;
            }
            dataset.add(new Example(instance, ClassLabel.binaryLabel(label)));
        }
        return dataset;
    }

    /**
     * Samples {@code m} examples from a one-dimensional logistic model. For each
     * example, x is drawn from {@code rand}, p = logistic(a*x + b) is computed,
     * and a second draw decides the label: positive when p exceeds it, negative
     * otherwise. Every instance carries numeric feature "x" and binary feature
     * "bias". A summary line with the class counts is printed to standard output.
     *
     * @param rand source of randomness (two {@code nextDouble()} calls per example)
     * @param m    number of examples to generate
     * @param a    slope of the linear term
     * @param b    intercept of the linear term
     * @return the generated dataset
     */
    public static Dataset makeLogisticRegressionData(Random rand, int m, double a, double b) {
        BasicDataset data = new BasicDataset();
        int numPos = 0;
        int numNeg = 0;
        for (int i = 0; i < m; ++i) {
            // Draw order matters for reproducibility: x first, then the threshold.
            double x = rand.nextDouble();
            double p = MathUtil.logistic(a * x + b);
            double threshold = rand.nextDouble();
            ClassLabel y;
            if (p > threshold) {
                y = ClassLabel.positiveLabel(1.0);
                ++numPos;
            } else {
                y = ClassLabel.negativeLabel(-1.0);
                ++numNeg;
            }
            MutableInstance instance = new MutableInstance();
            instance.addNumeric(new Feature("x"), x);
            instance.addBinary(new Feature("bias"));
            data.add(new Example(instance, y));
        }
        System.out.println(m + " examples: " + numPos + " pos, " + numNeg + " neg");
        return data;
    }

    /**
     * @return the toy sequence training set, built from three fixed sentences
     */
    public static SequenceDataset makeToySequenceData() {
        String[] sentences = {
            "you're a good man Charlie Brown",
            "where's Waldo?",
            "alas dear Yorick, I knew him well"
        };
        return makeToySequenceData(sentences);
    }

    /**
     * @return the toy sequence test set, built from two fixed sentences
     */
    public static SequenceDataset makeToySequenceTestData() {
        String[] sentences = {
            "hello, World War III",
            "to be or 2B, that is a question"
        };
        return makeToySequenceData(sentences);
    }

    /**
     * Builds a sequence dataset with one sequence per line and one example per
     * space-separated token. A token is labeled "POS" when its first character
     * is upper case and "NEG" otherwise. Each example carries binary features
     * for the token itself ("here"), its left neighbour ("prev", when one
     * exists), its right neighbour ("next", when one exists) and a case pattern
     * in which runs of upper-case letters collapse to "A" and runs of lower-case
     * letters to "a".
     *
     * @param lines sentences to convert; each is split on single spaces
     * @return the populated sequence dataset
     */
    public static SequenceDataset makeToySequenceData(String[] lines) {
        SequenceDataset d = new SequenceDataset();
        for (int i = 0; i < lines.length; ++i) {
            String[] w = lines[i].split(" ");
            Example[] seq = new Example[w.length];
            for (int j = 0; j < w.length; ++j) {
                // split(" ") yields empty tokens for an empty line or repeated/leading
                // spaces; those have no first character and are labeled NEG rather
                // than triggering StringIndexOutOfBoundsException.
                boolean capitalized = w[j].length() > 0 && Character.isUpperCase(w[j].charAt(0));
                ClassLabel lab = capitalized ? new ClassLabel("POS") : new ClassLabel("NEG");
                MutableInstance inst = new MutableInstance(lines[i] + ":" + j, "line" + i);
                inst.addBinary(new Feature("here " + w[j]));
                // Every token except the first has a predecessor (previously the
                // second token was skipped by an off-by-one "j > 1" guard).
                if (j > 0) {
                    inst.addBinary(new Feature("prev " + w[j - 1]));
                }
                // Every token except the last has a successor.
                if (j < w.length - 1) {
                    inst.addBinary(new Feature("next " + w[j + 1]));
                }
                inst.addBinary(new Feature("casePattern " + w[j].replaceAll("[A-Z]+", "A").replaceAll("[a-z]+", "a")));
                seq[j] = new Example(inst, lab);
            }
            d.addSequence(seq);
        }
        return d;
    }

    /**
     * Generates a three-class toy dataset with classes "homer", "marge" and
     * "bart". For each instance a class is drawn uniformly, then between 2 and 4
     * words are drawn (with replacement) from that class's vocabulary and added
     * as binary features named {@code {"word", <word>}}.
     *
     * @param random2      source of randomness
     * @param numInstances number of examples to generate
     * @return the generated dataset
     */
    public static Dataset makeToy3ClassData(Random random2, int numInstances) {
        String[] labels = {"homer", "marge", "bart"};
        String[][] vocabulary = {
            {"money", "cash", "sleep", "booze", "chocolate", "fun", "beer", "pizza"},
            {"stocks", "bonds", "money", "cash", "influence", "power", "fame"},
            {"chocolate", "beer", "pizza", "pringles", "popcorn", "spam", "crisco"}
        };
        BasicDataset dataset = new BasicDataset();
        for (int n = 0; n < numInstances; ++n) {
            // Draw order is significant for reproducibility: class, then word count, then words.
            int classIndex = random2.nextInt(labels.length);
            int wordCount = random2.nextInt(3) + 2;
            String[] words = vocabulary[classIndex];
            MutableInstance instance = new MutableInstance();
            for (int k = 0; k < wordCount; ++k) {
                String word = words[random2.nextInt(words.length)];
                instance.addBinary(new Feature(new String[]{"word", word}));
            }
            dataset.add(new Example(instance, new ClassLabel(labels[classIndex])));
        }
        return dataset;
    }

    /**
     * Looks up one of the built-in sample datasets by name.
     *
     * <p>Recognised names: "toy", "bayes", "bayesExtreme", "bayesUnlabeled",
     * "num", "logistic", "bigLogistic", "sparseNum", "toy3" and "toySeq".
     * Randomly generated datasets use seed 666 for the test split and seed 999
     * for the training split.
     *
     * @param name   dataset name
     * @param isTest {@code true} for the test split, {@code false} for the training split
     * @return the requested dataset
     * @throws IllegalArgumentException if {@code name} is not a recognised dataset name
     */
    public static Dataset sampleData(String name, boolean isTest) {
        // Fixed phrase-based datasets.
        if ("toy".equals(name)) {
            return isTest ? toyTest() : toyTrain();
        }
        if ("bayes".equals(name)) {
            return isTest ? toyBayesTest() : toyBayesTrain();
        }
        if ("bayesExtreme".equals(name)) {
            return isTest ? toyBayesExtremeTest() : toyBayesExtremeTrain();
        }
        if ("bayesUnlabeled".equals(name)) {
            return isTest ? toyBayesExtremeTest() : toyBayesExtremeUnlabeledTrain();
        }

        // Randomly generated datasets: the seed alone distinguishes test from train.
        long seed = isTest ? 666L : 999L;
        if ("num".equals(name)) {
            return makeNumericData(new Random(seed), 2, 20);
        }
        if ("logistic".equals(name)) {
            return makeLogisticRegressionData(new Random(seed), 50, 2.0, -2.0);
        }
        if ("bigLogistic".equals(name)) {
            return makeLogisticRegressionData(new Random(seed), 1000, 2.0, -2.0);
        }
        if ("sparseNum".equals(name)) {
            return makeSparseNumericData(new Random(seed), 20);
        }
        if ("toy3".equals(name)) {
            return makeToy3ClassData(new Random(seed), 50);
        }

        // Fixed sequence dataset.
        if ("toySeq".equals(name)) {
            return isTest ? makeToySequenceTestData() : makeToySequenceData();
        }
        throw new IllegalArgumentException("illegal dataset name '" + name + "'");
    }

    /**
     * Command-line entry point. Loads the train and test splits of the named
     * sample dataset and logs them at debug level. When a learner class name is
     * also given, it instantiates the learner, trains it on the training split,
     * logs the resulting classifier, traces its predictions on both splits and,
     * if the classifier is {@link Visible}, opens a viewer window.
     *
     * <p>Arguments: {@code <dataset> [learnerClass [active]]}. Any failure
     * prints the usage line followed by the stack trace.
     *
     * @param args command-line arguments as described above
     */
    public static void main(String[] args) {
        // Without a dataset name there is nothing to load; print usage instead of
        // failing on args[0].
        if (args.length == 0) {
            System.out.println("usage: [toy|num] edu.cmu.minorthird.classify.SomeLearner [active]");
            return;
        }
        try {
            Dataset train = SampleDatasets.sampleData(args[0], false);
            Dataset test = SampleDatasets.sampleData(args[0], true);
            log.debug("Train dataset is: ");
            log.debug(train.toString());
            log.debug("Test dataset is:");
            log.debug(test.toString());
            // The learner is optional: only touch args[1] when it was actually supplied.
            if (args.length > 1) {
                // Class.newInstance() is deprecated; go through the no-arg constructor instead.
                ClassifierLearner learner =
                        (ClassifierLearner) Class.forName(args[1]).getDeclaredConstructor().newInstance();
                boolean active = args.length >= 3 && "active".equals(args[2]);
                DatasetClassifierTeacher teacher = new DatasetClassifierTeacher(train, active);
                Classifier c = teacher.train(learner);
                log.info("Classifier: " + c);
                SampleDatasets.traceClassifier("Train", c, train);
                SampleDatasets.traceClassifier("Test", c, test);
                if (c instanceof Visible) {
                    new ViewerFrame(args[1] + " on " + args[0], ((Visible) c).toGUI());
                }
            }
        }
        catch (Exception e) {
            System.out.println("usage: [toy|num] edu.cmu.minorthird.classify.SomeLearner [active]");
            e.printStackTrace();
        }
    }

    /**
     * Logs, at info level, one line per example showing whether the classifier's
     * prediction agrees with the example's label.
     *
     * <p>For a {@link BinaryClassifier} the prediction's positive weight is
     * compared with the numeric label, and the line is marked "Y" when their
     * product is non-negative. For any other classifier the predicted label's
     * {@code isCorrect} check against the actual label decides the mark.
     *
     * @param datasetName name shown in the header line
     * @param c           classifier to evaluate
     * @param d           dataset whose examples are classified
     */
    private static void traceClassifier(String datasetName, Classifier c, Dataset d) {
        log.info("");
        log.info("Performance on dataset " + datasetName + ":");
        for (Iterator<Example> examples = d.iterator(); examples.hasNext(); ) {
            Example example = examples.next();
            if (!(c instanceof BinaryClassifier)) {
                ClassLabel truth = example.getLabel();
                ClassLabel guess = c.classification(example);
                String mark = guess.isCorrect(truth) ? "Y" : "N";
                log.info(mark + "\tpred=" + guess + "\tactual=" + truth + "\t" + example);
            } else {
                double truth = example.getLabel().numericLabel();
                double guess = c.classification(example).posWeight();
                // Same sign, or either value zero, counts as agreement.
                String mark = guess * truth >= 0.0 ? "Y" : "N";
                log.info(mark + "\tpred=" + guess + "\tactual=" + truth + "\t" + example);
            }
        }
    }
}

