/*
 * Decompiled with CFR 0.152.
 */
package edu.cmu.minorthird.classify.transform;

import edu.cmu.minorthird.classify.BasicFeatureIndex;
import edu.cmu.minorthird.classify.Dataset;
import edu.cmu.minorthird.classify.Example;
import edu.cmu.minorthird.classify.ExampleSchema;
import edu.cmu.minorthird.classify.Feature;
import edu.cmu.minorthird.classify.SampleDatasets;
import edu.cmu.minorthird.classify.transform.InfoGainInstanceTransform;
import edu.cmu.minorthird.classify.transform.InstanceTransform;
import edu.cmu.minorthird.classify.transform.InstanceTransformLearner;
import java.util.Iterator;

/**
 * Learns an {@link InfoGainInstanceTransform} by scoring every feature of a
 * binary-labelled dataset with its information gain.
 *
 * <p>Only the {@code "document"} frequency model is implemented: the gain of a
 * feature is computed from how many examples of each class do and do not
 * contain the feature.
 */
public class InfoGainTransformLearner
implements InstanceTransformLearner {
    /** Frequency model under which feature counts are taken per example. */
    private static final String DOCUMENT_MODEL = "document";
    /** Frequency model that is recognised but has no implementation. */
    private static final String WORD_MODEL = "word";
    /** Label strings used when querying the feature index. */
    private static final String POS_LABEL = "POS";
    private static final String NEG_LABEL = "NEG";

    /** Name of the frequency model; validated when {@link #batchTrain} runs. */
    private final String frequencyModel;

    /** Creates a learner that uses the {@code "document"} frequency model. */
    public InfoGainTransformLearner() {
        this.frequencyModel = DOCUMENT_MODEL;
    }

    /**
     * Creates a learner that uses the given frequency model.
     *
     * @param model name of the frequency model; only {@code "document"} is
     *              accepted by {@link #batchTrain(Dataset)}
     */
    public InfoGainTransformLearner(String model) {
        this.frequencyModel = model;
    }

    /**
     * Checks that the data to be learned from is binary.
     *
     * @param schema schema of the examples that will be supplied
     * @throws IllegalStateException if {@code schema} is not equal to
     *         {@code ExampleSchema.BINARY_EXAMPLE_SCHEMA}
     */
    public void setSchema(ExampleSchema schema) {
        if (!ExampleSchema.BINARY_EXAMPLE_SCHEMA.equals(schema)) {
            throw new IllegalStateException("can only learn binary example data");
        }
    }

    /**
     * Computes the information gain of every feature indexed from
     * {@code dataset} and records it in a new {@link InfoGainInstanceTransform}.
     *
     * @param dataset the examples to score features on
     * @return the transform holding one information-gain score per feature
     * @throws UnsupportedOperationException if the frequency model is
     *         {@code "word"}, which is not implemented
     * @throws IllegalArgumentException if the frequency model is anything other
     *         than {@code "document"} or {@code "word"} (including {@code null})
     */
    public InstanceTransform batchTrain(Dataset dataset) {
        // Reject unusable models up front with an exception rather than
        // terminating the whole JVM from inside library code.
        if (WORD_MODEL.equals(this.frequencyModel)) {
            throw new UnsupportedOperationException(this.frequencyModel + " not implemented yet!");
        }
        if (!DOCUMENT_MODEL.equals(this.frequencyModel)) {
            throw new IllegalArgumentException(this.frequencyModel + " is an unknown model for frequency!");
        }

        InfoGainInstanceTransform filter = new InfoGainInstanceTransform();
        BasicFeatureIndex index = new BasicFeatureIndex(dataset);

        double totalCount = dataset.size();
        double posCount = index.size(POS_LABEL);
        // Everything that is not counted under POS is treated as NEG.
        double negCount = totalCount - posCount;
        double totalEntropy = this.entropyOfCounts(posCount, negCount);

        Iterator<Feature> features = index.featureIterator();
        while (features.hasNext()) {
            Feature f = features.next();

            // Class counts among the examples the index reports for f.
            double negWithF = index.size(f, NEG_LABEL);
            double posWithF = (double)index.size(f) - negWithF;
            // Class counts among the remaining examples.
            double negWithoutF = negCount - negWithF;
            double posWithoutF = posCount - posWithF;

            // An empty partition (feature present in all or in no examples)
            // contributes zero entropy instead of a 0/0 = NaN proportion.
            double entropyWithF = this.entropyOfCounts(posWithF, negWithF);
            double entropyWithoutF = this.entropyOfCounts(posWithoutF, negWithoutF);

            double weightWithF = (negWithF + posWithF) / totalCount;
            double infoGain = totalEntropy - weightWithF * entropyWithF - (1.0 - weightWithF) * entropyWithoutF;
            filter.addFeatureIG(infoGain, f);
        }
        return filter;
    }

    /**
     * Entropy of a two-class partition given the raw class counts.
     *
     * @return 0 when both counts sum to zero, otherwise
     *         {@link #Entropy(double, double)} of the two proportions
     */
    private double entropyOfCounts(double firstCount, double secondCount) {
        double total = firstCount + secondCount;
        if (total == 0.0) {
            return 0.0;
        }
        return this.Entropy(firstCount / total, secondCount / total);
    }

    /**
     * Binary entropy, in bits, of the two probabilities.
     *
     * @param P1 probability of the first outcome
     * @param P2 probability of the second outcome
     * @return 0 if either probability is exactly 0, otherwise
     *         {@code -P1*log2(P1) - P2*log2(P2)}
     */
    public double Entropy(double P1, double P2) {
        if (P1 == 0.0 || P2 == 0.0) {
            return 0.0;
        }
        return -P1 * Math.log(P1) / Math.log(2.0) - P2 * Math.log(P2) / Math.log(2.0);
    }

    /**
     * Sums the weights of all features of an example.
     *
     * @param e the example whose features are iterated
     * @return the sum of {@code e.getWeight(f)} over every feature {@code f}
     */
    public double getLength(Example e) {
        double len = 0.0;
        Iterator<Feature> i = e.featureIterator();
        while (i.hasNext()) {
            Feature f = i.next();
            len += e.getWeight(f);
        }
        return len;
    }

    /**
     * Demo: prints the "toy" sample dataset, trains a transform on it, keeps
     * 100 features, and prints the transformed dataset.
     */
    public static void main(String[] args) {
        Dataset dataset = SampleDatasets.sampleData("toy", false);
        System.out.println("old data:\n" + dataset);
        InfoGainTransformLearner learner = new InfoGainTransformLearner();
        InfoGainInstanceTransform filter = (InfoGainInstanceTransform)learner.batchTrain(dataset);
        filter.setNumberOfFeatures(100);
        dataset = filter.transform(dataset);
        System.out.println("new data:\n" + dataset);
    }
}

