/*
 * Decompiled with CFR 0.152.
 */
package edu.cmu.minorthird.classify.transform;

import edu.cmu.minorthird.classify.Dataset;
import edu.cmu.minorthird.classify.Example;
import edu.cmu.minorthird.classify.ExampleSchema;
import edu.cmu.minorthird.classify.Feature;
import edu.cmu.minorthird.classify.Instance;
import edu.cmu.minorthird.classify.MutableInstance;
import edu.cmu.minorthird.classify.transform.AbstractInstanceTransform;
import edu.cmu.minorthird.classify.transform.InstanceTransform;
import edu.cmu.minorthird.classify.transform.InstanceTransformLearner;
import gnu.trove.TObjectDoubleHashMap;
import java.io.Serializable;
import java.util.Iterator;

/**
 * Learns a TF-IDF re-weighting of instances from a dataset.
 *
 * <p>{@link #batchTrain(Dataset)} accumulates, for each feature, one count per
 * time the feature is yielded by an example's feature iterator (presumably once
 * per example, i.e. a document frequency -- confirm against
 * {@code Example.featureIterator()}), and returns a transform that rescales
 * feature weights to {@code log(weight + 1) * log(numDocuments / df)} and then
 * divides by the Euclidean norm of the resulting vector.
 */
public class TFIDFTransformLearner
implements InstanceTransformLearner,
Serializable {
    static final long serialVersionUID = 20080201L;
    /** Per-feature counts gathered by the most recent call to batchTrain. */
    private TObjectDoubleHashMap featureFreq;
    /** Size of the dataset passed to the most recent call to batchTrain. */
    private double numDocuments;

    /**
     * Intentionally a no-op: this learner does not use the schema.
     *
     * @param schema ignored
     */
    public void setSchema(ExampleSchema schema) {
    }

    /**
     * Counts feature occurrences over the dataset and builds the TF-IDF transform.
     *
     * @param dataset the examples to gather statistics from
     * @return a transform that applies normalized TF-IDF weights to instances
     */
    public InstanceTransform batchTrain(Dataset dataset) {
        this.numDocuments = dataset.size();
        // A fresh map per call, so transforms returned earlier keep their own statistics.
        this.featureFreq = new TObjectDoubleHashMap();
        Iterator<Example> i = dataset.iterator();
        while (i.hasNext()) {
            Example e = i.next();
            Iterator<Feature> j = e.featureIterator();
            while (j.hasNext()) {
                Feature f = j.next();
                // Relies on get() yielding 0.0 for a feature not yet in the map.
                double d = this.featureFreq.get(f);
                this.featureFreq.put(f, d + 1.0);
            }
        }
        return new TFIDFWeighter(this.numDocuments, this.featureFreq);
    }

    /**
     * Instance transform that replaces each feature weight with its
     * length-normalized TF-IDF weight.
     */
    private class TFIDFWeighter
    extends AbstractInstanceTransform
    implements Serializable {
        static final long serialVersionUID = 20080201L;
        private double numDocuments;
        private TObjectDoubleHashMap featureFreq;

        /**
         * @param numDocuments number of documents the frequencies were gathered from
         * @param featureFreq  per-feature counts; stored by reference, not copied
         */
        public TFIDFWeighter(double numDocuments, TObjectDoubleHashMap featureFreq) {
            this.numDocuments = numDocuments;
            this.featureFreq = featureFreq;
        }

        /**
         * Builds a new instance carrying the same source and subpopulation id,
         * whose features are those of {@code instance} with normalized TF-IDF weights.
         *
         * <p>If the Euclidean norm of the unnormalized weights is zero (no features,
         * or every feature has a zero TF-IDF weight), each feature is given weight
         * 0.0 rather than the NaN that dividing by a zero norm would produce.
         *
         * @param instance the instance to re-weight; it is not modified
         * @return the re-weighted copy
         */
        public Instance transform(Instance instance) {
            // First pass: Euclidean norm of the unnormalized weight vector.
            double norm = 0.0;
            Iterator<Feature> i = instance.featureIterator();
            while (i.hasNext()) {
                Feature g = i.next();
                double unnormalized = this.unnormalizedTFIDFWeight(g, instance);
                norm += unnormalized * unnormalized;
            }
            norm = Math.sqrt(norm);
            // A zero norm means every unnormalized weight is zero; dividing would give 0/0 = NaN.
            boolean zeroNorm = norm == 0.0;
            MutableInstance result = new MutableInstance(instance.getSource(), instance.getSubpopulationId());
            // Second pass: emit each feature scaled to unit length.
            Iterator<Feature> i2 = instance.featureIterator();
            while (i2.hasNext()) {
                Feature f = i2.next();
                double w = this.unnormalizedTFIDFWeight(f, instance);
                result.addNumeric(f, zeroNorm ? 0.0 : w / norm);
            }
            return result;
        }

        /**
         * Computes {@code log(weight + 1) * log(numDocuments / df)} for one feature.
         *
         * @param f        the feature to weigh
         * @param instance the instance supplying the feature's raw weight
         * @return the TF-IDF weight before length normalization
         */
        private double unnormalizedTFIDFWeight(Feature f, Instance instance) {
            double df = this.featureFreq.get(f);
            // A feature with no recorded count is treated as if it had a count of one,
            // which also avoids dividing by zero.
            if (df == 0.0) {
                df = 1.0;
            }
            return Math.log(instance.getWeight(f) + 1.0) * Math.log(this.numDocuments / df);
        }

        @Override
        public String toString() {
            return "[TFIDFWeighter]";
        }
    }
}

