/*
 * Decompiled with CFR 0.152.
 */
package edu.cmu.minorthird.classify.transform;

import edu.cmu.minorthird.classify.Dataset;
import edu.cmu.minorthird.classify.DatasetIndex;
import edu.cmu.minorthird.classify.ExampleSchema;
import edu.cmu.minorthird.classify.Feature;
import edu.cmu.minorthird.classify.Instance;
import edu.cmu.minorthird.classify.transform.AbstractInstanceTransform;
import edu.cmu.minorthird.classify.transform.InstanceTransform;
import edu.cmu.minorthird.classify.transform.InstanceTransformLearner;
import edu.cmu.minorthird.classify.transform.MaskedInstance;
import java.util.HashSet;
import java.util.Iterator;

/**
 * Learns an {@link InstanceTransform} that masks instances down to the set of
 * features whose frequency in the training dataset reaches a minimum threshold.
 *
 * <p>Two frequency models are supported:
 * <ul>
 *   <li>{@code "document"} - a feature's frequency is {@code DatasetIndex.size(feature)}
 *       (presumably the number of examples containing the feature; confirm against
 *       {@code DatasetIndex});</li>
 *   <li>{@code "word"} - a feature's frequency is the sum of its weights over the
 *       examples the index lists for that feature.</li>
 * </ul>
 */
public class FrequencyBasedTransformLearner
implements InstanceTransformLearner {
    /** Frequency model that compares {@code DatasetIndex.size(feature)} to the threshold. */
    private static final String DOCUMENT_MODEL = "document";
    /** Frequency model that compares the summed feature weight to the threshold. */
    private static final String WORD_MODEL = "word";
    /** Threshold used by the no-argument constructor. */
    private static final int DEFAULT_MINIMUM_FREQUENCY = 3;

    private final String frequencyModel;
    private final int minimumFrequency;

    /** Creates a learner using the "document" model and a minimum frequency of 3. */
    public FrequencyBasedTransformLearner() {
        this(DEFAULT_MINIMUM_FREQUENCY, DOCUMENT_MODEL);
    }

    /**
     * Creates a learner using the "document" model.
     *
     * @param minimumFrequency smallest frequency a feature must reach to be kept
     */
    public FrequencyBasedTransformLearner(int minimumFrequency) {
        this(minimumFrequency, DOCUMENT_MODEL);
    }

    /**
     * Creates a learner with an explicit threshold and frequency model.
     *
     * @param minimumFrequency smallest frequency a feature must reach to be kept
     * @param frequencyModel   {@code "document"} or {@code "word"}; any other value is
     *                         rejected when {@link #batchTrain(Dataset)} is called
     */
    public FrequencyBasedTransformLearner(int minimumFrequency, String frequencyModel) {
        this.frequencyModel = frequencyModel;
        this.minimumFrequency = minimumFrequency;
    }

    /**
     * Accepts a schema; this learner does not use it.
     *
     * @param schema ignored
     */
    public void setSchema(ExampleSchema schema) {
    }

    /**
     * Collects the features of {@code dataset} that meet the minimum frequency and
     * returns a transform that wraps each instance in a {@link MaskedInstance}
     * built from that feature set.
     *
     * @param dataset training data to index
     * @return a transform masking instances with the retained feature set
     * @throws IllegalArgumentException if the configured frequency model is neither
     *         {@code "document"} nor {@code "word"} (including {@code null})
     */
    public InstanceTransform batchTrain(Dataset dataset) {
        // Validate the model up front: fail with a catchable exception instead of
        // terminating the JVM, and before paying for the index construction.
        // Constant-first equals also makes a null model fail here rather than with an NPE.
        final boolean documentModel = DOCUMENT_MODEL.equals(this.frequencyModel);
        if (!documentModel && !WORD_MODEL.equals(this.frequencyModel)) {
            throw new IllegalArgumentException(this.frequencyModel + " is an unknown model for frequency!");
        }

        final HashSet<Feature> activeFeatureSet = new HashSet<Feature>();
        DatasetIndex index = new DatasetIndex(dataset);
        Iterator<Feature> features = index.featureIterator();
        while (features.hasNext()) {
            Feature f = features.next();
            if (this.isFrequentEnough(index, f, documentModel)) {
                activeFeatureSet.add(f);
            }
        }

        return new AbstractInstanceTransform(){

            public Instance transform(Instance instance) {
                return new MaskedInstance(instance, activeFeatureSet);
            }

            public String toString() {
                return "[InstanceTransform: model = " + FrequencyBasedTransformLearner.this.frequencyModel + ", features appear >= " + FrequencyBasedTransformLearner.this.minimumFrequency + " times]";
            }
        };
    }

    /** Returns whether {@code f} reaches the minimum frequency under the selected model. */
    private boolean isFrequentEnough(DatasetIndex index, Feature f, boolean documentModel) {
        // size(f) is loop-invariant for a given feature, so read it once.
        int exampleCount = index.size(f);
        if (documentModel) {
            return exampleCount >= this.minimumFrequency;
        }
        double totalCounts = 0.0;
        for (int j = 0; j < exampleCount; ++j) {
            totalCounts += index.getExample(f, j).getWeight(f);
        }
        // A NaN total compares false and is therefore excluded, as before.
        return totalCounts >= (double)this.minimumFrequency;
    }
}

