/*
 * Decompiled with CFR 0.152.
 */
package edu.cmu.minorthird.classify;

import edu.cmu.minorthird.classify.BasicDataset;
import edu.cmu.minorthird.classify.ClassLabel;
import edu.cmu.minorthird.classify.Dataset;
import edu.cmu.minorthird.classify.Example;
import edu.cmu.minorthird.classify.Feature;
import edu.cmu.minorthird.classify.Instance;
import edu.cmu.minorthird.classify.MutableInstance;
import edu.cmu.minorthird.classify.SGMExample;
import edu.cmu.minorthird.classify.multi.MultiClassLabel;
import edu.cmu.minorthird.classify.multi.MultiDataset;
import edu.cmu.minorthird.classify.multi.MultiExample;
import edu.cmu.minorthird.classify.relational.Link;
import edu.cmu.minorthird.classify.relational.RealRelationalDataset;
import edu.cmu.minorthird.classify.sequential.SequenceDataset;
import edu.cmu.minorthird.util.ProgressCounter;
import edu.cmu.minorthird.util.StringEncoder;
import edu.cmu.minorthird.util.StringUtil;
import edu.cmu.minorthird.util.gui.ViewerFrame;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.LineNumberReader;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;
import org.apache.log4j.Logger;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * Static helpers that save datasets to, and load them from, line-oriented text files.
 *
 * <p>The basic line format written by {@link #save(Dataset, File)} and read by
 * {@link #loadFile(File)} is a whitespace-separated record:
 * <pre>
 *   TYPE SUBPOPULATION_ID LABEL FEATURE ...
 * </pre>
 * where {@code SUBPOPULATION_ID} is the literal {@code NUL} when the example has no
 * subpopulation id, a binary feature is written as its dot-separated name parts, and a
 * numeric feature is written as {@code name=weight}. The first three fields are escaped
 * with a {@code %}-based {@link StringEncoder}; feature name parts use a second encoder
 * that additionally escapes {@code =} and {@code .}.
 *
 * <p>Labels read from files are cached in a static, unsynchronized map that is shared by
 * all load methods of this class.
 */
public class DatasetLoader {
    private static final Logger log = Logger.getLogger(DatasetLoader.class);

    /** Escapes the leading (type / subpopulation / label) fields of a line. */
    private static final StringEncoder stringCoder = new StringEncoder('%', " \t");

    /** Escapes individual parts of a feature name. */
    private static final StringEncoder featureCoder = new StringEncoder('%', "=. \t");

    /** Placeholder written in the subpopulation column when the id is null. */
    private static final String NULL_SUBPOPULATION = "NUL";

    /** Cache of labels by name, pre-populated with the binary POS/NEG labels. */
    private static final Map<String, ClassLabel> classLabelDict = new HashMap<String, ClassLabel>();

    static {
        classLabelDict.put("POS", ClassLabel.positiveLabel(1.0));
        classLabelDict.put("NEG", ClassLabel.negativeLabel(-1.0));
    }

    /**
     * Writes every example of {@code dataset} to {@code file}, one example per line.
     *
     * @param dataset the examples to write
     * @param file    destination file; overwritten if it exists
     * @throws IOException if the file cannot be opened or a write error occurs
     */
    public static void save(Dataset dataset, File file) throws IOException {
        PrintStream out = new PrintStream(new FileOutputStream(file));
        try {
            Iterator<Example> i = dataset.iterator();
            while (i.hasNext()) {
                out.println(DatasetLoader.asParsableString(i.next()));
            }
            DatasetLoader.failOnWriteError(out, file);
        }
        finally {
            // Always release the file handle, even if iteration or encoding fails.
            out.close();
        }
    }

    /**
     * Writes every example of {@code dataset} to {@code file} in the regression format:
     * the positive weight of the example's label, a tab, then the usual example line.
     *
     * @param dataset the examples to write
     * @param file    destination file; overwritten if it exists
     * @throws IOException if the file cannot be opened or a write error occurs
     */
    public static void saveRegression(Dataset dataset, File file) throws IOException {
        PrintStream out = new PrintStream(new FileOutputStream(file));
        try {
            Iterator<Example> i = dataset.iterator();
            while (i.hasNext()) {
                Example x = i.next();
                StringBuilder buf = new StringBuilder();
                buf.append(x.getLabel().posWeight());
                buf.append('\t');
                buf.append(DatasetLoader.asParsableString(x));
                out.println(buf.toString());
            }
            DatasetLoader.failOnWriteError(out, file);
        }
        finally {
            out.close();
        }
    }

    /**
     * Loads a file in the format written by {@link #saveRegression(Dataset, File)}.
     * Each resulting example carries a positive label whose weight is the leading score.
     *
     * @param file the file to read
     * @return the loaded dataset
     * @throws IOException              if the file cannot be read
     * @throws IllegalArgumentException if a line has no tab-separated score or is malformed
     */
    public static Dataset loadRegression(File file) throws IOException, NumberFormatException {
        BasicDataset dataset = new BasicDataset();
        LineNumberReader in = new LineNumberReader(new FileReader(file));
        try {
            String line;
            while ((line = in.readLine()) != null) {
                int tab = line.indexOf('\t');
                if (tab < 0) {
                    // Without this check substring(0, -1) would fail with an opaque index error.
                    throw new IllegalArgumentException("missing tab-separated score at line#" + in.getLineNumber() + " of " + file.getName());
                }
                Example x = DatasetLoader.parseLine(line.substring(tab + 1), file, in);
                double score = StringUtil.atof(line.substring(0, tab));
                dataset.add(new Example(x.asInstance(), ClassLabel.positiveLabel(score)));
            }
            log.info("loaded " + dataset.size() + " examples from " + file.getName());
        }
        finally {
            in.close();
        }
        return dataset;
    }

    /**
     * Loads a file in the format written by {@link #save(Dataset, File)}.
     *
     * @param file the file to read
     * @return the loaded dataset
     * @throws IOException              if the file cannot be read
     * @throws IllegalArgumentException if a line is malformed
     */
    public static Dataset loadFile(File file) throws IOException, NumberFormatException {
        BasicDataset dataset = new BasicDataset();
        ProgressCounter pc = new ProgressCounter("loading file " + file.getName(), "line");
        LineNumberReader in = new LineNumberReader(new FileReader(file));
        try {
            String line;
            while ((line = in.readLine()) != null) {
                dataset.add(DatasetLoader.parseLine(line, file, in));
                pc.progress();
            }
            log.info("loaded " + dataset.size() + " examples from " + file.getName());
        }
        finally {
            in.close();
        }
        pc.finished();
        return dataset;
    }

    /**
     * Reads relational examples (lines of {@code ID TYPE SUBPOPULATION_ID LABEL FEATURE ...})
     * from {@code file} and adds each one to {@code dataset}.
     *
     * @param file    the file to read
     * @param dataset the dataset that receives the examples
     * @throws IOException              if the file cannot be read
     * @throws IllegalArgumentException if a line is malformed
     */
    public static void loadRelFile(File file, RealRelationalDataset dataset) throws IOException, NumberFormatException {
        ProgressCounter pc = new ProgressCounter("loading file " + file.getName(), "line");
        LineNumberReader in = new LineNumberReader(new FileReader(file));
        try {
            String line;
            while ((line = in.readLine()) != null) {
                dataset.addSGM(DatasetLoader.parseRelLine(line, file, in));
                pc.progress();
            }
            log.info("loaded " + dataset.size() + " examples from " + file.getName());
        }
        finally {
            in.close();
        }
        pc.finished();
    }

    /**
     * Reads links (three whitespace-separated fields per line) from {@code file} and
     * registers each one through the static {@code RealRelationalDataset.addLink}.
     *
     * @param file    the file to read
     * @param dataset used here only to report its size in the log message
     * @throws IOException              if the file cannot be read
     * @throws IllegalArgumentException if a line has fewer than three fields
     */
    public static void loadLinkFile(File file, RealRelationalDataset dataset) throws IOException, NumberFormatException {
        ProgressCounter pc = new ProgressCounter("loading file " + file.getName(), "line");
        LineNumberReader in = new LineNumberReader(new FileReader(file));
        try {
            String line;
            while ((line = in.readLine()) != null) {
                RealRelationalDataset.addLink(DatasetLoader.parseLinkLine(line, file, in));
                pc.progress();
            }
            log.info("loaded " + dataset.size() + " examples from " + file.getName());
        }
        finally {
            in.close();
        }
        pc.finished();
    }

    /**
     * Reads a relational template file whose lines have the form {@code X ON Y} and
     * registers each pair through the static {@code RealRelationalDataset.addAggregator}.
     *
     * @param file    the file to read
     * @param dataset used here only to report its size in the log message
     * @throws IOException              if the file cannot be read
     * @throws IllegalArgumentException if a line has fewer than three fields or its second
     *                                  field is not {@code ON}
     */
    public static void loadRelTempFile(File file, RealRelationalDataset dataset) throws IOException, NumberFormatException {
        ProgressCounter pc = new ProgressCounter("loading file " + file.getName(), "line");
        LineNumberReader in = new LineNumberReader(new FileReader(file));
        try {
            String line;
            while ((line = in.readLine()) != null) {
                String[] arr = line.split("\\s+");
                if (arr.length < 3) {
                    throw new IllegalArgumentException("too few values at line#" + in.getLineNumber() + " of " + file.getName());
                }
                if (!arr[1].equals("ON")) {
                    throw new IllegalArgumentException("the format of the relational template is COUNT ON LEFT");
                }
                RealRelationalDataset.addAggregator(arr[0], arr[2]);
                pc.progress();
            }
            log.info("loaded " + dataset.size() + " examples from " + file.getName());
        }
        finally {
            in.close();
        }
        pc.finished();
    }

    /**
     * Loads multi-label examples. Each line is
     * {@code TYPE SUBPOPULATION_ID LABEL_1 ... LABEL_numDim FEATURE ...}.
     *
     * @param file   the file to read
     * @param numDim number of label columns on every line
     * @return the loaded dataset
     * @throws IOException              if the file cannot be read
     * @throws IllegalArgumentException if a line is malformed
     */
    public static Dataset loadMulti(File file, int numDim) throws IOException, NumberFormatException {
        MultiDataset dataset = new MultiDataset();
        ProgressCounter pc = new ProgressCounter("loading file " + file.getName(), "line");
        LineNumberReader in = new LineNumberReader(new FileReader(file));
        try {
            String line;
            while ((line = in.readLine()) != null) {
                dataset.addMulti(DatasetLoader.parseMultiLine(line, file, in, numDim));
                pc.progress();
            }
            log.info("loaded " + dataset.size() + " examples from " + file.getName());
        }
        finally {
            in.close();
        }
        pc.finished();
        return dataset;
    }

    /**
     * Writes a sequence dataset: the examples of each sequence, one per line, followed by
     * a line containing only {@code *}.
     *
     * @param dataset the sequences to write
     * @param file    destination file; overwritten if it exists
     * @throws IOException if the file cannot be opened or a write error occurs
     */
    public static void saveSequence(SequenceDataset dataset, File file) throws IOException {
        PrintStream out = new PrintStream(new FileOutputStream(file));
        try {
            Iterator<Example[]> i = dataset.sequenceIterator();
            while (i.hasNext()) {
                Example[] seq = i.next();
                for (int j = 0; j < seq.length; ++j) {
                    out.println(DatasetLoader.asParsableString(seq[j]));
                }
                out.println("*");
            }
            DatasetLoader.failOnWriteError(out, file);
        }
        finally {
            out.close();
        }
    }

    /**
     * Loads a file in the format written by {@link #saveSequence(SequenceDataset, File)}.
     * A trailing run of examples without a closing {@code *} line is still added as a
     * final sequence.
     *
     * @param file the file to read
     * @return the loaded sequence dataset
     * @throws IOException              if the file cannot be read
     * @throws IllegalArgumentException if a line is malformed
     */
    public static SequenceDataset loadSequence(File file) throws IOException, NumberFormatException {
        SequenceDataset dataset = new SequenceDataset();
        LineNumberReader in = new LineNumberReader(new FileReader(file));
        try {
            String line;
            List<Example> list = new ArrayList<Example>();
            while ((line = in.readLine()) != null) {
                if ("*".equals(line)) {
                    DatasetLoader.clearBuffer(list, dataset);
                    continue;
                }
                list.add(DatasetLoader.parseLine(line, file, in));
            }
            if (list.size() > 0) {
                DatasetLoader.clearBuffer(list, dataset);
            }
            log.info("loaded " + dataset.size() + " examples from " + file.getName());
        }
        finally {
            in.close();
        }
        return dataset;
    }

    /** Adds the buffered examples to {@code dataset} as one sequence and empties the buffer. */
    private static void clearBuffer(List<Example> list, SequenceDataset dataset) {
        Example[] seq = list.toArray(new Example[list.size()]);
        dataset.addSequence(seq);
        list.clear();
    }

    /**
     * Throws if {@code out} has recorded a write error. {@link PrintStream} never throws
     * on write, so without this check a failed save would go unnoticed.
     */
    private static void failOnWriteError(PrintStream out, File file) throws IOException {
        if (out.checkError()) {
            throw new IOException("error writing " + file);
        }
    }

    /**
     * Renders one example as a line in the basic format. The type field is always
     * {@code k}; a null subpopulation id is written as {@code NUL}.
     */
    private static String asParsableString(Example x) {
        StringBuffer buf = new StringBuffer("");
        buf.append('k');
        buf.append(' ');
        buf.append(stringCoder.encode(x.getSubpopulationId() != null ? x.getSubpopulationId() : NULL_SUBPOPULATION));
        buf.append(' ');
        buf.append(stringCoder.encode(x.getLabel().bestClassName()));
        buf.append(' ');
        DatasetLoader.appendParsableFeatures(buf, x);
        return buf.toString();
    }

    /**
     * Appends the features of {@code x} to {@code buf}: first every binary feature, then
     * every numeric feature followed by {@code =weight}. Each feature is preceded by a space.
     *
     * @param buf the buffer to append to
     * @param x   the example whose features are written
     */
    static void appendParsableFeatures(StringBuffer buf, Example x) {
        Iterator<Feature> i = x.binaryFeatureIterator();
        while (i.hasNext()) {
            buf.append(' ');
            DatasetLoader.appendFeatureName(buf, i.next());
        }
        i = x.numericFeatureIterator();
        while (i.hasNext()) {
            Feature f = i.next();
            buf.append(' ');
            DatasetLoader.appendFeatureName(buf, f);
            buf.append("=" + x.getWeight(f));
        }
    }

    /** Appends the encoded parts of a feature name, joined by {@code '.'}. */
    private static void appendFeatureName(StringBuffer buf, Feature f) {
        for (int j = 0; j < f.size(); ++j) {
            if (j > 0) {
                buf.append('.');
            }
            buf.append(featureCoder.encode(f.getPart(j)));
        }
    }

    /**
     * Builds the source identifier given to an example loaded from a file.
     *
     * @param fileName   name of the file the example came from
     * @param lineNumber line number of the example within that file
     * @return {@code fileName + ":" + lineNumber}
     */
    public static String getSourceAssignedToExample(String fileName, int lineNumber) {
        return fileName + ":" + lineNumber;
    }

    /**
     * Parses one line of the basic format ({@code TYPE SUBPOPULATION_ID LABEL FEATURE ...}).
     *
     * @throws IllegalArgumentException if the line has fewer than three fields, a numeric
     *                                  feature cannot be parsed, or the type is {@code b}
     *                                  and the label is not already known
     */
    private static Example parseLine(String line, File file, LineNumberReader in) {
        String[] arr = line.split("\\s+");
        if (arr.length < 3) {
            throw new IllegalArgumentException("too few values at line#" + in.getLineNumber() + " of " + file.getName());
        }
        for (int i = 0; i < 3; ++i) {
            arr[i] = stringCoder.decode(arr[i]);
        }
        MutableInstance instance = DatasetLoader.newInstance(arr[1], file, in);
        DatasetLoader.addFeatures(instance, arr, 3, file, in);
        ClassLabel label = DatasetLoader.lookupLabel(arr[2], arr[0], file, in);
        return new Example(instance, label);
    }

    /**
     * Parses one relational line ({@code ID TYPE SUBPOPULATION_ID LABEL FEATURE ...}).
     * The ID field is used as-is; the next three fields are decoded.
     *
     * @throws IllegalArgumentException under the same conditions as {@code parseLine},
     *                                  with a minimum of four fields
     */
    private static SGMExample parseRelLine(String line, File file, LineNumberReader in) {
        String[] arr = line.split("\\s+");
        if (arr.length < 4) {
            throw new IllegalArgumentException("too few values at line#" + in.getLineNumber() + " of " + file.getName());
        }
        String id = arr[0];
        for (int i = 1; i < 4; ++i) {
            arr[i] = stringCoder.decode(arr[i]);
        }
        MutableInstance instance = DatasetLoader.newInstance(arr[2], file, in);
        DatasetLoader.addFeatures(instance, arr, 4, file, in);
        ClassLabel label = DatasetLoader.lookupLabel(arr[3], arr[1], file, in);
        return new SGMExample((Instance)instance, label, id);
    }

    /**
     * Parses one link line from its first three whitespace-separated fields.
     *
     * @throws IllegalArgumentException if the line has fewer than three fields
     */
    private static Link parseLinkLine(String line, File file, LineNumberReader in) {
        String[] arr = line.split("\\s+");
        if (arr.length < 3) {
            throw new IllegalArgumentException("too few values at line#" + in.getLineNumber() + " of " + file.getName());
        }
        return new Link(arr[0], arr[1], arr[2]);
    }

    /**
     * Parses one multi-label line
     * ({@code TYPE SUBPOPULATION_ID LABEL_1 ... LABEL_numDim FEATURE ...}).
     *
     * @throws IllegalArgumentException under the same conditions as {@code parseLine},
     *                                  with a minimum of {@code 2 + numDim} fields
     */
    private static MultiExample parseMultiLine(String line, File file, LineNumberReader in, int numDim) {
        int firstFeature = 2 + numDim;
        String[] arr = line.split("\\s+");
        if (arr.length < firstFeature) {
            throw new IllegalArgumentException("too few values at line#" + in.getLineNumber() + " of " + file.getName());
        }
        for (int i = 0; i < firstFeature; ++i) {
            arr[i] = stringCoder.decode(arr[i]);
        }
        MutableInstance instance = DatasetLoader.newInstance(arr[1], file, in);
        DatasetLoader.addFeatures(instance, arr, firstFeature, file, in);
        ClassLabel[] labels = new ClassLabel[numDim];
        for (int i = 0; i < numDim; ++i) {
            // A label name repeated on the same line resolves to the one cached instance.
            labels[i] = DatasetLoader.lookupLabel(arr[2 + i], arr[0], file, in);
        }
        MultiClassLabel multiLabel = new MultiClassLabel(labels);
        return new MultiExample((Instance)instance, multiLabel);
    }

    /**
     * Creates an empty instance whose source is {@code fileName:lineNumber} and whose
     * subpopulation id is {@code null} when the field holds the {@code NUL} placeholder.
     */
    private static MutableInstance newInstance(String subpopulationField, File file, LineNumberReader in) {
        String source = DatasetLoader.getSourceAssignedToExample(file.getName(), in.getLineNumber());
        String subpopulationId = NULL_SUBPOPULATION.equals(subpopulationField) ? null : subpopulationField;
        return new MutableInstance(source, subpopulationId);
    }

    /**
     * Adds the features in {@code arr[start..]} to {@code instance}. A field containing
     * {@code '='} is a numeric feature ({@code name=weight}); any other field is binary.
     *
     * @throws IllegalArgumentException if a numeric feature cannot be parsed; the
     *                                  underlying {@link NumberFormatException} is the cause
     */
    private static void addFeatures(MutableInstance instance, String[] arr, int start, File file, LineNumberReader in) {
        for (int i = start; i < arr.length; ++i) {
            int eqPos = arr[i].indexOf('=');
            if (eqPos < 0) {
                instance.addBinary(DatasetLoader.parseFeatureName(arr[i]));
                continue;
            }
            try {
                String feature = arr[i].substring(0, eqPos);
                double weight = Double.parseDouble(arr[i].substring(eqPos + 1));
                instance.addNumeric(DatasetLoader.parseFeatureName(feature), weight);
            }
            catch (NumberFormatException e) {
                throw new IllegalArgumentException("bad feature# " + i + " line#" + in.getLineNumber() + " of " + file.getName(), e);
            }
        }
    }

    /**
     * Returns the cached label named {@code labelName}, creating and caching a new one if
     * it is not yet known.
     *
     * @param typeTag the line's type field; for type {@code b} only labels already in the
     *                cache (initially POS and NEG) are accepted
     * @throws IllegalArgumentException if the type is {@code b} and the label is unknown
     */
    private static ClassLabel lookupLabel(String labelName, String typeTag, File file, LineNumberReader in) {
        ClassLabel label = classLabelDict.get(labelName);
        if (label == null) {
            if ("b".equals(typeTag)) {
                throw new IllegalArgumentException("should be POS/NEG but label is '" + labelName + "' at line#" + in.getLineNumber() + " of " + file.getName());
            }
            label = new ClassLabel(labelName);
            classLabelDict.put(labelName, label);
        }
        return label;
    }

    /** Splits an encoded feature name on {@code '.'} and decodes each part. */
    private static Feature parseFeatureName(String string2) {
        String[] featureParts = string2.split("\\.");
        for (int j = 0; j < featureParts.length; ++j) {
            featureParts[j] = featureCoder.decode(featureParts[j]);
        }
        return new Feature(featureParts);
    }

    /**
     * Loads a file whose lines are {@code LABEL NAME:VALUE NAME:VALUE ...}. Tokens are
     * split on whitespace and {@code ':'}; the label is parsed as a double and turned
     * into a binary label. Lines containing no tokens are skipped.
     *
     * @param file the file to read
     * @return the loaded dataset
     * @throws IOException              if the file cannot be read
     * @throws NumberFormatException    if a label or feature value is not a number
     * @throws IllegalArgumentException if a feature name has no value after it
     */
    public static Dataset loadSVMStyle(File file) throws IOException {
        BasicDataset dataset = new BasicDataset();
        LineNumberReader in = new LineNumberReader(new FileReader(file));
        try {
            String line;
            while ((line = in.readLine()) != null) {
                StringTokenizer st = new StringTokenizer(line, " \t\n\r\f:");
                if (!st.hasMoreTokens()) {
                    // Blank line (e.g. at end of file): nothing to parse.
                    continue;
                }
                double labelDouble = Double.parseDouble(st.nextToken());
                MutableInstance instance = new MutableInstance();
                while (st.hasMoreTokens()) {
                    String featureName = st.nextToken();
                    if (!st.hasMoreTokens()) {
                        throw new IllegalArgumentException("feature '" + featureName + "' has no value at line#" + in.getLineNumber() + " of " + file.getName());
                    }
                    String featureValue = st.nextToken();
                    instance.addNumeric(new Feature(featureName), Double.parseDouble(featureValue));
                }
                dataset.add(new Example(instance, ClassLabel.binaryLabel(labelDouble)));
            }
        }
        finally {
            in.close();
        }
        return dataset;
    }

    /**
     * Instance-level entry point that delegates to {@link #loadFile(File)}.
     *
     * @param f the file to read
     * @return the loaded dataset
     * @throws IOException if the file cannot be read
     */
    public Object load(File f) throws IOException {
        return DatasetLoader.loadFile(f);
    }

    /**
     * Loads a dataset and shows it in a viewer frame. With a first argument starting with
     * {@code -seq} or {@code -reg} the second argument is the file, loaded as a sequence
     * or regression dataset respectively; otherwise the first argument is the file.
     *
     * @param args command-line arguments
     */
    public static void main(String[] args) {
        boolean sequential = args.length > 0 && args[0].startsWith("-seq");
        boolean regression = args.length > 0 && args[0].startsWith("-reg");
        int fileIndex = sequential || regression ? 1 : 0;
        if (args.length <= fileIndex) {
            // Report missing arguments directly instead of via an index exception.
            System.out.println("usage: file");
            return;
        }
        try {
            String dbName = args[fileIndex];
            Dataset d;
            if (sequential) {
                d = DatasetLoader.loadSequence(new File(dbName));
            } else if (regression) {
                d = DatasetLoader.loadRegression(new File(dbName));
            } else {
                d = DatasetLoader.loadFile(new File(dbName));
            }
            new ViewerFrame("Data from " + dbName, d.toGUI());
        }
        catch (Exception e) {
            e.printStackTrace();
            System.out.println("usage: file");
        }
    }
}

