/*
 * Decompiled with CFR 0.152.
 */
package iitb.Segment;

import iitb.Segment.DCTrainData;
import iitb.Segment.DCTrainRecord;
import iitb.Segment.LabelMap;
import iitb.Segment.TrainData;
import java.io.BufferedReader;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;
import java.util.Vector;

/**
 * Static helpers for reading and writing the {@code .tagged} and {@code .raw}
 * text files handled by this package.
 *
 * <p>A tagged file is read in one of two layouts, selected by its first line:
 * if that line starts with {@code fixed-column-format} the next line lists one
 * integer label per column and every following line is one record; otherwise
 * every line holds a text segment and an integer label separated by the tag
 * delimiter, and a line with fewer than two fields ends the current record.
 *
 * <p>The public methods report {@link IOException}s by printing a message and
 * calling {@code System.exit(-1)}.
 */
public class DataCruncher {
    /** Read-ahead limit (in characters) used while peeking at the first line of a tagged file. */
    private static final int HEADER_MARK_LIMIT = 1000;

    /**
     * Tells whether a token is a delimiter that should be discarded, i.e. it
     * occurs in {@code delimit} but not in {@code impDelimit}.
     */
    private static boolean isDroppedDelimiter(String tok, String delimit, String impDelimit) {
        return delimit.indexOf(tok) != -1 && impDelimit.indexOf(tok) == -1;
    }

    /**
     * Lower-cases {@code text} and splits it on the characters of {@code delimit}.
     *
     * @param text       the text to split
     * @param delimit    the delimiter characters
     * @param impDelimit delimiter characters that are kept as tokens of their own;
     *                   all other delimiters are dropped
     * @return the surviving tokens, in order (possibly empty, never {@code null})
     */
    static String[] getTokenList(String text, String delimit, String impDelimit) {
        // Delimiters are returned as tokens so that the "important" ones can be kept.
        StringTokenizer textTok = new StringTokenizer(text.toLowerCase(), delimit, true);
        List<String> kept = new ArrayList<String>(textTok.countTokens());
        while (textTok.hasMoreTokens()) {
            String tokStr = textTok.nextToken();
            if (isDroppedDelimiter(tokStr, delimit, impDelimit)) {
                continue;
            }
            kept.add(tokStr);
        }
        return kept.toArray(new String[0]);
    }

    /**
     * Reads one record in the one-segment-per-line layout.
     *
     * <p>Lines are consumed until end of input or until a line that splits into
     * fewer than two fields on {@code tagDelimit}; that terminating line is
     * consumed as well. For every other line the first field is tokenized with
     * {@link #getTokenList} and the second field is parsed as the integer label.
     *
     * @param numLabels  unused by this method
     * @param tin        reader positioned at the start of a record
     * @param tagDelimit characters separating the text from its label
     * @param delimit    token delimiters passed to {@link #getTokenList}
     * @param impDelimit kept delimiters passed to {@link #getTokenList}
     * @param t          output: label of each segment read
     * @param cArray     output: tokens of each segment read
     * @return the number of segments stored in {@code t} and {@code cArray}
     * @throws IOException if reading from {@code tin} fails
     */
    static int readRowVarCol(int numLabels, BufferedReader tin, String tagDelimit, String delimit, String impDelimit, int[] t, String[][] cArray) throws IOException {
        int ptr = 0;
        String line;
        while ((line = tin.readLine()) != null) {
            StringTokenizer firstSplit = new StringTokenizer(line.toLowerCase(), tagDelimit);
            if (firstSplit.countTokens() < 2) {
                break;
            }
            String w = firstSplit.nextToken();
            t[ptr] = Integer.parseInt(firstSplit.nextToken());
            cArray[ptr++] = DataCruncher.getTokenList(w, delimit, impDelimit);
        }
        return ptr;
    }

    /**
     * Reads one record in the fixed-column layout: a single line whose columns
     * are separated by {@code tagDelimit} and whose i-th column carries
     * {@code labels[i]}.
     *
     * @param numLabels  largest label value that is accepted
     * @param tin        reader positioned at the start of a record line
     * @param tagDelimit characters separating the columns
     * @param delimit    token delimiters passed to {@link #getTokenList}
     * @param impDelimit kept delimiters passed to {@link #getTokenList}
     * @param t          output: label of each column kept
     * @param cArray     output: tokens of each column kept
     * @param labels     label assigned to each column position
     * @param rawTok     unused by this method
     * @return the number of columns stored, or 0 at end of input
     * @throws IOException if reading from {@code tin} fails
     */
    static int readRowFixedCol(int numLabels, BufferedReader tin, String tagDelimit, String delimit, String impDelimit, int[] t, String[][] cArray, int[] labels, StringTokenizer rawTok) throws IOException {
        String line = tin.readLine();
        if (line == null) {
            return 0;
        }
        // Delimiters are returned as tokens so that empty columns can be detected.
        StringTokenizer firstSplit = new StringTokenizer(line.toLowerCase(), tagDelimit, true);
        int ptr = 0;
        for (int i = 0; i < labels.length && firstSplit.hasMoreTokens(); ++i) {
            int label = labels[i];
            String w = firstSplit.nextToken();
            // A delimiter here means the column is empty: skip this column's label.
            if (tagDelimit.indexOf(w) != -1) continue;
            // Consume the delimiter that closes this column, if there is one.
            if (firstSplit.hasMoreTokens()) {
                firstSplit.nextToken();
            }
            // Columns whose label is outside 1..numLabels are ignored.
            if (label <= 0 || label > numLabels) continue;
            t[ptr] = label;
            cArray[ptr++] = DataCruncher.getTokenList(w, delimit, impDelimit);
        }
        return ptr;
    }

    /**
     * Peeks at the first line of a tagged file to detect the fixed-column layout.
     *
     * <p>If the first line starts with {@code fixed-column-format} (case
     * insensitive), the following line is parsed as up to {@code numLabels}
     * integer labels separated by {@code tagDelimit}; positions for which no
     * label is given stay 0. Otherwise the reader is reset to where it was on
     * entry and {@code null} is returned.
     *
     * @param numLabels  length of the returned array
     * @param tin        reader positioned at the start of the file; must support mark/reset
     * @param tagDelimit characters separating the labels in the label row
     * @return the per-column labels, or {@code null} when the file has no fixed-column header
     * @throws IOException if the file is empty, if the label row is missing, or if reading fails
     */
    static int[] readHeaderInfo(int numLabels, BufferedReader tin, String tagDelimit) throws IOException {
        tin.mark(HEADER_MARK_LIMIT);
        String line = tin.readLine();
        if (line == null) {
            throw new IOException("Header row not present in tagged file");
        }
        if (!line.toLowerCase().startsWith("fixed-column-format")) {
            tin.reset();
            return null;
        }
        line = tin.readLine();
        if (line == null) {
            // Previously this case surfaced as a NullPointerException from StringTokenizer.
            throw new IOException("Label row not present after fixed-column-format header in tagged file");
        }
        StringTokenizer firstSplit = new StringTokenizer(line, tagDelimit);
        int[] labels = new int[numLabels];
        int i = 0;
        while (i < numLabels && firstSplit.hasMoreTokens()) {
            labels[i++] = Integer.parseInt(firstSplit.nextToken());
        }
        return labels;
    }

    /**
     * Reads the training records stored in {@code tfile + ".tagged"}.
     *
     * <p>One record is read from the tagged file for every line of
     * {@code rfile + ".raw"}; reading stops at the end of the raw file or at the
     * first record that yields no segments. Each label read is passed through
     * {@code labelMap.map} before being stored.
     *
     * <p>On an {@link IOException} a message is printed to standard error and
     * the JVM exits with status -1.
     *
     * @param numLabels  number of labels, forwarded to the row/header readers
     * @param tfile      path of the tagged file, without the {@code .tagged} suffix
     * @param rfile      path of the raw file, without the {@code .raw} suffix
     * @param delimit    token delimiters
     * @param tagDelimit characters separating text from labels (or columns) in the tagged file
     * @param impDelimit delimiters that are kept as tokens
     * @param labelMap   mapping applied to every label read
     * @return the records read, wrapped in a {@code DCTrainData}
     */
    public static TrainData readTagged(int numLabels, String tfile, String rfile, String delimit, String tagDelimit, String impDelimit, LabelMap labelMap) {
        try (BufferedReader tin = new BufferedReader(new FileReader(tfile + ".tagged"));
             BufferedReader rin = new BufferedReader(new FileReader(rfile + ".raw"))) {
            Vector<DCTrainRecord> td = new Vector<DCTrainRecord>();
            int[] labels = DataCruncher.readHeaderInfo(numLabels, tin, tagDelimit);
            boolean fixedColFormat = labels != null;
            // Scratch buffers, grown on demand and reused across records.
            int[] t = new int[0];
            String[][] cArray = new String[0][];
            String rawLine;
            while ((rawLine = rin.readLine()) != null) {
                StringTokenizer rawTok = new StringTokenizer(rawLine, delimit, true);
                int needed = rawTok.countTokens();
                if (fixedColFormat) {
                    // A fixed-column row can fill up to labels.length slots whatever the raw line holds.
                    needed = Math.max(needed, labels.length);
                }
                if (needed > t.length) {
                    t = new int[needed];
                    cArray = new String[needed][];
                }
                int ptr = fixedColFormat
                        ? DataCruncher.readRowFixedCol(numLabels, tin, tagDelimit, delimit, impDelimit, t, cArray, labels, rawTok)
                        : DataCruncher.readRowVarCol(numLabels, tin, tagDelimit, delimit, impDelimit, t, cArray);
                if (ptr == 0) break;
                int[] at = new int[ptr];
                for (int i = 0; i < ptr; ++i) {
                    at[i] = labelMap.map(t[i]);
                }
                String[][] c = new String[ptr][];
                System.arraycopy(cArray, 0, c, 0, ptr);
                td.add(new DCTrainRecord(at, c));
            }
            return new DCTrainData(td);
        }
        catch (IOException e) {
            System.err.println("I/O Error" + e);
            System.exit(-1);
            return null;
        }
    }

    /**
     * Reads {@code file + ".raw"} and appends one {@code String[]} of tokens per
     * line to {@code data}, tokenized exactly as {@link #getTokenList} does.
     *
     * <p>On an {@link IOException} a message is printed to standard output and
     * the JVM exits with status -1.
     *
     * @param data       collection receiving one {@code String[]} per line
     * @param file       path of the raw file, without the {@code .raw} suffix
     * @param delimit    token delimiters
     * @param impDelimit delimiters that are kept as tokens
     */
    @SuppressWarnings({"rawtypes", "unchecked"}) // the raw Vector parameter is part of the existing signature
    public static void readRaw(Vector data, String file, String delimit, String impDelimit) {
        try (BufferedReader rin = new BufferedReader(new FileReader(file + ".raw"))) {
            String line;
            while ((line = rin.readLine()) != null) {
                data.add(DataCruncher.getTokenList(line, delimit, impDelimit));
            }
        }
        catch (IOException e) {
            System.out.println("I/O Error" + e);
            System.exit(-1);
        }
    }

    /**
     * Builds {@code file + ".raw"} from {@code file + ".tagged"}.
     *
     * <p>For every tagged line with at least two fields, a space followed by
     * the first field is appended to the current output line. A tagged line with
     * fewer than two fields flushes the current output line and starts a new
     * one; whatever is pending at end of input is written as a final line.
     *
     * <p>On an {@link IOException} a message is printed to standard output and
     * the JVM exits with status -1.
     *
     * @param file       path shared by both files, without suffix
     * @param tagDelimit characters separating the fields of a tagged line
     */
    public static void createRaw(String file, String tagDelimit) {
        try (BufferedReader in = new BufferedReader(new FileReader(file + ".tagged"));
             PrintWriter out = new PrintWriter(new FileOutputStream(file + ".raw"))) {
            StringBuilder rawLine = new StringBuilder();
            String line;
            while ((line = in.readLine()) != null) {
                StringTokenizer t = new StringTokenizer(line, tagDelimit);
                if (t.countTokens() < 2) {
                    out.println(rawLine);
                    rawLine.setLength(0);
                    continue;
                }
                rawLine.append(" ").append(t.nextToken());
            }
            out.println(rawLine);
            // PrintWriter never throws on write failures; surface them explicitly.
            if (out.checkError()) {
                throw new IOException("Error writing " + file + ".raw");
            }
        }
        catch (IOException e) {
            System.out.println("I/O Error" + e);
            System.exit(-1);
        }
    }
}

