/*
 * Decompiled with CFR 0.152.
 */
package edu.cmu.minorthird.text;

import edu.cmu.minorthird.text.BasicTextBase;
import edu.cmu.minorthird.text.BasicTextLabels;
import edu.cmu.minorthird.text.Document;
import edu.cmu.minorthird.text.MutableTextBase;
import edu.cmu.minorthird.text.MutableTextLabels;
import edu.cmu.minorthird.text.Span;
import edu.cmu.minorthird.text.SplitTokenizer;
import edu.cmu.minorthird.text.TextLabelsLoader;
import edu.cmu.minorthird.text.TextToken;
import edu.cmu.minorthird.text.Tokenizer;
import edu.cmu.minorthird.util.ProgressCounter;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.LineNumberReader;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.log4j.Logger;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * Loads files or directories of files into a {@link MutableTextBase}, optionally
 * interpreting inline {@code <tag>...</tag>} markup as labeled spans that are
 * recorded in a companion {@link MutableTextLabels} (see {@link #getLabels()}).
 *
 * <p>Two document layouts are handled by {@link #load(File)}: one document per
 * file ({@link #DOC_PER_FILE}) and one document per non-blank line
 * ({@link #DOC_PER_LINE}). A separate entry point,
 * {@link #loadWordPerLineFile(File)}, reads column-oriented files with one token
 * per line.
 *
 * <p>Instances keep the text base, labels and markup stack of the most recent
 * load in fields, so a single instance must not be used by several threads at once.
 */
public class TextBaseLoader {
    /** Not referenced inside this class; retained for source compatibility. */
    public static final int NONE = 0;
    /** Not referenced inside this class; retained for source compatibility. */
    public static final int DIRECTORY_NAME = 1;
    /** Not referenced inside this class; retained for source compatibility. */
    public static final int FILE_NAME = 2;
    /** Not referenced inside this class; retained for source compatibility. */
    public static final int IN_FILE = 3;
    /** Document style: every non-blank line of a file becomes its own document. */
    public static final int DOC_PER_LINE = 0;
    /** Document style: the whole file becomes a single document. */
    public static final int DOC_PER_FILE = 1;
    /** Markup flag value: interpret inline tags as span labels. */
    public static final boolean USE_XML = true;
    /** Markup flag value: keep inline tags as ordinary document text. */
    public static final boolean IGNORE_XML = false;

    private static final Logger log = Logger.getLogger(TextBaseLoader.class);

    /** Matches an opening or closing tag; group 1 is the tag name. Compiled once instead of per line. */
    private static final Pattern MARKUP_PATTERN = Pattern.compile("</?([^ ><]+)( [^<>]+)?>");

    /** First column of the line that separates documents in word-per-line files. */
    private static final String DOC_START_MARKER = "-DOCSTART-";

    private int documentStyle = DOC_PER_FILE;
    private boolean use_markup = true;
    private boolean recurseDirectories = false;
    /** Policy value handed unchanged to {@code TextLabelsLoader.closeLabels}. */
    private int closurePolicy = 1;
    /** Tags opened but not yet closed while parsing markup. */
    private List<StackEntry> stack;
    private MutableTextLabels labels;
    private MutableTextBase textBase;

    /** Creates a loader with the defaults: one document per file, markup enabled, no recursion. */
    public TextBaseLoader() {
    }

    /**
     * @param documentStyle {@link #DOC_PER_LINE} or {@link #DOC_PER_FILE}
     */
    public TextBaseLoader(int documentStyle) {
        this.documentStyle = documentStyle;
    }

    /**
     * @param documentStyle {@link #DOC_PER_LINE} or {@link #DOC_PER_FILE}
     * @param use_markup whether inline tags are interpreted as span labels
     */
    public TextBaseLoader(int documentStyle, boolean use_markup) {
        this.documentStyle = documentStyle;
        this.use_markup = use_markup;
    }

    /**
     * @param documentStyle {@link #DOC_PER_LINE} or {@link #DOC_PER_FILE}
     * @param use_markup whether inline tags are interpreted as span labels
     * @param recurseDirectories whether sub-directories are loaded too
     */
    public TextBaseLoader(int documentStyle, boolean use_markup, boolean recurseDirectories) {
        this.documentStyle = documentStyle;
        this.use_markup = use_markup;
        this.recurseDirectories = recurseDirectories;
    }

    /**
     * @param documentStyle {@link #DOC_PER_LINE} or {@link #DOC_PER_FILE}
     * @param docID ignored by this implementation
     */
    public TextBaseLoader(int documentStyle, int docID) {
        this.documentStyle = documentStyle;
    }

    /**
     * @param documentStyle {@link #DOC_PER_LINE} or {@link #DOC_PER_FILE}
     * @param docID ignored by this implementation
     * @param use_markup whether inline tags are interpreted as span labels
     */
    public TextBaseLoader(int documentStyle, int docID, boolean use_markup) {
        this.documentStyle = documentStyle;
        this.use_markup = use_markup;
    }

    /**
     * @param documentStyle {@link #DOC_PER_LINE} or {@link #DOC_PER_FILE}
     * @param docID ignored by this implementation
     * @param groupID ignored by this implementation
     * @param categoryID ignored by this implementation
     */
    public TextBaseLoader(int documentStyle, int docID, int groupID, int categoryID) {
        this.documentStyle = documentStyle;
    }

    /**
     * @param documentStyle {@link #DOC_PER_LINE} or {@link #DOC_PER_FILE}
     * @param docID ignored by this implementation
     * @param groupID ignored by this implementation
     * @param categoryID ignored by this implementation
     * @param labelsInFile whether inline tags are interpreted as span labels
     * @param recurseDirectories whether sub-directories are loaded too
     */
    public TextBaseLoader(int documentStyle, int docID, int groupID, int categoryID, boolean labelsInFile, boolean recurseDirectories) {
        this.documentStyle = documentStyle;
        this.use_markup = labelsInFile;
        this.recurseDirectories = recurseDirectories;
    }

    /**
     * Loads a file, or every file in a directory, into a fresh text base.
     *
     * @param dataLocation file or directory to read
     * @return the newly populated text base
     * @throws IOException if a file cannot be read
     * @throws ParseException if a closing tag cannot be paired with an opening tag
     * @throws IllegalArgumentException if a directory cannot be listed
     */
    public MutableTextBase load(File dataLocation) throws IOException, ParseException {
        this.textBase = new BasicTextBase();
        return this.loadInto(dataLocation);
    }

    /**
     * Same as {@link #load(File)} but the text base is created with the given tokenizer.
     *
     * @param dataLocation file or directory to read
     * @param tok tokenizer handed to the new text base
     * @return the newly populated text base
     * @throws IOException if a file cannot be read
     * @throws ParseException if a closing tag cannot be paired with an opening tag
     * @throws IllegalArgumentException if a directory cannot be listed
     */
    public MutableTextBase load(File dataLocation, Tokenizer tok) throws IOException, ParseException {
        this.textBase = new BasicTextBase(tok);
        return this.loadInto(dataLocation);
    }

    /** Shared tail of the {@code load} overloads; expects {@code textBase} to be freshly assigned. */
    private MutableTextBase loadInto(File dataLocation) throws IOException, ParseException {
        this.labels = new BasicTextLabels(this.textBase);
        if (dataLocation.isDirectory()) {
            this.loadDirectory(dataLocation);
        } else {
            this.loadFile(dataLocation);
        }
        return this.textBase;
    }

    /**
     * Loads a column-oriented file holding one token per line.
     *
     * <p>Each line is split on single whitespace characters. Lines with fewer than
     * three columns are skipped. Column 0 is the token text, column 1 is stored as
     * the token's {@code "POS"} property, and column 3, when present and not
     * {@code "O"}, is used as the span type for that token. A line whose first
     * column is {@code -DOCSTART-} ends the current document. Documents are named
     * {@code <fileName>-<n>}, where {@code n} starts at 1 and is incremented at
     * every marker line.
     *
     * <p>Tokens that follow the last marker line are emitted as a final document
     * when the end of the file is reached.
     *
     * @param file file to read
     * @return the newly populated text base
     * @throws FileNotFoundException if the file cannot be opened
     * @throws IOException if reading fails
     */
    public MutableTextBase loadWordPerLineFile(File file) throws IOException, FileNotFoundException {
        this.textBase = new BasicTextBase(new SplitTokenizer(" "));
        this.labels = new BasicTextLabels(this.textBase);
        String id = file.getName();
        int docNum = 1;
        String curDocID = id + "-" + docNum;
        StringBuffer buf = new StringBuffer();
        List<CharSpan> spanList = new ArrayList<CharSpan>();
        List<String> tokenPropList = new ArrayList<String>();
        LineNumberReader in = new LineNumberReader(new FileReader(file));
        try {
            String line;
            while ((line = in.readLine()) != null) {
                // split() drops trailing empty strings, so a whitespace-only line yields an empty array.
                String[] words = line.split("\\s");
                if (words.length > 0 && DOC_START_MARKER.equals(words[0])) {
                    this.addDocument(buf.toString(), curDocID, spanList, tokenPropList);
                    spanList.clear();
                    tokenPropList.clear();
                    buf = new StringBuffer();
                    curDocID = id + "-" + ++docNum;
                    continue;
                }
                if (words.length <= 2) {
                    continue;
                }
                int start = buf.length();
                buf.append(words[0]).append(' ');
                // The span ends before the separating blank.
                int end = buf.length() - 1;
                tokenPropList.add(words[1]);
                // A three-column line carries no label column; treat it as unlabeled.
                if (words.length > 3 && !"O".equals(words[3])) {
                    spanList.add(new CharSpan(start, end, words[3], curDocID));
                }
            }
            // Flush whatever followed the last marker; otherwise the final document is lost.
            if (buf.length() > 0) {
                this.addDocument(buf.toString(), curDocID, spanList, tokenPropList);
            }
        } finally {
            in.close();
        }
        return this.textBase;
    }

    /** Sets whether inline tags are interpreted as span labels. */
    public void setLabelsInFile(boolean b) {
        this.use_markup = b;
    }

    /** Sets the document style: {@link #DOC_PER_LINE} or {@link #DOC_PER_FILE}. */
    public void setDocumentStyle(int style) {
        this.documentStyle = style;
    }

    /** Sets whether sub-directories are descended into when a directory is loaded. */
    public void setRecurseDirectories(boolean rec) {
        this.recurseDirectories = rec;
    }

    /**
     * @return the labels built by the most recent load, or {@code null} if nothing has been loaded yet
     */
    public MutableTextLabels getLabels() {
        return this.labels;
    }

    /**
     * Loads every regular file of a directory in sorted order, skipping entries
     * named {@code CVS} and descending into sub-directories only when recursion is enabled.
     *
     * @throws IllegalArgumentException if the directory cannot be listed
     */
    private void loadDirectory(File directory) throws IOException, ParseException {
        File[] files = directory.listFiles();
        // listFiles() returns null on failure; this must be checked before sorting.
        if (files == null) {
            throw new IllegalArgumentException("can't list directory " + directory.getName());
        }
        Arrays.sort(files);
        ProgressCounter pc = new ProgressCounter("loading directory " + directory.getName(), "file", files.length);
        for (int i = 0; i < files.length; ++i) {
            File entry = files[i];
            if ("CVS".equals(entry.getName())) {
                continue;
            }
            if (entry.isDirectory() && this.recurseDirectories) {
                this.loadDirectory(entry);
            }
            if (entry.isFile()) {
                this.loadFile(entry);
            }
            pc.progress();
        }
        pc.finished();
    }

    /**
     * Reads one file and adds its content as one document per file or one
     * document per non-blank line, depending on the configured document style.
     * The reader is closed even when parsing or document creation fails.
     */
    private void loadFile(File file) throws IOException, ParseException {
        log.debug("loadFile: " + file.getName());
        boolean docPerLine = this.documentStyle == DOC_PER_LINE;
        String curDocID = file.getName();
        List<CharSpan> spanList = new ArrayList<CharSpan>();
        this.stack = new ArrayList<StackEntry>();
        StringBuffer buf = new StringBuffer();
        LineNumberReader in = new LineNumberReader(new FileReader(file));
        try {
            String line;
            // readLine() returning null is the end-of-stream signal; ready() is not.
            while ((line = in.readLine()) != null) {
                if (this.use_markup) {
                    // labelLine appends the stripped line to buf and returns the whole buffer.
                    line = this.labelLine(line, buf, curDocID, spanList);
                }
                if (docPerLine) {
                    if (line.trim().length() <= 0) {
                        continue;
                    }
                    curDocID = file.getName() + "@line:" + in.getLineNumber();
                    this.addDocument(line, curDocID, spanList, null);
                    buf = new StringBuffer();
                    spanList.clear();
                    continue;
                }
                if (this.use_markup) {
                    // Already accumulated in buf by labelLine.
                    continue;
                }
                buf.append(line);
                buf.append("\n");
            }
            if (this.documentStyle == DOC_PER_FILE) {
                this.addDocument(buf.toString(), curDocID, spanList, null);
            }
        } finally {
            in.close();
        }
    }

    /**
     * Adds one document to the text base, converts the character spans to token
     * spans in the labels, and optionally assigns a {@code "POS"} property to each token.
     * Empty text is logged and skipped.
     *
     * @param docText full document text
     * @param documentId identifier under which the document is stored
     * @param spans character-offset spans relative to {@code docText}
     * @param tokenProps per-token property values in token order, or {@code null} for none
     */
    private void addDocument(String docText, String documentId, List<CharSpan> spans, List<String> tokenProps) {
        if (docText.length() == 0) {
            log.warn("Text for document " + documentId + " is length zero or all white space, it will not be added to the text base.");
            return;
        }
        if (log.isDebugEnabled()) {
            log.debug("add document " + documentId);
        }
        this.textBase.loadDocument(documentId, docText);
        for (CharSpan charSpan : spans) {
            Span subSpan = this.textBase.documentSpan(documentId).charIndexSubSpan(charSpan.lo, charSpan.hi);
            // A span covering only blanks/newlines is collapsed to its left boundary.
            Span approxSpan = TextBaseLoader.coversText(docText, charSpan.lo, charSpan.hi) ? subSpan : subSpan.getLeftBoundary();
            if (log.isDebugEnabled()) {
                int hi = Math.min(charSpan.hi, docText.length());
                log.debug("approximating " + charSpan.type + " span '" + docText.substring(charSpan.lo, hi) + "' with token span '" + approxSpan);
            }
            this.labels.addToType(approxSpan, charSpan.type);
        }
        if (tokenProps != null && tokenProps.size() > 0) {
            Document doc = this.textBase.getDocument(documentId);
            TextToken[] tokens = doc.getTokens();
            Iterator<String> itr = tokenProps.iterator();
            // Stop at whichever runs out first; the tokenizer may not yield one token per property.
            for (int x = 0; x < tokens.length && itr.hasNext(); ++x) {
                String nextPOS = itr.next();
                if (nextPOS == null || tokens[x] == null) {
                    continue;
                }
                this.labels.setProperty(tokens[x], "POS", nextPOS);
            }
        }
        new TextLabelsLoader().closeLabels(this.labels, this.closurePolicy);
    }

    /**
     * Reports whether {@code text[lo, hi)} holds any character other than a blank
     * or a newline. Offsets outside the text are ignored.
     */
    private static boolean coversText(String text, int lo, int hi) {
        int limit = Math.min(hi, text.length());
        for (int i = Math.max(lo, 0); i < limit; ++i) {
            char c = text.charAt(i);
            if (c != ' ' && c != '\n') {
                return true;
            }
        }
        return false;
    }

    /**
     * Strips markup tags from one line, appends the remaining text plus a newline
     * to {@code docBuffer}, and records a span for every closing tag that matches
     * a previously opened tag. Open tags may stay pending across calls.
     *
     * @param line raw input line
     * @param docBuffer accumulated tag-free document text; span offsets refer to it
     * @param docId document identifier stored in created spans and used in error messages
     * @param spanList receives the spans closed on this line
     * @return the full content of {@code docBuffer} after this line was appended
     * @throws ParseException if a closing tag has no matching open tag
     */
    protected String labelLine(String line, StringBuffer docBuffer, String docId, List<CharSpan> spanList) throws ParseException {
        if (this.stack == null) {
            this.stack = new ArrayList<StackEntry>();
        }
        Matcher matcher = MARKUP_PATTERN.matcher(line);
        int currentChar = 0;
        while (matcher.find()) {
            String tag = matcher.group(1);
            boolean isOpenTag = !matcher.group().startsWith("</");
            if (log.isDebugEnabled()) {
                log.debug("matcher.group='" + matcher.group() + "'");
                log.debug("found '" + tag + "' tag ,open=" + isOpenTag + ", at " + matcher.start() + " in:\n" + line);
            }
            // Copy the text between the previous tag and this one; the tag itself is dropped.
            docBuffer.append(line.substring(currentChar, matcher.start()));
            currentChar = matcher.end();
            if (isOpenTag) {
                this.stack.add(new StackEntry(docBuffer.length(), tag));
                continue;
            }
            // Search from the innermost open tag outwards for the matching name.
            StackEntry entry = null;
            for (int j = this.stack.size() - 1; j >= 0; --j) {
                entry = this.stack.get(j);
                if (tag.equals(entry.markupTag)) {
                    this.stack.remove(j);
                    break;
                }
            }
            if (entry == null) {
                throw new ParseException("close '" + tag + "' tag with no open in " + docId, 0);
            }
            // After an unsuccessful search, entry is the outermost open tag.
            if (!tag.equals(entry.markupTag)) {
                throw new ParseException("close '" + tag + "' tag paired with open '" + entry.markupTag + "'", entry.index);
            }
            if (log.isDebugEnabled()) {
                log.debug("adding a " + tag + " span from " + entry.index + " to " + docBuffer.length() + ": '" + docBuffer.substring(entry.index) + "'");
            }
            spanList.add(new CharSpan(entry.index, docBuffer.length(), tag, docId));
        }
        docBuffer.append(line.substring(currentChar, line.length()));
        docBuffer.append("\n");
        return docBuffer.toString();
    }

    /** A typed span expressed as character offsets {@code [lo, hi)} into a document's text. */
    private static class CharSpan {
        public int lo;
        public int hi;
        String type;
        String docID;

        public CharSpan(int lo, int hi, String type, String docID) {
            this.lo = lo;
            this.hi = hi;
            this.type = type;
            this.docID = docID;
        }
    }

    /** An open tag awaiting its close: the tag name and the buffer offset at which it was opened. */
    private static class StackEntry {
        public int index;
        public String markupTag;

        public StackEntry(int index, String markupTag) {
            this.index = index;
            this.markupTag = markupTag;
        }
    }
}

