/*
 * Decompiled with CFR 0.152.
 */
package edu.cmu.minorthird.text;

import edu.cmu.minorthird.text.CompoundTokenizer;
import edu.cmu.minorthird.text.Document;
import edu.cmu.minorthird.text.Span;
import edu.cmu.minorthird.text.TextLabels;
import edu.cmu.minorthird.text.TextToken;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.TreeSet;
import org.apache.log4j.Logger;

/**
 * Tokenizer that emits every labeled span of a configured type as a single
 * token and copies the parent tokenizer's tokens everywhere else.
 *
 * <p>The parent tokenizer is the one attached to the text base of the
 * {@link TextLabels} passed to the constructor.
 */
public class SpanTypeTokenizer
extends CompoundTokenizer {
    private static final Logger log = Logger.getLogger(SpanTypeTokenizer.class);
    private final String spanType;
    private final TextLabels labels;

    /**
     * Creates a tokenizer that merges spans of type {@code s} into single tokens.
     *
     * @param s the span type whose instances become single tokens
     * @param l the labels holding those spans; its text base supplies the parent tokenizer
     */
    public SpanTypeTokenizer(String s, TextLabels l) {
        this.spanType = s;
        this.labels = l;
        this.parentTokenizer = l.getTextBase().getTokenizer();
    }

    /**
     * @return the span type whose instances are emitted as single tokens
     */
    public String getSpanType() {
        return this.spanType;
    }

    /**
     * @return the labels this tokenizer reads its spans from
     */
    public TextLabels getTextLabels() {
        return this.labels;
    }

    /**
     * Splits a plain string by delegating to the parent tokenizer; span labels
     * are not consulted because a bare string carries no document id.
     *
     * @param string2 the text to tokenize
     * @return the parent tokenizer's tokens for {@code string2}
     */
    public String[] splitIntoTokens(String string2) {
        return this.parentTokenizer.splitIntoTokens(string2);
    }

    /**
     * Tokenizes {@code document}, replacing the parent tokens covered by each
     * span of the configured type with one token spanning the whole span.
     *
     * <p>Falls back to the parent tokenizer (with a warning) when the labels'
     * text base has no document with the same id, or when that document's text
     * differs from {@code document}'s text.
     *
     * <p>Running out of parent tokens (an empty token set, or spans that reach
     * or pass the last parent token) is tolerated: the remaining spans are
     * still emitted and no exception is raised for the exhausted iterator.
     *
     * @param document the document to tokenize
     * @return the merged token sequence, all tokens referring to {@code document}
     */
    public TextToken[] splitIntoTokens(Document document) {
        // Look the labeled counterpart up once instead of on every use.
        Document labeledDocument = this.labels.getTextBase().getDocument(document.getId());
        if (labeledDocument == null) {
            log.warn("Labels for document with id: " + document.getId() + " are not available, will tokenize using base tokenizer.");
            return this.parentTokenizer.splitIntoTokens(document);
        }
        if (!labeledDocument.getText().equals(document.getText())) {
            log.warn("Document with id: " + document.getId() + " differs from the document in the labels set with the same ID.  Will tokenize using base tokenizer.");
            return this.parentTokenizer.splitIntoTokens(document);
        }

        // The TreeSet orders the parent tokens by TextToken's natural ordering.
        TreeSet<TextToken> sortedTokens = new TreeSet<TextToken>();
        for (TextToken parentToken : labeledDocument.getTokens()) {
            sortedTokens.add(parentToken);
        }

        ArrayList<TextToken> tokenList = new ArrayList<TextToken>();
        Iterator<TextToken> oldTokenIterator = sortedTokens.iterator();
        // null means "no parent tokens left"; an empty token set starts out that way.
        TextToken currOldToken = SpanTypeTokenizer.nextOrNull(oldTokenIterator);

        Iterator<Span> typeIterator = this.labels.instanceIterator(this.spanType, document.getId());
        while (typeIterator.hasNext()) {
            Span currSpan = typeIterator.next();
            int spanLo = currSpan.getTextToken(0).getLo();

            // Copy the parent tokens that start before this span.
            while (currOldToken != null && currOldToken.getLo() < spanLo) {
                tokenList.add(SpanTypeTokenizer.copyOnto(document, currOldToken));
                currOldToken = SpanTypeTokenizer.nextOrNull(oldTokenIterator);
            }

            // The whole span becomes one token.
            tokenList.add(new TextToken(document, spanLo, currSpan.asString().length()));

            // Advance once per token in the span so the cursor lands on the
            // first parent token after it (or null when none remain).
            for (int i = 0; i < currSpan.size(); ++i) {
                currOldToken = SpanTypeTokenizer.nextOrNull(oldTokenIterator);
            }
        }

        // Copy whatever parent tokens follow the last span.
        while (currOldToken != null) {
            tokenList.add(SpanTypeTokenizer.copyOnto(document, currOldToken));
            currOldToken = SpanTypeTokenizer.nextOrNull(oldTokenIterator);
        }
        return tokenList.toArray(new TextToken[0]);
    }

    /** Returns the iterator's next token, or {@code null} once it is exhausted. */
    private static TextToken nextOrNull(Iterator<TextToken> tokens) {
        return tokens.hasNext() ? tokens.next() : null;
    }

    /** Creates a token on {@code document} with the same offset and length as {@code token}. */
    private static TextToken copyOnto(Document document, TextToken token) {
        return new TextToken(document, token.getLo(), token.getLength());
    }
}

