/*
 * Decompiled with CFR 0.152.
 */
package edu.cmu.minorthird.text;

import edu.cmu.minorthird.text.AbstractAnnotator;
import edu.cmu.minorthird.text.MonotonicTextLabels;
import edu.cmu.minorthird.text.Span;
import edu.cmu.minorthird.text.TextLabels;
import java.io.BufferedReader;
import java.io.FileReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;
import java.util.Vector;
import org.apache.log4j.Logger;

public class ExtractAbbrev
extends AbstractAnnotator {
    // Logger used for the debug traces emitted while extracting and annotating pairs.
    private Logger log = Logger.getLogger(ExtractAbbrev.class);
    // Name passed to labels.setAnnotatedBy() at the end of doAnnotate().
    public static final String PROVIDED_ANNOTATION = "abbrev";
    // Span type that doAnnotate() adds to every short-form (abbreviation) span.
    public static final String SHORT_FORM_TYPE = "abbrevShort";
    // Span type that doAnnotate() adds to every long-form (definition) span.
    public static final String LONG_FORM_TYPE = "abbrevLong";
    // Property set on each long-form span; its value is the pair counter as a string.
    public static final String LONG_FORM_PROP = "expansion";
    // Property set on each short-form span; its value is the same pair counter as a string.
    public static final String SHORT_FORM_PROP = "acronym";
    // Short form -> known long forms; filled by loadTrueDefinitions(), read by isTrueDefinition().
    private Map<String, Vector<String>> mTestDefinitions = new HashMap<String, Vector<String>>();
    // Incremented by extractAbbrPair() in test mode when a pair is found in mTestDefinitions.
    private int truePositives = 0;
    // Incremented by extractAbbrPair() in test mode when a pair is not found in mTestDefinitions.
    private int falsePositives = 0;
    // NOTE(review): never updated anywhere in this file; main() only prints it (always 0).
    private int falseNegatives = 0;
    // NOTE(review): never updated anywhere in this file; main() only prints it (always 0).
    private int trueNegatives = 0;
    // Tab character. Not referenced by name in this file; the code uses tab literals directly.
    private static final char DELIMITER = '\t';
    // Set by main() when -testlist is given; makes extractAbbrPair() print TP/FP verdicts.
    private boolean testMode = false;
    // Alternating (short form, long form) spans appended by extractAbbrPair() while
    // annotationMode is true; cleared and consumed per document by doAnnotate().
    private List<StringSpan> accum = new ArrayList<StringSpan>();
    // True only while doAnnotate() is running; switches extractAbbrPair() from printing to accumulating.
    private boolean annotationMode = false;

    /**
     * Annotates every document in the labels' text base with the abbreviation
     * pairs found in it.
     *
     * <p>For each extracted pair the short-form span is added to
     * {@link #SHORT_FORM_TYPE} and the long-form span to {@link #LONG_FORM_TYPE};
     * both spans receive the same running pair number (as a string) under
     * {@link #SHORT_FORM_PROP} and {@link #LONG_FORM_PROP} respectively. When all
     * documents are processed the labels are marked as annotated by
     * {@link #PROVIDED_ANNOTATION}.
     *
     * @param labels the labels to add the abbreviation annotations to
     */
    protected void doAnnotate(MonotonicTextLabels labels) {
        this.annotationMode = true;
        try {
            // Pair counter shared by all documents; links a short form to its long form.
            int k = 0;
            Iterator<Span> i = labels.getTextBase().documentSpanIterator();
            while (i.hasNext()) {
                this.accum.clear();
                Span doc = i.next();
                String s = doc.getDocumentContents();
                // In annotation mode the extractor appends (short, long) pairs to accum.
                this.extractAbbrPairsFromString(s);
                Iterator<StringSpan> j = this.accum.iterator();
                while (j.hasNext()) {
                    // accum always holds an even number of entries: short form, then long form.
                    StringSpan shortForm = j.next();
                    StringSpan longForm = j.next();
                    Span shortSpan = doc.charIndexSubSpan(shortForm.lo, shortForm.hi);
                    // Build the (comparatively expensive) trace strings only when they will be logged.
                    if (this.log.isDebugEnabled()) {
                        this.log.debug("shortSpan[" + shortForm.lo + ".." + shortForm.hi + "] of doc: near '" + doc.getDocumentContents().substring(shortForm.lo, shortForm.hi) + "'");
                        this.log.debug("shortForm='" + shortForm.asString() + "' shortSpan='" + shortSpan.asString() + "'");
                    }
                    Span longSpan = doc.charIndexSubSpan(longForm.lo, longForm.hi);
                    labels.addToType(shortSpan, SHORT_FORM_TYPE);
                    labels.addToType(longSpan, LONG_FORM_TYPE);
                    labels.setProperty(shortSpan, SHORT_FORM_PROP, Integer.toString(++k));
                    labels.setProperty(longSpan, LONG_FORM_PROP, Integer.toString(k));
                }
            }
        } finally {
            // Always leave annotation mode, even if a span lookup or label update throws;
            // otherwise later extractions on this instance would keep accumulating silently.
            this.annotationMode = false;
        }
        labels.setAnnotatedBy(PROVIDED_ANNOTATION);
    }

    /**
     * Returns a fixed placeholder message; this annotator does not explain
     * individual annotations.
     *
     * @param labels not used
     * @param span not used
     * @return the constant placeholder text
     */
    public String explainAnnotation(TextLabels labels, Span span) {
        final String placeholder = "No explanation implemented.";
        return placeholder;
    }

    /**
     * Tells whether a candidate short form is acceptable: it must contain at
     * least one letter and start with a letter, a digit or an opening parenthesis.
     *
     * @param str the candidate short form
     * @return {@code true} if the candidate passes both checks
     */
    private boolean isValidShortForm(String str) {
        if (!this.hasLetter(str)) {
            // Also covers the empty string, so charAt(0) below is safe.
            return false;
        }
        char first = str.charAt(0);
        return first == '(' || Character.isLetterOrDigit(first);
    }

    /**
     * Checks whether the string contains at least one letter.
     *
     * @param str the text to inspect
     * @return {@code true} if any character satisfies {@link Character#isLetter(char)}
     */
    private boolean hasLetter(String str) {
        for (char c : str.toCharArray()) {
            if (Character.isLetter(c)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Checks whether the string contains at least one upper-case character.
     *
     * @param str the text to inspect
     * @return {@code true} if any character satisfies {@link Character#isUpperCase(char)}
     */
    private boolean hasCapital(String str) {
        int pos = 0;
        int end = str.length();
        while (pos < end) {
            if (Character.isUpperCase(str.charAt(pos))) {
                return true;
            }
            ++pos;
        }
        return false;
    }

    /**
     * Loads the reference ("true") abbreviation definitions used in test mode.
     *
     * <p>Each line of the file is expected to be {@code <short form>TAB<long form>};
     * both parts are trimmed and the long form is appended to the list stored
     * under the short form in {@code mTestDefinitions}. Lines without a tab are
     * skipped instead of aborting the whole load. Any exception is reported on
     * the console together with the line being processed, and loading stops.
     *
     * @param inFile path of the tab-separated definitions file
     */
    private void loadTrueDefinitions(String inFile) {
        String str = "";
        Map<String, Vector<String>> definitions = this.mTestDefinitions;
        try {
            BufferedReader fin = new BufferedReader(new FileReader(inFile));
            try {
                while ((str = fin.readLine()) != null) {
                    int j = str.indexOf('\t');
                    if (j < 0) {
                        // No separator: the line cannot be split into a pair. Skip it rather
                        // than let substring(0, -1) throw and discard the rest of the file.
                        if (str.trim().length() > 0) {
                            this.log.warn("skipping definition line without a tab separator: '" + str + "'");
                        }
                        continue;
                    }
                    String abbrString = str.substring(0, j).trim();
                    // The slice starts at the tab itself; trim() removes it.
                    String defnString = str.substring(j, str.length()).trim();
                    Vector<String> entry = definitions.get(abbrString);
                    if (entry == null) {
                        entry = new Vector<String>();
                        definitions.put(abbrString, entry);
                    }
                    entry.add(defnString);
                }
            } finally {
                // Release the file handle whether or not reading succeeded.
                fin.close();
            }
        }
        catch (Exception e) {
            e.printStackTrace();
            System.out.println(str);
        }
    }

    /**
     * Looks a pair up in the reference definitions loaded for test mode.
     *
     * @param shortForm the abbreviation, matched exactly as a map key
     * @param longForm the candidate definition, compared ignoring case
     * @return {@code true} if {@code longForm} is one of the definitions stored
     *         for {@code shortForm}; {@code false} if there is no entry or no match
     */
    private boolean isTrueDefinition(String shortForm, String longForm) {
        Vector<String> knownLongForms = this.mTestDefinitions.get(shortForm);
        if (knownLongForms != null) {
            for (String known : knownLongForms) {
                if (known.equalsIgnoreCase(longForm)) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Reads a whole text file and extracts abbreviation pairs from it.
     *
     * <p>The lines of the file are joined into one string, each followed by a
     * single space, and the result is handed to
     * {@code extractAbbrPairsFromString}. Any exception raised while reading or
     * extracting is printed to standard error and otherwise ignored.
     *
     * @param inFile path of the text file to process
     */
    private void extractAbbrPairsFromFile(String inFile) {
        try {
            // StringBuilder avoids re-copying the accumulated text for every line.
            StringBuilder content = new StringBuilder();
            BufferedReader fin = new BufferedReader(new FileReader(inFile));
            try {
                String line;
                while ((line = fin.readLine()) != null) {
                    content.append(line).append(' ');
                }
            } finally {
                // Release the file handle whether or not reading succeeded.
                fin.close();
            }
            this.extractAbbrPairsFromString(content.toString());
        }
        catch (Exception ioe) {
            ioe.printStackTrace();
        }
    }

    /**
     * Splits the text into sentences and extracts abbreviation pairs from each.
     *
     * <p>A sentence boundary is a period followed by exactly two whitespace
     * characters. Each sentence is passed on as a {@code StringSpan} over the
     * original string so that character offsets stay relative to the full text.
     *
     * @param currString the text to process
     */
    private void extractAbbrPairsFromString(String currString) {
        // Width of the separator matched by the regex below: '.' plus two whitespace characters.
        final int separatorLength = 3;
        int start = 0;
        for (String piece : currString.split("\\.\\s{2}")) {
            int end = start + piece.length();
            this.extractAbbrPairsFromSentence(new StringSpan(currString, start, end));
            start = end + separatorLength;
        }
    }

    /**
     * Scans one sentence for parenthesized expressions and hands each plausible
     * (short form, long form) candidate to {@code extractAbbrPair}.
     *
     * <p>For every {@code " ("} found, the text between the preceding {@code ". "}
     * or {@code ", "} (or the start of the span) and the parenthesis is the
     * long-form candidate, and the text inside the parentheses is the short-form
     * candidate. If the parenthesized text has more than two tokens or is longer
     * than the preceding text, the roles are swapped: the word just before the
     * parenthesis becomes the short form and the parenthesized text the long form.
     * Scanning then resumes after the closing parenthesis.
     *
     * @param currSentence span of the sentence to scan; all spans created here
     *        share its base string
     */
    private void extractAbbrPairsFromSentence(StringSpan currSentence) {
        StringSpan longForm = StringSpan.EMPTY;
        StringSpan shortForm = StringSpan.EMPTY;
        int closeParenIndex = -1;
        int tmpIndex = -1;
        this.log.debug("finding pairs in '" + currSentence.asString() + "'");
        int openParenIndex = currSentence.indexOf(" (");
        do {
            // indexOf(" (") returns the position of the space; step onto the '(' itself.
            if (openParenIndex > -1) {
                ++openParenIndex;
            }
            int sentenceEnd = Math.max(currSentence.lastIndexOf(". "), currSentence.lastIndexOf(", "));
            if (openParenIndex != -1 || sentenceEnd != -1) {
                if (openParenIndex == -1) {
                    // No parenthesis at all: keep only the text after the last ". " or ", ".
                    currSentence = currSentence.substring(sentenceEnd + 2);
                } else {
                    closeParenIndex = currSentence.indexOf(')', openParenIndex);
                    if (closeParenIndex > -1) {
                        // Start the long form after the last clause break before the parenthesis.
                        sentenceEnd = Math.max(currSentence.lastIndexOf(". ", openParenIndex), currSentence.lastIndexOf(", ", openParenIndex));
                        if (sentenceEnd == -1) {
                            // -2 makes "sentenceEnd + 2" below evaluate to 0, i.e. the start of the span.
                            sentenceEnd = -2;
                        }
                        longForm = new StringSpan(currSentence, sentenceEnd + 2, openParenIndex);
                        shortForm = new StringSpan(currSentence, openParenIndex + 1, closeParenIndex);
                    }
                }
            }
            if (shortForm.length() > 0 || longForm.length() > 0) {
                if (shortForm.length() > 1 && longForm.length() > 1) {
                    StringTokenizer shortTokenizer;
                    int newCloseParenIndex;
                    // Nested parenthesis inside the short form: extend it to the next ')'.
                    if (shortForm.indexOf('(') > -1 && (newCloseParenIndex = currSentence.indexOf(')', closeParenIndex + 1)) > -1) {
                        shortForm = new StringSpan(currSentence, openParenIndex + 1, newCloseParenIndex);
                        closeParenIndex = newCloseParenIndex;
                    }
                    // Cut the short form at the first ", " and then at the first "; ".
                    if ((tmpIndex = shortForm.indexOf(", ")) > -1) {
                        shortForm = shortForm.substring(0, tmpIndex);
                    }
                    if ((tmpIndex = shortForm.indexOf("; ")) > -1) {
                        shortForm = shortForm.substring(0, tmpIndex);
                    }
                    // More than two tokens, or longer than the preceding text: treat the
                    // parenthesized text as the long form and the word before '(' as the short form.
                    if ((shortTokenizer = new StringTokenizer(shortForm.asString())).countTokens() > 2 || shortForm.length() > longForm.length()) {
                        tmpIndex = currSentence.lastIndexOf(" ", openParenIndex - 2);
                        StringSpan tmpStr = new StringSpan(currSentence, tmpIndex + 1, openParenIndex - 1);
                        longForm = shortForm;
                        shortForm = tmpStr;
                        // In the swapped case the short form must contain an upper-case character.
                        if (!this.hasCapital(shortForm.asString())) {
                            shortForm = StringSpan.EMPTY;
                        }
                    }
                    if (this.isValidShortForm(shortForm.asString())) {
                        this.extractAbbrPair(shortForm.trim(), longForm.trim());
                    }
                }
                // Continue scanning after the closing parenthesis.
                currSentence = currSentence.substring(closeParenIndex + 1);
            } else if (openParenIndex > -1) {
                // A " (" was found but no candidate was produced (e.g. no closing parenthesis).
                // Both paths below leave the loop; the reassignment is not read again in this method.
                if (currSentence.length() - openParenIndex <= 200) break;
                currSentence = currSentence.substring(openParenIndex + 1);
                break;
            }
            shortForm = StringSpan.EMPTY;
            longForm = StringSpan.EMPTY;
        } while ((openParenIndex = currSentence.indexOf(" (")) > -1);
    }

    /**
     * Finds the shortest tail of {@code longForm} that can account for every
     * letter or digit of {@code shortForm}.
     *
     * <p>Both spans are walked from right to left. Each alphanumeric character
     * of the short form (compared case-insensitively) must be matched in the
     * long form, in order; non-alphanumeric short-form characters are skipped.
     * The first character of the short form must additionally match at a
     * position that is not preceded by a letter or digit. The result starts at
     * the beginning of the word where the matching ended.
     *
     * @param shortForm the abbreviation candidate
     * @param longForm the text preceding (or containing) the definition
     * @return the matching tail of {@code longForm}, or {@code null} if some
     *         character of the short form cannot be matched
     */
    private StringSpan findBestLongForm(StringSpan shortForm, StringSpan longForm) {
        int longPos = longForm.length() - 1;
        int shortPos = shortForm.length();
        while (--shortPos >= 0) {
            char target = Character.toLowerCase(shortForm.charAt(shortPos));
            if (!Character.isLetterOrDigit(target)) {
                continue;
            }
            boolean firstShortChar = shortPos == 0;
            // Move left until the character matches and, for the first short-form
            // character, the match is not preceded by a letter or digit.
            while (true) {
                boolean mismatch = longPos >= 0 && Character.toLowerCase(longForm.charAt(longPos)) != target;
                if (!mismatch) {
                    boolean insideWord = firstShortChar && longPos > 0 && Character.isLetterOrDigit(longForm.charAt(longPos - 1));
                    if (!insideWord) {
                        break;
                    }
                }
                --longPos;
            }
            if (longPos < 0) {
                // Ran off the left edge of the long form without a match.
                return null;
            }
            --longPos;
        }
        // Extend the match leftwards to the start of its word.
        int wordStart = longForm.lastIndexOf(" ", longPos) + 1;
        return longForm.substring(wordStart);
    }

    /**
     * Validates a (short form, long form) candidate and records or reports it.
     *
     * <p>The best matching long form is computed with {@code findBestLongForm}
     * and the pair is rejected when the long form is shorter than the short
     * form, contains the short form followed by a space, ends with the short
     * form, has too many words relative to the number of alphanumeric
     * characters in the short form, or when the short form has more than ten
     * such characters. Accepted pairs are appended to {@code accum} in
     * annotation mode, printed with a TP/FP verdict in test mode, and printed
     * as plain tab-separated text otherwise.
     *
     * @param shortForm the trimmed abbreviation candidate
     * @param longForm the trimmed text in which to look for the definition
     */
    private void extractAbbrPair(StringSpan shortForm, StringSpan longForm) {
        this.log.debug("finding long form for '" + shortForm.asString() + "' and '" + longForm.asString() + "'");
        if (shortForm.length() == 1) {
            return;
        }
        StringSpan bestLongForm = this.findBestLongForm(shortForm, longForm);
        if (bestLongForm == null) {
            return;
        }

        String shortText = shortForm.asString();
        String longText = bestLongForm.asString();

        // Words in the long form; hyphens count as word separators.
        int longFormSize = new StringTokenizer(longText, " \t\n\r\f-").countTokens();
        // Letters and digits in the short form.
        int shortFormSize = 0;
        for (int i = 0; i < shortForm.length(); ++i) {
            if (Character.isLetterOrDigit(shortForm.charAt(i))) {
                ++shortFormSize;
            }
        }

        boolean longFormTooShort = bestLongForm.length() < shortForm.length();
        boolean longFormContainsShort = bestLongForm.indexOf(shortText + " ") > -1;
        boolean longFormEndsWithShort = longText.endsWith(shortText);
        boolean tooManyWords = longFormSize > shortFormSize * 2 || longFormSize > shortFormSize + 5;
        boolean shortFormTooLong = shortFormSize > 10;
        if (longFormTooShort || longFormContainsShort || longFormEndsWithShort || tooManyWords || shortFormTooLong) {
            return;
        }

        if (this.annotationMode) {
            this.accum.add(shortForm);
            this.accum.add(bestLongForm);
        }
        if (this.testMode) {
            String verdict;
            if (this.isTrueDefinition(shortText, longText)) {
                ++this.truePositives;
                verdict = "TP";
            } else {
                ++this.falsePositives;
                verdict = "FP";
            }
            System.out.println(shortText + '\t' + longText + '\t' + verdict);
        } else if (!this.annotationMode) {
            System.out.println(shortText + '\t' + longText);
        }
    }

    /**
     * Prints the command-line help to standard error and terminates the JVM
     * with exit status 1.
     */
    private static void usage() {
        String[] helpLines = {
            "Usage: ExtractAbbrev [-options] <filename>",
            "       <filename> contains text from which abbreviations are extracted",
            "       -testlist <file> = list of true abbreviation definition pairs",
            "       -usage or -help = this message",
        };
        for (String helpLine : helpLines) {
            System.err.println(helpLine);
        }
        System.exit(1);
    }

    /**
     * Command-line entry point.
     *
     * <p>Accepts {@code -testlist <file>} to enable test mode with a file of
     * reference definitions, {@code -usage} / {@code -help} to print the help
     * text, and the input file name, which must be the last argument. Extracted
     * pairs are written to standard output; in test mode a summary line with the
     * TP/FP/FN/TN counters follows.
     *
     * @param args command-line arguments
     */
    public static void main(String[] args) {
        ExtractAbbrev extractAbbrev = new ExtractAbbrev();
        String filename = null;
        String testList = null;
        int lastIndex = args.length - 1;
        int i = 0;
        while (i < args.length) {
            String arg = args[i];
            if (arg.equals("-testlist")) {
                if (i == lastIndex) {
                    // The option needs a value; usage() exits.
                    ExtractAbbrev.usage();
                }
                testList = args[++i];
                extractAbbrev.testMode = true;
            } else if (arg.equals("-usage") || arg.equals("-help")) {
                ExtractAbbrev.usage();
            } else {
                filename = arg;
                if (i != lastIndex) {
                    // The file name must be the final argument.
                    ExtractAbbrev.usage();
                }
            }
            ++i;
        }
        if (filename == null) {
            ExtractAbbrev.usage();
        }
        boolean testing = extractAbbrev.testMode;
        if (testing) {
            extractAbbrev.loadTrueDefinitions(testList);
        }
        extractAbbrev.extractAbbrPairsFromFile(filename);
        if (testing) {
            System.out.println("TP: " + extractAbbrev.truePositives + " FP: " + extractAbbrev.falsePositives + " FN: " + extractAbbrev.falseNegatives + " TN: " + extractAbbrev.trueNegatives);
        }
    }

    /**
     * A view of the half-open character range {@code [lo, hi)} of a base
     * string. The search methods ({@code indexOf}, {@code lastIndexOf},
     * {@code charAt}) work on the text of the view and use indices relative to
     * its start, while {@code lo} and {@code hi} stay absolute offsets into the
     * base string.
     */
    private static class StringSpan {
        /** Shared zero-length span over the empty string. */
        public static final StringSpan EMPTY = new StringSpan("", 0, 0);
        /** The string this span is a view of. */
        String base;
        /** Absolute start offset (inclusive) in {@code base}. */
        int lo;
        /** Absolute end offset (exclusive) in {@code base}. */
        int hi;
        /** Cached text of the span, {@code base.substring(lo, hi)}. */
        String mySubstring;

        /**
         * Creates a span over {@code b} from absolute offsets.
         *
         * @param b the base string
         * @param lo absolute start offset, inclusive
         * @param hi absolute end offset, exclusive
         */
        public StringSpan(String b, int lo, int hi) {
            this.base = b;
            this.lo = lo;
            this.hi = hi;
            this.mySubstring = b.substring(lo, hi);
        }

        /**
         * Creates a span over the same base as {@code ss}, with offsets given
         * relative to the start of {@code ss}.
         *
         * @param ss the span the offsets are relative to
         * @param lo start offset relative to {@code ss}, inclusive
         * @param hi end offset relative to {@code ss}, exclusive
         */
        public StringSpan(StringSpan ss, int lo, int hi) {
            this(ss.base, ss.lo + lo, ss.lo + hi);
        }

        /** @return the absolute start offset of this span in its base string */
        public int offset() {
            return lo;
        }

        /** @return the number of characters covered by this span */
        public int length() {
            return hi - lo;
        }

        /** @return the character at index {@code i}, relative to the span start */
        public char charAt(int i) {
            return mySubstring.charAt(i);
        }

        /** @return the span-relative index of the first {@code ch}, or -1 */
        public int indexOf(char ch) {
            return mySubstring.indexOf(ch);
        }

        /** @return the span-relative index of the first {@code ch} at or after {@code fromIndex}, or -1 */
        public int indexOf(char ch, int fromIndex) {
            return mySubstring.indexOf(ch, fromIndex);
        }

        /** @return the span-relative index of the first occurrence of {@code s}, or -1 */
        public int indexOf(String s) {
            return mySubstring.indexOf(s);
        }

        /** @return the span-relative index of the last occurrence of {@code s}, or -1 */
        public int lastIndexOf(String s) {
            return mySubstring.lastIndexOf(s);
        }

        /** @return the span-relative index of the last occurrence of {@code s} starting at or before {@code fromIndex}, or -1 */
        public int lastIndexOf(String s, int fromIndex) {
            return mySubstring.lastIndexOf(s, fromIndex);
        }

        /** @return the text covered by this span */
        public String asString() {
            return mySubstring;
        }

        /**
         * @param newLo start offset relative to this span, inclusive
         * @param newHi end offset relative to this span, exclusive
         * @return a new span over the same base covering that range
         */
        public StringSpan substring(int newLo, int newHi) {
            return new StringSpan(this, newLo, newHi);
        }

        /**
         * @param newLo start offset relative to this span, inclusive
         * @return a new span from that offset to the end of this span
         */
        public StringSpan substring(int newLo) {
            return new StringSpan(base, lo + newLo, hi);
        }

        /**
         * @return a new span with leading and trailing whitespace (per
         *         {@link Character#isWhitespace(char)}) excluded; this span is
         *         not modified
         */
        public StringSpan trim() {
            int start = lo;
            int end = hi;
            while (start < end && Character.isWhitespace(base.charAt(start))) {
                ++start;
            }
            while (end > start && Character.isWhitespace(base.charAt(end - 1))) {
                --end;
            }
            return new StringSpan(base, start, end);
        }
    }
}

