/*
 * Decompiled with CFR 0.152.
 */
package edu.cmu.minorthird.text.learn;

import edu.cmu.minorthird.text.FancyLoader;
import edu.cmu.minorthird.text.MonotonicTextLabels;
import edu.cmu.minorthird.text.MutableTextLabels;
import edu.cmu.minorthird.text.RegexTokenizer;
import edu.cmu.minorthird.text.Span;
import edu.cmu.minorthird.text.SpanDifference;
import edu.cmu.minorthird.text.TextLabelsLoader;
import edu.cmu.minorthird.text.learn.ExtractorAnnotator;
import edu.cmu.minorthird.text.learn.FreqAnal;
import edu.cmu.minorthird.text.mixup.MixupInterpreter;
import edu.cmu.minorthird.text.mixup.MixupProgram;
import edu.cmu.minorthird.util.BasicCommandLineProcessor;
import edu.cmu.minorthird.util.CommandLineProcessor;
import edu.cmu.minorthird.util.IOUtil;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * Increases the recall of a previously-learned name extractor by name matching.
 *
 * <p>The spans labeled with the prediction type are collected into a dictionary,
 * every dictionary entry is expanded into a set of spelling variants (full name,
 * dropped middle names, initials, ...), and every occurrence of a variant in the
 * documents is labeled with the type {@code predType + "_updated"}. A mixup
 * program is then evaluated over the labels and span-difference summaries are
 * printed for the original and the post-processed predictions.
 *
 * <p>Typical use: {@code new ExtractorNameMatcher(labels).doMain()}, or through
 * {@link #main(String[])} with the options listed in the usage text.
 */
public class ExtractorNameMatcher {
    /** Marker appended to an initial that is written without any punctuation. */
    private static final String DIV = "@#!";
    /** Maximum number of tokens handed to the dictionary lookup at one position. */
    private static final int WINDOW_SIZE = 5;
    /**
     * Number of trailing start positions of a document that are searched for
     * high-risk names (presumably the signature area, going by the name).
     */
    private static final int SIG_SIZE = 2;
    /**
     * Location of the mixup program evaluated after matching.
     * NOTE(review): hard-coded, Windows-only absolute path.
     */
    private static final String FIX_ENV_MIXUP_PATH = "c:\\minorthird\\apps\\names\\fixEnv.mixup";
    /** File used when saving to the requested {@code -saveAs} file fails. */
    private static final String FALLBACK_SAVE_NAME = "name-matching-labels.env";
    /**
     * Token-index combinations used to build name variants, indexed by the number
     * of tokens in the name. Names with zero or more than four tokens have no entry.
     */
    private static final int[][][] ORDERS_BY_TOKEN_COUNT = new int[][][]{
        {},
        {{0}},
        {{0, 1}, {0}},
        {{0, 1, 2}, {0, 2}, {2}, {0}},
        {{0, 1, 2, 3}, {0, 1, 3}, {0, 3}, {3}, {0}},
    };
    /** Orders strings by decreasing length so that longer names are tried first. */
    private static final Comparator<String> LONGEST_FIRST = new Comparator<String>(){

        @Override
        public int compare(String o1, String o2) {
            int len1 = o1.length();
            int len2 = o2.length();
            return len2 < len1 ? -1 : (len2 == len1 ? 0 : 1);
        }
    };
    // NOTE(review): the usage text says the default span type is "true_name",
    // but the spanType field below defaults to the empty string.
    private static final String[] USAGE = new String[]{"ExtractorNameMatcher: increase recall of a previously-learned extractor, applying a name matching scheme", "", "Parameters:", " -loadFrom FILE     where to load a previously-learner extractor from", " -labels KEY        the key for the labels, in which names are to be extracted", " [-spanType String] the span type of the true names. The default is set to true_name", " [-saveAs FILE]     a file to save the new post-name matching labels", ""};

    /** Variants whose h-score (from {@link FreqAnal}) is below this value are discarded. */
    private static double threshold = 16.0;

    private File fromFile = null;
    private File saveAs = null;
    private MonotonicTextLabels textLabels = null;
    private MonotonicTextLabels annLabels = null;
    private String predType = "_prediction";
    private String spanType = "";
    private ExtractorAnnotator ann = null;
    private SpanDifference finalSD = null;
    private List<String> nameDict = new ArrayList<String>();
    private List<String> lowRiskNameList = new ArrayList<String>();
    private List<String> highRiskNameList = new ArrayList<String>();
    private List<String> deletedNameList = new ArrayList<String>();

    /**
     * Creates a matcher that works directly on already-annotated labels.
     *
     * @param labels labels that already contain the predicted spans
     */
    public ExtractorNameMatcher(MonotonicTextLabels labels) {
        this.annLabels = labels;
    }

    /** Creates a matcher that is configured through {@link #getCLP()}. */
    public ExtractorNameMatcher() {
    }

    /**
     * Returns the token precision of the post-matching predictions.
     * Only valid after {@link #doMain()} has completed; before that the
     * underlying span difference is {@code null}.
     */
    public double getTokenPrecision() {
        return this.finalSD.tokenPrecision();
    }

    /**
     * Returns the token recall of the post-matching predictions.
     * Only valid after {@link #doMain()} has completed; before that the
     * underlying span difference is {@code null}.
     */
    public double getTokenRecall() {
        return this.finalSD.tokenRecall();
    }

    /** Returns a command-line processor that configures this instance. */
    public CommandLineProcessor getCLP() {
        return new MyCLP();
    }

    /**
     * Runs the whole pipeline: obtain annotated labels, build and transform the
     * name dictionary, print the resulting name lists, apply the dictionary,
     * evaluate the mixup program, optionally save the labels, and print
     * before/after span-difference summaries.
     *
     * @throws IllegalStateException if no labels were passed to the constructor
     *         and {@code -loadFrom} or {@code -labels} was not specified
     * @throws IllegalArgumentException if the serialized annotator cannot be loaded
     */
    public void doMain() {
        if (this.annLabels == null) {
            this.annLabels = this.annotateWithLoadedExtractor();
        }
        this.nameDict = this.collectPredictedNames();
        FreqAnal fa = new FreqAnal(this.annLabels, this.predType);
        this.transformDict(fa);
        ExtractorNameMatcher.printNumbered("Low Risk Names:", this.lowRiskNameList);
        ExtractorNameMatcher.printNumbered("High Risk Names:", this.highRiskNameList);
        ExtractorNameMatcher.printNumbered("Deleted Names:", this.deletedNameList);
        this.applyDict();
        this.runFixupProgram();
        this.saveLabelsIfRequested();
        this.reportScores();
    }

    /** Loads the serialized extractor and returns an annotated copy of the text labels. */
    private MonotonicTextLabels annotateWithLoadedExtractor() {
        if (this.fromFile == null) {
            throw new IllegalStateException("need to specify -loadFrom");
        }
        if (this.textLabels == null) {
            // Fail with a clear message instead of handing null to the annotator.
            throw new IllegalStateException("need to specify -labels");
        }
        try {
            Object loaded = IOUtil.loadSerialized(this.fromFile);
            this.ann = (ExtractorAnnotator)loaded;
        }
        catch (IOException ex) {
            // Same message as before, but the cause is kept for the stack trace.
            throw new IllegalArgumentException("can't load annotator from " + this.fromFile + ": " + ex, ex);
        }
        return (MonotonicTextLabels)this.ann.annotatedCopy(this.textLabels);
    }

    /** Returns the distinct texts of all predicted spans, longest first. */
    private List<String> collectPredictedNames() {
        HashSet<String> allNames = new HashSet<String>();
        Iterator<Span> it = this.annLabels.instanceIterator(this.predType);
        while (it.hasNext()) {
            allNames.add(it.next().asString());
        }
        List<String> names = new ArrayList<String>(allNames);
        Collections.sort(names, LONGEST_FIRST);
        return names;
    }

    /** Prints a header followed by the names, numbered from 1. */
    private static void printNumbered(String header, List<String> names) {
        System.out.println(header);
        int counter = 0;
        for (String name : names) {
            System.out.println(++counter + ". " + name);
        }
    }

    /** Loads the mixup program and evaluates it over the annotated labels. */
    private void runFixupProgram() {
        MixupProgram p = null;
        try {
            p = new MixupProgram(new File(FIX_ENV_MIXUP_PATH));
        }
        catch (Exception e) {
            System.out.println(e);
        }
        // NOTE(review): when loading fails, p is still null here and is passed on
        // unchanged (original behavior); confirm how MixupInterpreter treats that.
        MixupInterpreter interp = new MixupInterpreter(p);
        interp.eval(this.annLabels);
    }

    /** Saves the labels to the -saveAs file, falling back to a default file name on failure. */
    private void saveLabelsIfRequested() {
        if (this.saveAs == null) {
            return;
        }
        try {
            new TextLabelsLoader().saveTypesAsOps(this.annLabels, this.saveAs);
        }
        catch (IOException e) {
            // Report the primary failure instead of swallowing it, then try the fallback.
            System.out.println("can't save labels to " + this.saveAs + ": " + e + "; trying " + FALLBACK_SAVE_NAME);
            try {
                new TextLabelsLoader().saveTypesAsOps(this.annLabels, new File(FALLBACK_SAVE_NAME));
            }
            catch (Exception e2) {
                System.out.println(e2);
            }
        }
    }

    /** Prints span-difference summaries before and after matching and stores the latter. */
    private void reportScores() {
        System.out.println("============================================================");
        System.out.println("Pre names-matching:");
        SpanDifference sd = new SpanDifference(this.annLabels.instanceIterator(this.predType), this.annLabels.instanceIterator(this.spanType), this.annLabels.closureIterator(this.spanType));
        System.out.println(sd.toSummary());
        System.out.println("Post names-matching:");
        this.finalSD = new SpanDifference(this.annLabels.instanceIterator(this.predType + "_updated_fixed"), this.annLabels.instanceIterator(this.spanType), this.annLabels.closureIterator(this.spanType));
        System.out.println(this.finalSD.toSummary());
    }

    /**
     * Labels dictionary matches in every document: low-risk names are searched at
     * every token position, high-risk names only at the last {@link #SIG_SIZE}
     * start positions of the document.
     */
    private void applyDict() {
        int counter = 0;
        Iterator<Span> i = this.annLabels.getTextBase().documentSpanIterator();
        while (i.hasNext()) {
            Span docSpan = i.next();
            System.out.println((float)(++counter) / (float)this.annLabels.getTextBase().size() * 100.0f + "% Working on " + docSpan.getDocumentId() + "...");
            this.labelMatches(docSpan, 0, this.lowRiskNameList);
            // Clamp to 0 so documents shorter than SIG_SIZE tokens do not yield a
            // negative start index.
            this.labelMatches(docSpan, Math.max(0, docSpan.size() - SIG_SIZE), this.highRiskNameList);
        }
    }

    /**
     * Scans a document from the given token index to its end and labels every
     * dictionary match with {@code predType + "_updated"}.
     *
     * @param docSpan    the document to scan
     * @param firstToken index of the first token position to try
     * @param names      dictionary to match against
     */
    private void labelMatches(Span docSpan, int firstToken, List<String> names) {
        for (int j = firstToken; j < docSpan.size(); ++j) {
            Span tokenWindow = docSpan.subSpan(j, Math.min(docSpan.size() - j, WINDOW_SIZE));
            Span nameMatch = this.dictLookup(names, tokenWindow);
            if (nameMatch == null) {
                continue;
            }
            System.out.println("! Found: " + nameMatch.asString().replaceAll("[\r\n\\s]+", " ") + " matches " + tokenWindow.asString().replaceAll("[\r\n\\s]+", " "));
            this.annLabels.addToType(nameMatch, this.predType + "_updated");
            // Continue after the matched tokens (the loop increment adds the last step).
            j += nameMatch.size() - 1;
        }
    }

    /**
     * Finds the first name in the list that the window text starts with,
     * ignoring case and followed by a non-word character or the end of the text.
     *
     * @param nameList    candidate names, tried in list order
     * @param tokenWindow the tokens to match against
     * @return the leading part of the window covered by the name, or {@code null}
     *         when no name matches
     */
    private Span dictLookup(List<String> nameList, Span tokenWindow) {
        if (nameList.isEmpty()) {
            return null;
        }
        // The window text does not depend on the candidate name; normalize it once.
        String windowText = tokenWindow.asString().replaceAll("[\r\n\\s]+", " ").toLowerCase();
        for (String name : nameList) {
            if (!windowText.matches("(?i)(?s)^\\Q" + name + "\\E(\\W|$).*")) {
                continue;
            }
            int numTokens = new RegexTokenizer().splitIntoTokens(name).length;
            // Guard against the name tokenizing into more tokens than the window holds.
            return tokenWindow.subSpan(0, Math.min(numTokens, tokenWindow.size()));
        }
        return null;
    }

    /**
     * Expands every dictionary name into its variants and sorts each variant into
     * the deleted, low-risk or high-risk list. A variant whose h-score is below
     * the threshold is deleted; otherwise a variant without the {@link #DIV}
     * marker is low-risk and one consisting only of marked initials is high-risk.
     * Other variants are dropped. The marker is removed before storing.
     */
    private void transformDict(FreqAnal fa) {
        for (String name : this.nameDict) {
            for (String variant : this.transformName(name)) {
                boolean lowRisk = variant.indexOf(DIV) == -1;
                boolean highRisk = variant.matches("(\\w@#!)+");
                String plain = variant.replace(DIV, "");
                Double hScore = fa.getHScore(plain);
                if (hScore != null && hScore.doubleValue() < threshold) {
                    this.deletedNameList.add(plain);
                } else if (lowRisk) {
                    this.lowRiskNameList.add(plain);
                } else if (highRisk) {
                    this.highRiskNameList.add(plain);
                }
            }
        }
        this.lowRiskNameList = this.uniqueSortedList(this.lowRiskNameList);
        this.highRiskNameList = this.uniqueSortedList(this.highRiskNameList);
        this.deletedNameList = this.uniqueSortedList(this.deletedNameList);
    }

    /**
     * Produces the spelling variants of one name. The name is lower-cased,
     * stripped of everything except letters, hyphens and spaces, and split on
     * hyphens/spaces. Names with one to four tokens are expanded; any other
     * token count yields an empty list. Variants with fewer than two word
     * characters or a trailing hyphen are discarded.
     */
    private List<String> transformName(String name) {
        List<String> result = new ArrayList<String>();
        // NOTE(review): trim() runs before the character filter, so a name such as
        // ". smith" keeps a leading space and splits into an empty first token.
        String str = name.toLowerCase().trim().replaceAll("[^a-zA-Z\\- ]+", "");
        String[] s = str.split("[\\- ]+");
        if (s.length < 1 || s.length >= ORDERS_BY_TOKEN_COUNT.length) {
            return result;
        }
        for (String candidate : this.transform(s, ORDERS_BY_TOKEN_COUNT[s.length])) {
            String temp = candidate.trim();
            if (temp.replaceAll("\\W", "").length() < 2 || temp.endsWith("-")) {
                continue;
            }
            result.add(temp);
        }
        return result;
    }

    /**
     * Builds, for every index combination in {@code order}, all concatenations of
     * the token variants at those indices. Combinations are emitted in the order
     * given; within one combination the first index varies slowest.
     *
     * @param s     the tokens of the name
     * @param order index combinations; each entry lists token indices into {@code s}
     */
    private List<String> transform(String[] s, int[][] order) {
        List<List<String>> variants = new ArrayList<List<String>>(s.length);
        for (int i = 0; i < s.length; ++i) {
            variants.add(this.transformToken(s[i], i == 0, i == s.length - 1));
        }
        List<String> result = new ArrayList<String>();
        for (int[] curOrder : order) {
            if (curOrder.length == 0) {
                continue;
            }
            List<String> combos = new ArrayList<String>();
            combos.add("");
            for (int tokenIndex : curOrder) {
                List<String> extended = new ArrayList<String>();
                for (String prefix : combos) {
                    for (String variant : variants.get(tokenIndex)) {
                        extended.add(prefix + variant);
                    }
                }
                combos = extended;
            }
            result.addAll(combos);
        }
        return result;
    }

    /** Returns the distinct elements of the list, sorted by decreasing length. */
    private List<String> uniqueSortedList(List<String> list) {
        HashSet<String> set = new HashSet<String>();
        for (String str : list) {
            set.add(str);
        }
        List<String> al = new ArrayList<String>(set);
        Collections.sort(al, LONGEST_FIRST);
        return al;
    }

    /**
     * Returns the written forms of a single name token. A last token yields the
     * token itself and its initial followed by "."; any other token yields the
     * token followed by a space or a hyphen and its initial followed by ". ".
     * Every non-empty token also yields its initial followed by {@link #DIV}.
     *
     * @param name  the token; an empty token yields no forms
     * @param first whether this is the first token of the name (currently unused)
     * @param last  whether this is the last token of the name
     */
    private List<String> transformToken(String name, boolean first, boolean last) {
        List<String> result = new ArrayList<String>();
        if (name.length() == 0) {
            return result;
        }
        String initial = name.substring(0, 1);
        if (last) {
            result.add(name);
            result.add(initial + ".");
        } else {
            result.add(name + " ");
            result.add(name + "-");
            result.add(initial + ". ");
        }
        result.add(initial + DIV);
        return result;
    }

    /** Command-line entry point; any exception is printed with its stack trace. */
    public static void main(String[] args) {
        try {
            ExtractorNameMatcher nm = new ExtractorNameMatcher();
            nm.getCLP().processArguments(args);
            nm.doMain();
        }
        catch (Exception ex) {
            ex.printStackTrace();
        }
    }

    /** Command-line options; each method stores its argument in the enclosing matcher. */
    public class MyCLP
    extends BasicCommandLineProcessor {
        /** -loadFrom FILE: the file holding the serialized extractor. */
        public void loadFrom(String s) {
            ExtractorNameMatcher.this.fromFile = new File(s);
        }

        /** -saveAs FILE: the file the resulting labels are written to. */
        public void saveAs(String s) {
            ExtractorNameMatcher.this.saveAs = new File(s);
        }

        /** -labels KEY: loads the text labels for the given key. */
        public void labels(String s) {
            ExtractorNameMatcher.this.textLabels = (MutableTextLabels)FancyLoader.loadTextLabels(s);
        }

        /** -spanType String: the span type the predictions are compared against. */
        public void spanType(String s) {
            ExtractorNameMatcher.this.spanType = s;
        }

        /** Prints the usage text. */
        public void usage() {
            for (int i = 0; i < USAGE.length; ++i) {
                System.out.println(USAGE[i]);
            }
        }
    }
}

